From a2775bbc1d58ce630517dfe86090c166f27d719f Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Tue, 12 Mar 2019 21:21:26 +0100 Subject: kernel/workqueue: Use __printf markup to silence compiler in function 'alloc_workqueue' Silence warnings (triggered at W=1) by adding relevant __printf attributes. kernel/workqueue.c:4249:2: warning: function 'alloc_workqueue' might be a candidate for 'gnu_printf' format attribute [-Wsuggest-attribute=format] Signed-off-by: Mathieu Malaterre Signed-off-by: Tejun Heo --- kernel/workqueue.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4026d1871407..56b7cf898f10 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4208,6 +4208,7 @@ static int init_rescuer(struct workqueue_struct *wq) return 0; } +__printf(1, 4) struct workqueue_struct *alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...) -- cgit From 95e0b46fcebd7dbf6850dee96046e4c4ddc7f69c Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 7 Mar 2019 09:16:24 +0800 Subject: audit: fix a memleak caused by auditing load module module.name will be allocated unconditionally when auditing load module, and audit_log_start() can fail with other reasons, or audit_log_exit maybe not called, caused module.name is not freed so free module.name in audit_free_context and __audit_syscall_exit unreferenced object 0xffff88af90837d20 (size 8): comm "modprobe", pid 1036, jiffies 4294704867 (age 3069.138s) hex dump (first 8 bytes): 69 78 67 62 65 00 ff ff ixgbe... backtrace: [<0000000008da28fe>] __audit_log_kern_module+0x33/0x80 [<00000000c1491e61>] load_module+0x64f/0x3850 [<000000007fc9ae3f>] __do_sys_init_module+0x218/0x250 [<0000000000d4a478>] do_syscall_64+0x117/0x400 [<000000004924ded8>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<000000007dc331dd>] 0xffffffffffffffff Fixes: ca86cad7380e3 ("audit: log module name on init_module") Signed-off-by: Zhang Yu Signed-off-by: Li RongQing [PM: manual merge fixup in __audit_syscall_exit()] Signed-off-by: Paul Moore --- kernel/auditsc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index d1eab1d4a930..fa7b8047aab8 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -840,6 +840,13 @@ static inline void audit_proctitle_free(struct audit_context *context) context->proctitle.len = 0; } +static inline void audit_free_module(struct audit_context *context) +{ + if (context->type == AUDIT_KERN_MODULE) { + kfree(context->module.name); + context->module.name = NULL; + } +} static inline void audit_free_names(struct audit_context *context) { struct audit_names *n, *next; @@ -923,6 +930,7 @@ int audit_alloc(struct task_struct *tsk) static inline void audit_free_context(struct audit_context *context) { + audit_free_module(context); audit_free_names(context); unroll_tree_refs(context, NULL, 0); free_tree_refs(context); @@ -1266,7 +1274,6 @@ static void show_special(struct audit_context *context, int *call_panic) audit_log_format(ab, "name="); if (context->module.name) { audit_log_untrustedstring(ab, context->module.name); - kfree(context->module.name); } else audit_log_format(ab, "(null)"); @@ -1697,6 +1704,7 @@ void __audit_syscall_exit(int success, long return_code) context->in_syscall = 0; context->prio = context->state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0; + audit_free_module(context); audit_free_names(context); unroll_tree_refs(context, NULL, 0); audit_free_aux(context); -- cgit From 8194fe94ab08f56ea55653df924647d23873d18c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 19 Mar 2019 10:45:09 -0700 Subject: kernel/workqueue: Document wq_worker_last_func() argument This patch avoids that the following warning is reported when building with W=1: kernel/workqueue.c:938: warning: Function parameter or member 'task' not described in 'wq_worker_last_func' Signed-off-by: Bart Van Assche Signed-off-by: Tejun Heo --- kernel/workqueue.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 56b7cf898f10..21721faa923c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -913,6 +913,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task) /** * wq_worker_last_func - retrieve worker's last work function + * @task: Task to retrieve last work function of. * * Determine the last function a worker executed. This is called from * the scheduler to get a worker's last known identity. -- cgit From 73e65b88feb919f95bdb77c4ed35f69588cf27ee Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 19 Mar 2019 15:23:29 -0400 Subject: audit: connect LOGIN record to its syscall record Currently the AUDIT_LOGIN event is a standalone record that isn't connected to any other records that may be part of its syscall event. To avoid the confusion of generating two events, connect the records by using its syscall context. Please see the github issue https://github.com/linux-audit/audit-kernel/issues/110 Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/audit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index c89ea48c70a6..b96bf69183f4 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -2220,7 +2220,7 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid, if (!audit_enabled) return; - ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); + ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_LOGIN); if (!ab) return; -- cgit From 2efa48fec0c344a6ca1bba66b15d63d38cf20199 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 20 Mar 2019 21:59:22 +0800 Subject: audit: Make audit_log_cap and audit_copy_inode static Fix sparse warning: kernel/auditsc.c:1150:6: warning: symbol 'audit_log_cap' was not declared. Should it be static? kernel/auditsc.c:1908:6: warning: symbol 'audit_copy_inode' was not declared. Should it be static? Signed-off-by: YueHaibing Acked-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/auditsc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index fa7b8047aab8..17b0007fafc2 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1147,7 +1147,8 @@ out: kfree(buf_head); } -void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap) +static void audit_log_cap(struct audit_buffer *ab, char *prefix, + kernel_cap_t *cap) { int i; @@ -1905,8 +1906,9 @@ static inline int audit_copy_fcaps(struct audit_names *name, } /* Copy inode data into an audit_names. */ -void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, - struct inode *inode, unsigned int flags) +static void audit_copy_inode(struct audit_names *name, + const struct dentry *dentry, + struct inode *inode, unsigned int flags) { name->ino = inode->i_ino; name->dev = inode->i_sb->s_dev; -- cgit From 16add411645cff83360086e102daa67b25f1e39a Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 18 Mar 2019 02:30:18 +0300 Subject: syscall_get_arch: add "struct task_struct *" argument This argument is required to extend the generic ptrace API with PTRACE_GET_SYSCALL_INFO request: syscall_get_arch() is going to be called from ptrace_request() along with syscall_get_nr(), syscall_get_arguments(), syscall_get_error(), and syscall_get_return_value() functions with a tracee as their argument. The primary intent is that the triple (audit_arch, syscall_nr, arg1..arg6) should describe what system call is being called and what its arguments are. Reverts: 5e937a9ae913 ("syscall_get_arch: remove useless function arguments") Reverts: 1002d94d3076 ("syscall.h: fix doc text for syscall_get_arch()") Reviewed-by: Andy Lutomirski # for x86 Reviewed-by: Palmer Dabbelt Acked-by: Paul Moore Acked-by: Paul Burton # MIPS parts Acked-by: Michael Ellerman (powerpc) Acked-by: Kees Cook # seccomp parts Acked-by: Mark Salter # for the c6x bit Cc: Elvira Khabirova Cc: Eugene Syromyatnikov Cc: Oleg Nesterov Cc: x86@kernel.org Cc: linux-alpha@vger.kernel.org Cc: linux-snps-arc@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: linux-hexagon@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-m68k@lists.linux-m68k.org Cc: linux-mips@vger.kernel.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-riscv@lists.infradead.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-arch@vger.kernel.org Cc: linux-audit@redhat.com Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Moore --- arch/alpha/include/asm/syscall.h | 2 +- arch/arc/include/asm/syscall.h | 2 +- arch/arm/include/asm/syscall.h | 2 +- arch/arm64/include/asm/syscall.h | 4 ++-- arch/c6x/include/asm/syscall.h | 2 +- arch/csky/include/asm/syscall.h | 2 +- arch/h8300/include/asm/syscall.h | 2 +- arch/hexagon/include/asm/syscall.h | 2 +- arch/ia64/include/asm/syscall.h | 2 +- arch/m68k/include/asm/syscall.h | 2 +- arch/microblaze/include/asm/syscall.h | 2 +- arch/mips/include/asm/syscall.h | 6 +++--- arch/mips/kernel/ptrace.c | 2 +- arch/nds32/include/asm/syscall.h | 2 +- arch/nios2/include/asm/syscall.h | 2 +- arch/openrisc/include/asm/syscall.h | 2 +- arch/parisc/include/asm/syscall.h | 4 ++-- arch/powerpc/include/asm/syscall.h | 10 ++++++++-- arch/riscv/include/asm/syscall.h | 2 +- arch/s390/include/asm/syscall.h | 4 ++-- arch/sh/include/asm/syscall_32.h | 2 +- arch/sh/include/asm/syscall_64.h | 2 +- arch/sparc/include/asm/syscall.h | 5 +++-- arch/unicore32/include/asm/syscall.h | 2 +- arch/x86/include/asm/syscall.h | 8 +++++--- arch/x86/um/asm/syscall.h | 2 +- arch/xtensa/include/asm/syscall.h | 2 +- include/asm-generic/syscall.h | 5 +++-- kernel/auditsc.c | 4 ++-- kernel/seccomp.c | 4 ++-- 30 files changed, 52 insertions(+), 42 deletions(-) (limited to 'kernel') diff --git a/arch/alpha/include/asm/syscall.h b/arch/alpha/include/asm/syscall.h index d73a6fcb519c..11c688c1d7ec 100644 --- a/arch/alpha/include/asm/syscall.h +++ b/arch/alpha/include/asm/syscall.h @@ -4,7 +4,7 @@ #include -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_ALPHA; } diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h index c7fc4c0c3bcb..caf2697ef5b7 100644 --- a/arch/arc/include/asm/syscall.h +++ b/arch/arc/include/asm/syscall.h @@ -70,7 +70,7 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, } static inline int -syscall_get_arch(void) +syscall_get_arch(struct task_struct *task) { return IS_ENABLED(CONFIG_ISA_ARCOMPACT) ? (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index 06dea6bce293..3940ceac0bdc 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -104,7 +104,7 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->ARM_r0 + i, args, n * sizeof(args[0])); } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { /* ARM tasks don't change audit architectures on the fly. */ return AUDIT_ARCH_ARM; diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index ad8be16a39c9..1870df03f774 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -117,9 +117,9 @@ static inline void syscall_set_arguments(struct task_struct *task, * We don't care about endianness (__AUDIT_ARCH_LE bit) here because * AArch64 has the same system calls both on little- and big- endian. */ -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { - if (is_compat_task()) + if (is_compat_thread(task_thread_info(task))) return AUDIT_ARCH_ARM; return AUDIT_ARCH_AARCH64; diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h index 39dbd1ef994c..595057191c9c 100644 --- a/arch/c6x/include/asm/syscall.h +++ b/arch/c6x/include/asm/syscall.h @@ -121,7 +121,7 @@ static inline void syscall_set_arguments(struct task_struct *task, } } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) ? AUDIT_ARCH_C6XBE : AUDIT_ARCH_C6X; diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h index d637445737b7..150ffb894fa2 100644 --- a/arch/csky/include/asm/syscall.h +++ b/arch/csky/include/asm/syscall.h @@ -70,7 +70,7 @@ syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, } static inline int -syscall_get_arch(void) +syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_CSKY; } diff --git a/arch/h8300/include/asm/syscall.h b/arch/h8300/include/asm/syscall.h index 5135910616e2..d316c3d40d4e 100644 --- a/arch/h8300/include/asm/syscall.h +++ b/arch/h8300/include/asm/syscall.h @@ -49,7 +49,7 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, } static inline int -syscall_get_arch(void) +syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_H8300; } diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h index de3917aad3fd..47b0bc3f16be 100644 --- a/arch/hexagon/include/asm/syscall.h +++ b/arch/hexagon/include/asm/syscall.h @@ -46,7 +46,7 @@ static inline void syscall_get_arguments(struct task_struct *task, memcpy(args, &(®s->r00)[i], n * sizeof(args[0])); } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_HEXAGON; } diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h index 1d0b875fec44..47ab33f5448a 100644 --- a/arch/ia64/include/asm/syscall.h +++ b/arch/ia64/include/asm/syscall.h @@ -81,7 +81,7 @@ static inline void syscall_set_arguments(struct task_struct *task, ia64_syscall_get_set_arguments(task, regs, i, n, args, 1); } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_IA64; } diff --git a/arch/m68k/include/asm/syscall.h b/arch/m68k/include/asm/syscall.h index d4d7deda8d50..465ac039be09 100644 --- a/arch/m68k/include/asm/syscall.h +++ b/arch/m68k/include/asm/syscall.h @@ -4,7 +4,7 @@ #include -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_M68K; } diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h index 220decd605a4..77a86fafa974 100644 --- a/arch/microblaze/include/asm/syscall.h +++ b/arch/microblaze/include/asm/syscall.h @@ -101,7 +101,7 @@ static inline void syscall_set_arguments(struct task_struct *task, asmlinkage unsigned long do_syscall_trace_enter(struct pt_regs *regs); asmlinkage void do_syscall_trace_leave(struct pt_regs *regs); -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_MICROBLAZE; } diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 6cf8ffb5367e..6a22c9352ef6 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -140,14 +140,14 @@ extern const unsigned long sys_call_table[]; extern const unsigned long sys32_call_table[]; extern const unsigned long sysn32_call_table[]; -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { int arch = AUDIT_ARCH_MIPS; #ifdef CONFIG_64BIT - if (!test_thread_flag(TIF_32BIT_REGS)) { + if (!test_tsk_thread_flag(task, TIF_32BIT_REGS)) { arch |= __AUDIT_ARCH_64BIT; /* N32 sets only TIF_32BIT_ADDR */ - if (test_thread_flag(TIF_32BIT_ADDR)) + if (test_tsk_thread_flag(task, TIF_32BIT_ADDR)) arch |= __AUDIT_ARCH_CONVENTION_MIPS64_N32; } #endif diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 0057c910bc2f..2ead6ff919b7 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -1418,7 +1418,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) unsigned long args[6]; sd.nr = syscall; - sd.arch = syscall_get_arch(); + sd.arch = syscall_get_arch(current); syscall_get_arguments(current, regs, 0, 6, args); for (i = 0; i < 6; i++) sd.args[i] = args[i]; diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h index cc56a3962f8b..7501e376a6b1 100644 --- a/arch/nds32/include/asm/syscall.h +++ b/arch/nds32/include/asm/syscall.h @@ -188,7 +188,7 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, } static inline int -syscall_get_arch(void) +syscall_get_arch(struct task_struct *task) { return IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) ? AUDIT_ARCH_NDS32BE : AUDIT_ARCH_NDS32; diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h index cf35e210fc4d..f0f6ae208e78 100644 --- a/arch/nios2/include/asm/syscall.h +++ b/arch/nios2/include/asm/syscall.h @@ -136,7 +136,7 @@ static inline void syscall_set_arguments(struct task_struct *task, } } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_NIOS2; } diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h index 2db9f1cf0694..46b10c674bd2 100644 --- a/arch/openrisc/include/asm/syscall.h +++ b/arch/openrisc/include/asm/syscall.h @@ -72,7 +72,7 @@ syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, memcpy(®s->gpr[3 + i], args, n * sizeof(args[0])); } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_OPENRISC; } diff --git a/arch/parisc/include/asm/syscall.h b/arch/parisc/include/asm/syscall.h index 8bff1a58c97f..c04ffc6ac928 100644 --- a/arch/parisc/include/asm/syscall.h +++ b/arch/parisc/include/asm/syscall.h @@ -62,11 +62,11 @@ static inline void syscall_rollback(struct task_struct *task, /* do nothing */ } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { int arch = AUDIT_ARCH_PARISC; #ifdef CONFIG_64BIT - if (!is_compat_task()) + if (!__is_compat_task(task)) arch = AUDIT_ARCH_PARISC64; #endif return arch; diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 1a0e7a8b1c81..efb50429c9f4 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -99,9 +99,15 @@ static inline void syscall_set_arguments(struct task_struct *task, regs->orig_gpr3 = args[0]; } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { - int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64; + int arch; + + if (IS_ENABLED(CONFIG_PPC64) && !test_tsk_thread_flag(task, TIF_32BIT)) + arch = AUDIT_ARCH_PPC64; + else + arch = AUDIT_ARCH_PPC; + #ifdef __LITTLE_ENDIAN__ arch |= __AUDIT_ARCH_LE; #endif diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index bba3da6ef157..ca120a36a037 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -100,7 +100,7 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->a1 + i * sizeof(regs->a1), args, n * sizeof(regs->a0)); } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { #ifdef CONFIG_64BIT return AUDIT_ARCH_RISCV64; diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 96f9a9151fde..5a40ea8b90ea 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -92,10 +92,10 @@ static inline void syscall_set_arguments(struct task_struct *task, regs->orig_gpr2 = args[0]; } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { #ifdef CONFIG_COMPAT - if (test_tsk_thread_flag(current, TIF_31BIT)) + if (test_tsk_thread_flag(task, TIF_31BIT)) return AUDIT_ARCH_S390; #endif return AUDIT_ARCH_S390X; diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h index 6e118799831c..08de429eccd4 100644 --- a/arch/sh/include/asm/syscall_32.h +++ b/arch/sh/include/asm/syscall_32.h @@ -95,7 +95,7 @@ static inline void syscall_set_arguments(struct task_struct *task, } } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { int arch = AUDIT_ARCH_SH; diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h index 43882580c7f9..9b62a2404531 100644 --- a/arch/sh/include/asm/syscall_64.h +++ b/arch/sh/include/asm/syscall_64.h @@ -63,7 +63,7 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->regs[2 + i], args, n * sizeof(args[0])); } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { int arch = AUDIT_ARCH_SH; diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h index 053989e3f6a6..9ffb367c17fd 100644 --- a/arch/sparc/include/asm/syscall.h +++ b/arch/sparc/include/asm/syscall.h @@ -128,10 +128,11 @@ static inline void syscall_set_arguments(struct task_struct *task, regs->u_regs[UREG_I0 + i + j] = args[j]; } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { #if defined(CONFIG_SPARC64) && defined(CONFIG_COMPAT) - return in_compat_syscall() ? AUDIT_ARCH_SPARC : AUDIT_ARCH_SPARC64; + return test_tsk_thread_flag(task, TIF_32BIT) + ? AUDIT_ARCH_SPARC : AUDIT_ARCH_SPARC64; #elif defined(CONFIG_SPARC64) return AUDIT_ARCH_SPARC64; #else diff --git a/arch/unicore32/include/asm/syscall.h b/arch/unicore32/include/asm/syscall.h index 3a6b885476b4..607961797fff 100644 --- a/arch/unicore32/include/asm/syscall.h +++ b/arch/unicore32/include/asm/syscall.h @@ -4,7 +4,7 @@ #include -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_UNICORE; } diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index d653139857af..435f3f09279c 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -107,7 +107,7 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->bx + i, args, n * sizeof(args[0])); } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_I386; } @@ -236,10 +236,12 @@ static inline void syscall_set_arguments(struct task_struct *task, } } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { /* x32 tasks should be considered AUDIT_ARCH_X86_64. */ - return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; + return (IS_ENABLED(CONFIG_IA32_EMULATION) && + task->thread_info.status & TS_COMPAT) + ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; } #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/um/asm/syscall.h b/arch/x86/um/asm/syscall.h index ef898af102d1..56a2f0913e3c 100644 --- a/arch/x86/um/asm/syscall.h +++ b/arch/x86/um/asm/syscall.h @@ -9,7 +9,7 @@ typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { #ifdef CONFIG_X86_32 return AUDIT_ARCH_I386; diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index a168bf81c7f4..0681ca656809 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -14,7 +14,7 @@ #include #include -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_XTENSA; } diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 0c938a4354f6..e0d060b43321 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -144,14 +144,15 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, /** * syscall_get_arch - return the AUDIT_ARCH for the current system call + * @task: task of interest, must be blocked * * Returns the AUDIT_ARCH_* based on the system call convention in use. * - * It's only valid to call this when current is stopped on entry to a system + * It's only valid to call this when @task is stopped on entry to a system * call, due to %TIF_SYSCALL_TRACE, %TIF_SYSCALL_AUDIT, or %TIF_SECCOMP. * * Architectures which permit CONFIG_HAVE_ARCH_SECCOMP_FILTER must * provide an implementation of this. */ -int syscall_get_arch(void); +int syscall_get_arch(struct task_struct *task); #endif /* _ASM_SYSCALL_H */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 17b0007fafc2..98a98e6dca05 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1636,7 +1636,7 @@ void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2, return; } - context->arch = syscall_get_arch(); + context->arch = syscall_get_arch(current); context->major = major; context->argv[0] = a1; context->argv[1] = a2; @@ -2590,7 +2590,7 @@ void audit_seccomp(unsigned long syscall, long signr, int code) return; audit_log_task(ab); audit_log_format(ab, " sig=%ld arch=%x syscall=%ld compat=%d ip=0x%lx code=0x%x", - signr, syscall_get_arch(), syscall, + signr, syscall_get_arch(current), syscall, in_compat_syscall(), KSTK_EIP(current), code); audit_log_end(ab); } diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 54a0347ca812..36f36ab00f48 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -148,7 +148,7 @@ static void populate_seccomp_data(struct seccomp_data *sd) unsigned long args[6]; sd->nr = syscall_get_nr(task, regs); - sd->arch = syscall_get_arch(); + sd->arch = syscall_get_arch(task); syscall_get_arguments(task, regs, 0, 6, args); sd->args[0] = args[0]; sd->args[1] = args[1]; @@ -591,7 +591,7 @@ static void seccomp_init_siginfo(kernel_siginfo_t *info, int syscall, int reason info->si_code = SYS_SECCOMP; info->si_call_addr = (void __user *)KSTK_EIP(current); info->si_errno = reason; - info->si_arch = syscall_get_arch(); + info->si_arch = syscall_get_arch(current); info->si_syscall = syscall; } -- cgit From 0f3adc288df8ba2ac2fea0a8e4890f9fb4cd075d Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 22 Mar 2019 09:53:59 +0800 Subject: bpf: track references based on is_acquire_func So far, the verifier only acquires reference tracking state for RET_PTR_TO_SOCKET_OR_NULL. Instead of extending this for every new return type which desires these semantics, acquire reference tracking state iff the called helper is an acquire function. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 86f9cd5d1c4e..868a82ad5597 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3147,19 +3147,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; - if (is_acquire_function(func_id)) { - int id = acquire_reference_state(env, insn_idx); - - if (id < 0) - return id; - /* For mark_ptr_or_null_reg() */ - regs[BPF_REG_0].id = id; - /* For release_reference() */ - regs[BPF_REG_0].ref_obj_id = id; - } else { - /* For mark_ptr_or_null_reg() */ - regs[BPF_REG_0].id = ++env->id_gen; - } + regs[BPF_REG_0].id = ++env->id_gen; } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; @@ -3170,9 +3158,19 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return -EINVAL; } - if (is_ptr_cast_function(func_id)) + if (is_ptr_cast_function(func_id)) { /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; + } else if (is_acquire_function(func_id)) { + int id = acquire_reference_state(env, insn_idx); + + if (id < 0) + return id; + /* For mark_ptr_or_null_reg() */ + regs[BPF_REG_0].id = id; + /* For release_reference() */ + regs[BPF_REG_0].ref_obj_id = id; + } do_refine_retval_range(regs, fn->ret_type, func_id, &meta); -- cgit From 85a51f8c28b9812642d76db6889f3f39dc3fbab3 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 22 Mar 2019 09:54:00 +0800 Subject: bpf: allow helpers to return PTR_TO_SOCK_COMMON It's currently not possible to access timewait or request sockets from eBPF, since there is no way to return a PTR_TO_SOCK_COMMON from a helper. Introduce RET_PTR_TO_SOCK_COMMON to enable this behaviour. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + kernel/bpf/verifier.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'kernel') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f02367faa58d..f62897198844 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -205,6 +205,7 @@ enum bpf_return_type { RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ + RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 868a82ad5597..a476e13201d6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3148,6 +3148,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; regs[BPF_REG_0].id = ++env->id_gen; + } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; + regs[BPF_REG_0].id = ++env->id_gen; } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; -- cgit From edbf8c01de5a104a71ed6df2bf6421ceb2836a8e Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 22 Mar 2019 09:54:01 +0800 Subject: bpf: add skc_lookup_tcp helper Allow looking up a sock_common. This gives eBPF programs access to timewait and request sockets. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 20 ++++++- kernel/bpf/verifier.c | 3 +- net/core/filter.c | 144 +++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 143 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 929c8e537a14..fab05317f5e7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2431,6 +2431,23 @@ union bpf_attr { * Return * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. + * + * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-**NULL**, released via **bpf_sk_release**\ (). + * + * This function is identical to bpf_sk_lookup_tcp, except that it + * also returns timewait or request sockets. Use bpf_sk_fullsock + * or bpf_tcp_socket to access the full structure. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2531,7 +2548,8 @@ union bpf_attr { FN(sk_fullsock), \ FN(tcp_sock), \ FN(skb_ecn_set_ce), \ - FN(get_listener_sock), + FN(get_listener_sock), \ + FN(skc_lookup_tcp), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a476e13201d6..dffeec3706ce 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -369,7 +369,8 @@ static bool is_release_function(enum bpf_func_id func_id) static bool is_acquire_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_sk_lookup_tcp || - func_id == BPF_FUNC_sk_lookup_udp; + func_id == BPF_FUNC_sk_lookup_udp || + func_id == BPF_FUNC_skc_lookup_tcp; } static bool is_ptr_cast_function(enum bpf_func_id func_id) diff --git a/net/core/filter.c b/net/core/filter.c index 647c63a7b25b..b6d83ba97621 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5156,15 +5156,15 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, return sk; } -/* bpf_sk_lookup performs the core lookup for different types of sockets, +/* bpf_skc_lookup performs the core lookup for different types of sockets, * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE. * Returns the socket as an 'unsigned long' to simplify the casting in the * callers to satisfy BPF_CALL declarations. */ -static unsigned long -__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, - struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, - u64 flags) +static struct sock * +__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, + u64 flags) { struct sock *sk = NULL; u8 family = AF_UNSPEC; @@ -5192,15 +5192,27 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, put_net(net); } +out: + return sk; +} + +static struct sock * +__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, + u64 flags) +{ + struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net, + ifindex, proto, netns_id, flags); + if (sk) sk = sk_to_full_sk(sk); -out: - return (unsigned long) sk; + + return sk; } -static unsigned long -bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, - u8 proto, u64 netns_id, u64 flags) +static struct sock * +bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + u8 proto, u64 netns_id, u64 flags) { struct net *caller_net; int ifindex; @@ -5213,14 +5225,47 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, ifindex = 0; } - return __bpf_sk_lookup(skb, tuple, len, caller_net, ifindex, - proto, netns_id, flags); + return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto, + netns_id, flags); } +static struct sock * +bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + u8 proto, u64 netns_id, u64 flags) +{ + struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id, + flags); + + if (sk) + sk = sk_to_full_sk(sk); + + return sk; +} + +BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP, + netns_id, flags); +} + +static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = { + .func = bpf_skc_lookup_tcp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { - return bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags); + return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, + netns_id, flags); } static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = { @@ -5238,7 +5283,8 @@ static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = { BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { - return bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, netns_id, flags); + return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, + netns_id, flags); } static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { @@ -5273,8 +5319,9 @@ BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, struct net *caller_net = dev_net(ctx->rxq->dev); int ifindex = ctx->rxq->dev->ifindex; - return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, - IPPROTO_UDP, netns_id, flags); + return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net, + ifindex, IPPROTO_UDP, netns_id, + flags); } static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { @@ -5289,14 +5336,38 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx, + struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) +{ + struct net *caller_net = dev_net(ctx->rxq->dev); + int ifindex = ctx->rxq->dev->ifindex; + + return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net, + ifindex, IPPROTO_TCP, netns_id, + flags); +} + +static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = { + .func = bpf_xdp_skc_lookup_tcp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) { struct net *caller_net = dev_net(ctx->rxq->dev); int ifindex = ctx->rxq->dev->ifindex; - return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, - IPPROTO_TCP, netns_id, flags); + return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net, + ifindex, IPPROTO_TCP, netns_id, + flags); } static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { @@ -5311,11 +5382,31 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, + sock_net(ctx->sk), 0, + IPPROTO_TCP, netns_id, flags); +} + +static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = { + .func = bpf_sock_addr_skc_lookup_tcp, + .gpl_only = false, + .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { - return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, - IPPROTO_TCP, netns_id, flags); + return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, + sock_net(ctx->sk), 0, IPPROTO_TCP, + netns_id, flags); } static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { @@ -5332,8 +5423,9 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { - return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, - IPPROTO_UDP, netns_id, flags); + return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, + sock_net(ctx->sk), 0, IPPROTO_UDP, + netns_id, flags); } static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { @@ -5586,6 +5678,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sock_addr_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; + case BPF_FUNC_skc_lookup_tcp: + return &bpf_sock_addr_skc_lookup_tcp_proto; #endif /* CONFIG_INET */ default: return bpf_base_func_proto(func_id); @@ -5719,6 +5813,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_sock_proto; case BPF_FUNC_get_listener_sock: return &bpf_get_listener_sock_proto; + case BPF_FUNC_skc_lookup_tcp: + return &bpf_skc_lookup_tcp_proto; #endif default: return bpf_base_func_proto(func_id); @@ -5754,6 +5850,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_sk_lookup_tcp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; + case BPF_FUNC_skc_lookup_tcp: + return &bpf_xdp_skc_lookup_tcp_proto; #endif default: return bpf_base_func_proto(func_id); @@ -5846,6 +5944,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; + case BPF_FUNC_skc_lookup_tcp: + return &bpf_skc_lookup_tcp_proto; #endif default: return bpf_base_func_proto(func_id); -- cgit From 3b0f31f2b8c9fb348e4530b88f6b64f9621f83d6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Mar 2019 22:51:02 +0100 Subject: genetlink: make policy common to family Since maxattr is common, the policy can't really differ sanely, so make it common as well. The only user that did in fact manage to make a non-common policy is taskstats, which has to be really careful about it (since it's still using a common maxattr!). This is no longer supported, but we can fake it using pre_doit. This reduces the size of e.g. nl80211.o (which has lots of commands): text data bss dec hex filename 398745 14323 2240 415308 6564c net/wireless/nl80211.o (before) 397913 14331 2240 414484 65314 net/wireless/nl80211.o (after) -------------------------------- -832 +8 0 -824 Which is obviously just 8 bytes for each command, and an added 8 bytes for the new policy pointer. I'm not sure why the ops list is counted as .text though. Most of the code transformations were done using the following spatch: @ops@ identifier OPS; expression POLICY; @@ struct genl_ops OPS[] = { ..., { - .policy = POLICY, }, ... }; @@ identifier ops.OPS; expression ops.POLICY; identifier fam; expression M; @@ struct genl_family fam = { .ops = OPS, .maxattr = M, + .policy = POLICY, ... }; This also gets rid of devlink_nl_cmd_region_read_dumpit() accessing the cb->data as ops, which we want to change in a later genl patch. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- drivers/block/nbd.c | 5 +- drivers/net/gtp.c | 4 +- drivers/net/ieee802154/mac802154_hwsim.c | 7 +-- drivers/net/macsec.c | 11 +--- drivers/net/team/team.c | 5 +- drivers/net/wireless/mac80211_hwsim.c | 7 +-- drivers/target/target_core_user.c | 5 +- include/linux/genl_magic_func.h | 4 +- include/net/genetlink.h | 4 +- kernel/taskstats.c | 28 ++++++++- net/batman-adv/netlink.c | 19 +----- net/core/devlink.c | 43 +------------ net/hsr/hsr_netlink.c | 3 +- net/ieee802154/ieee802154.h | 2 - net/ieee802154/netlink.c | 1 + net/ieee802154/nl802154.c | 30 +-------- net/ipv4/fou.c | 4 +- net/ipv4/tcp_metrics.c | 3 +- net/ipv6/ila/ila_main.c | 5 +- net/ipv6/seg6.c | 5 +- net/l2tp/l2tp_netlink.c | 10 +-- net/ncsi/ncsi-netlink.c | 7 +-- net/netfilter/ipvs/ip_vs_ctl.c | 13 +--- net/netlabel/netlabel_calipso.c | 5 +- net/netlabel/netlabel_cipso_v4.c | 5 +- net/netlabel/netlabel_mgmt.c | 9 +-- net/netlabel/netlabel_unlabeled.c | 9 +-- net/netlink/genetlink.c | 6 +- net/nfc/netlink.c | 20 +----- net/openvswitch/conntrack.c | 4 +- net/openvswitch/datapath.c | 17 ++--- net/openvswitch/meter.c | 5 +- net/smc/smc_pnet.c | 5 +- net/tipc/netlink.c | 22 +------ net/wimax/stack.c | 5 +- net/wireless/nl80211.c | 105 +------------------------------ 36 files changed, 68 insertions(+), 374 deletions(-) (limited to 'kernel') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 90ba9f4c03f3..92b8aafb8bb4 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1999,22 +1999,18 @@ out: static const struct genl_ops nbd_connect_genl_ops[] = { { .cmd = NBD_CMD_CONNECT, - .policy = nbd_attr_policy, .doit = nbd_genl_connect, }, { .cmd = NBD_CMD_DISCONNECT, - .policy = nbd_attr_policy, .doit = nbd_genl_disconnect, }, { .cmd = NBD_CMD_RECONFIGURE, - .policy = nbd_attr_policy, .doit = nbd_genl_reconfigure, }, { .cmd = NBD_CMD_STATUS, - .policy = nbd_attr_policy, .doit = nbd_genl_status, }, }; @@ -2031,6 +2027,7 @@ static struct genl_family nbd_genl_family __ro_after_init = { .ops = nbd_connect_genl_ops, .n_ops = ARRAY_SIZE(nbd_connect_genl_ops), .maxattr = NBD_ATTR_MAX, + .policy = nbd_attr_policy, .mcgrps = nbd_mcast_grps, .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps), }; diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 7a145172d503..c06e31747288 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1271,20 +1271,17 @@ static const struct genl_ops gtp_genl_ops[] = { { .cmd = GTP_CMD_NEWPDP, .doit = gtp_genl_new_pdp, - .policy = gtp_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = GTP_CMD_DELPDP, .doit = gtp_genl_del_pdp, - .policy = gtp_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = GTP_CMD_GETPDP, .doit = gtp_genl_get_pdp, .dumpit = gtp_genl_dump_pdp, - .policy = gtp_genl_policy, .flags = GENL_ADMIN_PERM, }, }; @@ -1294,6 +1291,7 @@ static struct genl_family gtp_genl_family __ro_after_init = { .version = 0, .hdrsize = 0, .maxattr = GTPA_MAX, + .policy = gtp_genl_policy, .netnsok = true, .module = THIS_MODULE, .ops = gtp_genl_ops, diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index b6743f03dce0..49866737f138 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -598,37 +598,31 @@ static const struct nla_policy hwsim_genl_policy[MAC802154_HWSIM_ATTR_MAX + 1] = static const struct genl_ops hwsim_nl_ops[] = { { .cmd = MAC802154_HWSIM_CMD_NEW_RADIO, - .policy = hwsim_genl_policy, .doit = hwsim_new_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_DEL_RADIO, - .policy = hwsim_genl_policy, .doit = hwsim_del_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_GET_RADIO, - .policy = hwsim_genl_policy, .doit = hwsim_get_radio_nl, .dumpit = hwsim_dump_radio_nl, }, { .cmd = MAC802154_HWSIM_CMD_NEW_EDGE, - .policy = hwsim_genl_policy, .doit = hwsim_new_edge_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_DEL_EDGE, - .policy = hwsim_genl_policy, .doit = hwsim_del_edge_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_SET_EDGE, - .policy = hwsim_genl_policy, .doit = hwsim_set_edge_lqi, .flags = GENL_UNS_ADMIN_PERM, }, @@ -638,6 +632,7 @@ static struct genl_family hwsim_genl_family __ro_after_init = { .name = "MAC802154_HWSIM", .version = 1, .maxattr = MAC802154_HWSIM_ATTR_MAX, + .policy = hwsim_genl_policy, .module = THIS_MODULE, .ops = hwsim_nl_ops, .n_ops = ARRAY_SIZE(hwsim_nl_ops), diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 64a982563d59..947c40f112d1 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2637,60 +2637,50 @@ static const struct genl_ops macsec_genl_ops[] = { { .cmd = MACSEC_CMD_GET_TXSC, .dumpit = macsec_dump_txsc, - .policy = macsec_genl_policy, }, { .cmd = MACSEC_CMD_ADD_RXSC, .doit = macsec_add_rxsc, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_DEL_RXSC, .doit = macsec_del_rxsc, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_UPD_RXSC, .doit = macsec_upd_rxsc, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_ADD_TXSA, .doit = macsec_add_txsa, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_DEL_TXSA, .doit = macsec_del_txsa, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_UPD_TXSA, .doit = macsec_upd_txsa, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_ADD_RXSA, .doit = macsec_add_rxsa, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_DEL_RXSA, .doit = macsec_del_rxsa, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_UPD_RXSA, .doit = macsec_upd_rxsa, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, }; @@ -2700,6 +2690,7 @@ static struct genl_family macsec_fam __ro_after_init = { .hdrsize = 0, .version = MACSEC_GENL_VERSION, .maxattr = MACSEC_ATTR_MAX, + .policy = macsec_genl_policy, .netnsok = true, .module = THIS_MODULE, .ops = macsec_genl_ops, diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index ee950aa48e3b..6ad74f898832 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2725,24 +2725,20 @@ static const struct genl_ops team_nl_ops[] = { { .cmd = TEAM_CMD_NOOP, .doit = team_nl_cmd_noop, - .policy = team_nl_policy, }, { .cmd = TEAM_CMD_OPTIONS_SET, .doit = team_nl_cmd_options_set, - .policy = team_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = TEAM_CMD_OPTIONS_GET, .doit = team_nl_cmd_options_get, - .policy = team_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = TEAM_CMD_PORT_LIST_GET, .doit = team_nl_cmd_port_list_get, - .policy = team_nl_policy, .flags = GENL_ADMIN_PERM, }, }; @@ -2755,6 +2751,7 @@ static struct genl_family team_nl_family __ro_after_init = { .name = TEAM_GENL_NAME, .version = TEAM_GENL_VERSION, .maxattr = TEAM_ATTR_MAX, + .policy = team_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = team_nl_ops, diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 0838af04d681..4cc7b222859c 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3620,35 +3620,29 @@ done: static const struct genl_ops hwsim_ops[] = { { .cmd = HWSIM_CMD_REGISTER, - .policy = hwsim_genl_policy, .doit = hwsim_register_received_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = HWSIM_CMD_FRAME, - .policy = hwsim_genl_policy, .doit = hwsim_cloned_frame_received_nl, }, { .cmd = HWSIM_CMD_TX_INFO_FRAME, - .policy = hwsim_genl_policy, .doit = hwsim_tx_info_frame_received_nl, }, { .cmd = HWSIM_CMD_NEW_RADIO, - .policy = hwsim_genl_policy, .doit = hwsim_new_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = HWSIM_CMD_DEL_RADIO, - .policy = hwsim_genl_policy, .doit = hwsim_del_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = HWSIM_CMD_GET_RADIO, - .policy = hwsim_genl_policy, .doit = hwsim_get_radio_nl, .dumpit = hwsim_dump_radio_nl, }, @@ -3658,6 +3652,7 @@ static struct genl_family hwsim_genl_family __ro_after_init = { .name = "MAC80211_HWSIM", .version = 1, .maxattr = HWSIM_ATTR_MAX, + .policy = hwsim_genl_policy, .netnsok = true, .module = THIS_MODULE, .ops = hwsim_ops, diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 5831e0eecea1..845b32eaff36 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -442,25 +442,21 @@ static const struct genl_ops tcmu_genl_ops[] = { { .cmd = TCMU_CMD_SET_FEATURES, .flags = GENL_ADMIN_PERM, - .policy = tcmu_attr_policy, .doit = tcmu_genl_set_features, }, { .cmd = TCMU_CMD_ADDED_DEVICE_DONE, .flags = GENL_ADMIN_PERM, - .policy = tcmu_attr_policy, .doit = tcmu_genl_add_dev_done, }, { .cmd = TCMU_CMD_REMOVED_DEVICE_DONE, .flags = GENL_ADMIN_PERM, - .policy = tcmu_attr_policy, .doit = tcmu_genl_rm_dev_done, }, { .cmd = TCMU_CMD_RECONFIG_DEVICE_DONE, .flags = GENL_ADMIN_PERM, - .policy = tcmu_attr_policy, .doit = tcmu_genl_reconfig_dev_done, }, }; @@ -472,6 +468,7 @@ static struct genl_family tcmu_genl_family __ro_after_init = { .name = "TCM-USER", .version = 2, .maxattr = TCMU_ATTR_MAX, + .policy = tcmu_attr_policy, .mcgrps = tcmu_mcgrps, .n_mcgrps = ARRAY_SIZE(tcmu_mcgrps), .netnsok = true, diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 83f81ac53282..6cb82301d8e9 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -233,7 +233,6 @@ const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) { \ handler \ .cmd = op_name, \ - .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), \ }, #define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops) @@ -290,7 +289,8 @@ static struct genl_family ZZZ_genl_family __ro_after_init = { #ifdef GENL_MAGIC_FAMILY_HDRSZ .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), #endif - .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, + .maxattr = ARRAY_SIZE(CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy))-1, + .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), .ops = ZZZ_genl_ops, .n_ops = ARRAY_SIZE(ZZZ_genl_ops), .mcgrps = ZZZ_genl_mcgrps, diff --git a/include/net/genetlink.h b/include/net/genetlink.h index aa2e5888f18d..6850c7b1a3a6 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -26,6 +26,7 @@ struct genl_info; * @name: name of family * @version: protocol version * @maxattr: maximum number of attributes supported + * @policy: netlink policy * @netnsok: set to true if the family can handle network * namespaces and should be presented in all of them * @parallel_ops: operations can be called in parallel and aren't @@ -56,6 +57,7 @@ struct genl_family { unsigned int maxattr; bool netnsok; bool parallel_ops; + const struct nla_policy *policy; int (*pre_doit)(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info); @@ -124,14 +126,12 @@ static inline int genl_err_attr(struct genl_info *info, int err, * @cmd: command identifier * @internal_flags: flags used by the family * @flags: flags - * @policy: attribute validation policy * @doit: standard command callback * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps */ struct genl_ops { - const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, struct genl_info *info); int (*start)(struct netlink_callback *cb); diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 4e62a4a8fa91..1b942a7caf26 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -650,16 +650,37 @@ static const struct genl_ops taskstats_ops[] = { { .cmd = TASKSTATS_CMD_GET, .doit = taskstats_user_cmd, - .policy = taskstats_cmd_get_policy, - .flags = GENL_ADMIN_PERM, + /* policy enforced later */ + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_HASPOL, }, { .cmd = CGROUPSTATS_CMD_GET, .doit = cgroupstats_user_cmd, - .policy = cgroupstats_cmd_get_policy, + /* policy enforced later */ + .flags = GENL_CMD_CAP_HASPOL, }, }; +static int taskstats_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + const struct nla_policy *policy = NULL; + + switch (ops->cmd) { + case TASKSTATS_CMD_GET: + policy = taskstats_cmd_get_policy; + break; + case CGROUPSTATS_CMD_GET: + policy = cgroupstats_cmd_get_policy; + break; + default: + return -EINVAL; + } + + return nlmsg_validate(info->nlhdr, GENL_HDRLEN, TASKSTATS_CMD_ATTR_MAX, + policy, info->extack); +} + static struct genl_family family __ro_after_init = { .name = TASKSTATS_GENL_NAME, .version = TASKSTATS_GENL_VERSION, @@ -667,6 +688,7 @@ static struct genl_family family __ro_after_init = { .module = THIS_MODULE, .ops = taskstats_ops, .n_ops = ARRAY_SIZE(taskstats_ops), + .pre_doit = taskstats_pre_doit, }; /* Needed early in initialization */ diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 67a58da2e6a0..d3033a3d2a63 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -1345,34 +1345,29 @@ static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_MESH, /* can be retrieved by unprivileged users */ - .policy = batadv_netlink_policy, .doit = batadv_netlink_get_mesh, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_start, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER_CANCEL, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_cancel, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_GET_ROUTING_ALGOS, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_algo_dump, }, { .cmd = BATADV_CMD_GET_HARDIF, /* can be retrieved by unprivileged users */ - .policy = batadv_netlink_policy, .dumpit = batadv_netlink_dump_hardif, .doit = batadv_netlink_get_hardif, .internal_flags = BATADV_FLAG_NEED_MESH | @@ -1381,68 +1376,57 @@ static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_tt_local_dump, }, { .cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_tt_global_dump, }, { .cmd = BATADV_CMD_GET_ORIGINATORS, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_orig_dump, }, { .cmd = BATADV_CMD_GET_NEIGHBORS, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_hardif_neigh_dump, }, { .cmd = BATADV_CMD_GET_GATEWAYS, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_gw_dump, }, { .cmd = BATADV_CMD_GET_BLA_CLAIM, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_bla_claim_dump, }, { .cmd = BATADV_CMD_GET_BLA_BACKBONE, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_bla_backbone_dump, }, { .cmd = BATADV_CMD_GET_DAT_CACHE, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_dat_cache_dump, }, { .cmd = BATADV_CMD_GET_MCAST_FLAGS, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_mcast_flags_dump, }, { .cmd = BATADV_CMD_SET_MESH, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .doit = batadv_netlink_set_mesh, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_SET_HARDIF, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .doit = batadv_netlink_set_hardif, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_HARDIF, @@ -1450,7 +1434,6 @@ static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_VLAN, /* can be retrieved by unprivileged users */ - .policy = batadv_netlink_policy, .doit = batadv_netlink_get_vlan, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_VLAN, @@ -1458,7 +1441,6 @@ static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_SET_VLAN, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .doit = batadv_netlink_set_vlan, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_VLAN, @@ -1470,6 +1452,7 @@ struct genl_family batadv_netlink_family __ro_after_init = { .name = BATADV_NL_NAME, .version = 1, .maxattr = BATADV_ATTR_MAX, + .policy = batadv_netlink_policy, .netnsok = true, .pre_doit = batadv_pre_doit, .post_doit = batadv_post_doit, diff --git a/net/core/devlink.c b/net/core/devlink.c index 78e22cea4cc7..1a65cbf1ab05 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3640,7 +3640,6 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { u64 ret_offset, start_offset, end_offset = 0; - const struct genl_ops *ops = cb->data; struct devlink_region *region; struct nlattr *chunks_attr; const char *region_name; @@ -3657,7 +3656,8 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, return -ENOMEM; err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize, - attrs, DEVLINK_ATTR_MAX, ops->policy, cb->extack); + attrs, DEVLINK_ATTR_MAX, devlink_nl_family.policy, + cb->extack); if (err) goto out_free; @@ -4923,7 +4923,6 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_GET, .doit = devlink_nl_cmd_get_doit, .dumpit = devlink_nl_cmd_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, @@ -4931,21 +4930,18 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_PORT_GET, .doit = devlink_nl_cmd_port_get_doit, .dumpit = devlink_nl_cmd_port_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_PORT_SET, .doit = devlink_nl_cmd_port_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_PORT_SPLIT, .doit = devlink_nl_cmd_port_split_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, @@ -4953,7 +4949,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_PORT_UNSPLIT, .doit = devlink_nl_cmd_port_unsplit_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, @@ -4962,7 +4957,6 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_SB_GET, .doit = devlink_nl_cmd_sb_get_doit, .dumpit = devlink_nl_cmd_sb_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ @@ -4971,7 +4965,6 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_SB_POOL_GET, .doit = devlink_nl_cmd_sb_pool_get_doit, .dumpit = devlink_nl_cmd_sb_pool_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ @@ -4979,7 +4972,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_SB_POOL_SET, .doit = devlink_nl_cmd_sb_pool_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NEED_SB, @@ -4988,7 +4980,6 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_SB_PORT_POOL_GET, .doit = devlink_nl_cmd_sb_port_pool_get_doit, .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ @@ -4996,7 +4987,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_SB_PORT_POOL_SET, .doit = devlink_nl_cmd_sb_port_pool_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | DEVLINK_NL_FLAG_NEED_SB, @@ -5005,7 +4995,6 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET, .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit, .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ @@ -5013,7 +5002,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET, .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | DEVLINK_NL_FLAG_NEED_SB, @@ -5021,7 +5009,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT, .doit = devlink_nl_cmd_sb_occ_snapshot_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NEED_SB, @@ -5029,7 +5016,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR, .doit = devlink_nl_cmd_sb_occ_max_clear_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NEED_SB, @@ -5037,14 +5023,12 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_ESWITCH_GET, .doit = devlink_nl_cmd_eswitch_get_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_ESWITCH_SET, .doit = devlink_nl_cmd_eswitch_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, @@ -5052,49 +5036,42 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, .doit = devlink_nl_cmd_dpipe_table_get, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, .doit = devlink_nl_cmd_dpipe_entries_get, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, .doit = devlink_nl_cmd_dpipe_headers_get, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, .doit = devlink_nl_cmd_dpipe_table_counters_set, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_RESOURCE_SET, .doit = devlink_nl_cmd_resource_set, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_RESOURCE_DUMP, .doit = devlink_nl_cmd_resource_dump, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_RELOAD, .doit = devlink_nl_cmd_reload, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, @@ -5103,14 +5080,12 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_PARAM_GET, .doit = devlink_nl_cmd_param_get_doit, .dumpit = devlink_nl_cmd_param_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_PARAM_SET, .doit = devlink_nl_cmd_param_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, @@ -5118,14 +5093,12 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_PORT_PARAM_GET, .doit = devlink_nl_cmd_port_param_get_doit, .dumpit = devlink_nl_cmd_port_param_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_PORT_PARAM_SET, .doit = devlink_nl_cmd_port_param_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, @@ -5133,21 +5106,18 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_REGION_GET, .doit = devlink_nl_cmd_region_get_doit, .dumpit = devlink_nl_cmd_region_get_dumpit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_REGION_DEL, .doit = devlink_nl_cmd_region_del, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_REGION_READ, .dumpit = devlink_nl_cmd_region_read_dumpit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, @@ -5155,7 +5125,6 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_INFO_GET, .doit = devlink_nl_cmd_info_get_doit, .dumpit = devlink_nl_cmd_info_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, @@ -5163,35 +5132,30 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET, .doit = devlink_nl_cmd_health_reporter_get_doit, .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET, .doit = devlink_nl_cmd_health_reporter_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, .doit = devlink_nl_cmd_health_reporter_recover_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, .doit = devlink_nl_cmd_health_reporter_diagnose_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, .doit = devlink_nl_cmd_health_reporter_dump_get_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, @@ -5199,7 +5163,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, @@ -5207,7 +5170,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_FLASH_UPDATE, .doit = devlink_nl_cmd_flash_update, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, @@ -5217,6 +5179,7 @@ static struct genl_family devlink_nl_family __ro_after_init = { .name = DEVLINK_GENL_NAME, .version = DEVLINK_GENL_VERSION, .maxattr = DEVLINK_ATTR_MAX, + .policy = devlink_nl_policy, .netnsok = true, .pre_doit = devlink_nl_pre_doit, .post_doit = devlink_nl_post_doit, diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index b9cce0fd5696..bcc04d3e724f 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -449,14 +449,12 @@ static const struct genl_ops hsr_ops[] = { { .cmd = HSR_C_GET_NODE_STATUS, .flags = 0, - .policy = hsr_genl_policy, .doit = hsr_get_node_status, .dumpit = NULL, }, { .cmd = HSR_C_GET_NODE_LIST, .flags = 0, - .policy = hsr_genl_policy, .doit = hsr_get_node_list, .dumpit = NULL, }, @@ -467,6 +465,7 @@ static struct genl_family hsr_genl_family __ro_after_init = { .name = "HSR", .version = 1, .maxattr = HSR_A_MAX, + .policy = hsr_genl_policy, .module = THIS_MODULE, .ops = hsr_ops, .n_ops = ARRAY_SIZE(hsr_ops), diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h index a5d7515b7f62..bc147bc8e36a 100644 --- a/net/ieee802154/ieee802154.h +++ b/net/ieee802154/ieee802154.h @@ -20,7 +20,6 @@ void ieee802154_nl_exit(void); #define IEEE802154_OP(_cmd, _func) \ { \ .cmd = _cmd, \ - .policy = ieee802154_policy, \ .doit = _func, \ .dumpit = NULL, \ .flags = GENL_ADMIN_PERM, \ @@ -29,7 +28,6 @@ void ieee802154_nl_exit(void); #define IEEE802154_DUMP(_cmd, _func, _dump) \ { \ .cmd = _cmd, \ - .policy = ieee802154_policy, \ .doit = _func, \ .dumpit = _dump, \ } diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 96636e3b7aa9..098d67439b6d 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -136,6 +136,7 @@ struct genl_family nl802154_family __ro_after_init = { .name = IEEE802154_NL_NAME, .version = 1, .maxattr = IEEE802154_ATTR_MAX, + .policy = ieee802154_policy, .module = THIS_MODULE, .ops = ieee802154_ops, .n_ops = ARRAY_SIZE(ieee802154_ops), diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 99f6c254ea77..308370cfd668 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -2220,7 +2220,6 @@ static const struct genl_ops nl802154_ops[] = { .doit = nl802154_get_wpan_phy, .dumpit = nl802154_dump_wpan_phy, .done = nl802154_dump_wpan_phy_done, - .policy = nl802154_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, @@ -2229,7 +2228,6 @@ static const struct genl_ops nl802154_ops[] = { .cmd = NL802154_CMD_GET_INTERFACE, .doit = nl802154_get_interface, .dumpit = nl802154_dump_interface, - .policy = nl802154_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL802154_FLAG_NEED_WPAN_DEV | NL802154_FLAG_NEED_RTNL, @@ -2237,7 +2235,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_NEW_INTERFACE, .doit = nl802154_new_interface, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, @@ -2245,7 +2242,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_DEL_INTERFACE, .doit = nl802154_del_interface, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_DEV | NL802154_FLAG_NEED_RTNL, @@ -2253,7 +2249,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_CHANNEL, .doit = nl802154_set_channel, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, @@ -2261,7 +2256,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_CCA_MODE, .doit = nl802154_set_cca_mode, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, @@ -2269,7 +2263,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_CCA_ED_LEVEL, .doit = nl802154_set_cca_ed_level, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, @@ -2277,7 +2270,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_TX_POWER, .doit = nl802154_set_tx_power, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, @@ -2285,7 +2277,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_WPAN_PHY_NETNS, .doit = nl802154_wpan_phy_netns, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, @@ -2293,7 +2284,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_PAN_ID, .doit = nl802154_set_pan_id, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2301,7 +2291,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_SHORT_ADDR, .doit = nl802154_set_short_addr, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2309,7 +2298,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_BACKOFF_EXPONENT, .doit = nl802154_set_backoff_exponent, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2317,7 +2305,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_MAX_CSMA_BACKOFFS, .doit = nl802154_set_max_csma_backoffs, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2325,7 +2312,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_MAX_FRAME_RETRIES, .doit = nl802154_set_max_frame_retries, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2333,7 +2319,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_LBT_MODE, .doit = nl802154_set_lbt_mode, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2341,7 +2326,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_ACKREQ_DEFAULT, .doit = nl802154_set_ackreq_default, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2350,7 +2334,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_SEC_PARAMS, .doit = nl802154_set_llsec_params, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2359,7 +2342,6 @@ static const struct genl_ops nl802154_ops[] = { .cmd = NL802154_CMD_GET_SEC_KEY, /* TODO .doit by matching key id? */ .dumpit = nl802154_dump_llsec_key, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2367,7 +2349,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_NEW_SEC_KEY, .doit = nl802154_add_llsec_key, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2375,7 +2356,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_DEL_SEC_KEY, .doit = nl802154_del_llsec_key, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2385,7 +2365,6 @@ static const struct genl_ops nl802154_ops[] = { .cmd = NL802154_CMD_GET_SEC_DEV, /* TODO .doit by matching extended_addr? */ .dumpit = nl802154_dump_llsec_dev, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2393,7 +2372,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_NEW_SEC_DEV, .doit = nl802154_add_llsec_dev, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2401,7 +2379,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_DEL_SEC_DEV, .doit = nl802154_del_llsec_dev, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2411,7 +2388,6 @@ static const struct genl_ops nl802154_ops[] = { .cmd = NL802154_CMD_GET_SEC_DEVKEY, /* TODO doit by matching ??? */ .dumpit = nl802154_dump_llsec_devkey, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2419,7 +2395,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_NEW_SEC_DEVKEY, .doit = nl802154_add_llsec_devkey, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2427,7 +2402,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_DEL_SEC_DEVKEY, .doit = nl802154_del_llsec_devkey, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2436,7 +2410,6 @@ static const struct genl_ops nl802154_ops[] = { .cmd = NL802154_CMD_GET_SEC_LEVEL, /* TODO .doit by matching frame_type? */ .dumpit = nl802154_dump_llsec_seclevel, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2444,7 +2417,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_NEW_SEC_LEVEL, .doit = nl802154_add_llsec_seclevel, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2453,7 +2425,6 @@ static const struct genl_ops nl802154_ops[] = { .cmd = NL802154_CMD_DEL_SEC_LEVEL, /* TODO match frame_type only? */ .doit = nl802154_del_llsec_seclevel, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2466,6 +2437,7 @@ static struct genl_family nl802154_fam __ro_after_init = { .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ .maxattr = NL802154_ATTR_MAX, + .policy = nl802154_policy, .netnsok = true, .pre_doit = nl802154_pre_doit, .post_doit = nl802154_post_doit, diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 79e98e21cdd7..a23fbb52d265 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -808,20 +808,17 @@ static const struct genl_ops fou_nl_ops[] = { { .cmd = FOU_CMD_ADD, .doit = fou_nl_cmd_add_port, - .policy = fou_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = FOU_CMD_DEL, .doit = fou_nl_cmd_rm_port, - .policy = fou_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = FOU_CMD_GET, .doit = fou_nl_cmd_get_port, .dumpit = fou_nl_dump, - .policy = fou_nl_policy, }, }; @@ -830,6 +827,7 @@ static struct genl_family fou_nl_family __ro_after_init = { .name = FOU_GENL_NAME, .version = FOU_GENL_VERSION, .maxattr = FOU_ATTR_MAX, + .policy = fou_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = fou_nl_ops, diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index b467a7cabf40..4ccec4c705f7 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -953,12 +953,10 @@ static const struct genl_ops tcp_metrics_nl_ops[] = { .cmd = TCP_METRICS_CMD_GET, .doit = tcp_metrics_nl_cmd_get, .dumpit = tcp_metrics_nl_dump, - .policy = tcp_metrics_nl_policy, }, { .cmd = TCP_METRICS_CMD_DEL, .doit = tcp_metrics_nl_cmd_del, - .policy = tcp_metrics_nl_policy, .flags = GENL_ADMIN_PERM, }, }; @@ -968,6 +966,7 @@ static struct genl_family tcp_metrics_nl_family __ro_after_init = { .name = TCP_METRICS_GENL_NAME, .version = TCP_METRICS_GENL_VERSION, .maxattr = TCP_METRICS_ATTR_MAX, + .policy = tcp_metrics_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = tcp_metrics_nl_ops, diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c index 18fac76b9520..8d31a5066d0c 100644 --- a/net/ipv6/ila/ila_main.c +++ b/net/ipv6/ila/ila_main.c @@ -17,19 +17,16 @@ static const struct genl_ops ila_nl_ops[] = { { .cmd = ILA_CMD_ADD, .doit = ila_xlat_nl_cmd_add_mapping, - .policy = ila_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = ILA_CMD_DEL, .doit = ila_xlat_nl_cmd_del_mapping, - .policy = ila_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = ILA_CMD_FLUSH, .doit = ila_xlat_nl_cmd_flush, - .policy = ila_nl_policy, .flags = GENL_ADMIN_PERM, }, { @@ -38,7 +35,6 @@ static const struct genl_ops ila_nl_ops[] = { .start = ila_xlat_nl_dump_start, .dumpit = ila_xlat_nl_dump, .done = ila_xlat_nl_dump_done, - .policy = ila_nl_policy, }, }; @@ -49,6 +45,7 @@ struct genl_family ila_nl_family __ro_after_init = { .name = ILA_GENL_NAME, .version = ILA_GENL_VERSION, .maxattr = ILA_ATTR_MAX, + .policy = ila_nl_policy, .netnsok = true, .parallel_ops = true, .module = THIS_MODULE, diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 9b2f272ca164..ceff773471e7 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -399,7 +399,6 @@ static const struct genl_ops seg6_genl_ops[] = { { .cmd = SEG6_CMD_SETHMAC, .doit = seg6_genl_sethmac, - .policy = seg6_genl_policy, .flags = GENL_ADMIN_PERM, }, { @@ -407,19 +406,16 @@ static const struct genl_ops seg6_genl_ops[] = { .start = seg6_genl_dumphmac_start, .dumpit = seg6_genl_dumphmac, .done = seg6_genl_dumphmac_done, - .policy = seg6_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = SEG6_CMD_SET_TUNSRC, .doit = seg6_genl_set_tunsrc, - .policy = seg6_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = SEG6_CMD_GET_TUNSRC, .doit = seg6_genl_get_tunsrc, - .policy = seg6_genl_policy, .flags = GENL_ADMIN_PERM, }, }; @@ -429,6 +425,7 @@ static struct genl_family seg6_genl_family __ro_after_init = { .name = SEG6_GENL_NAME, .version = SEG6_GENL_VERSION, .maxattr = SEG6_ATTR_MAX, + .policy = seg6_genl_policy, .netnsok = true, .parallel_ops = true, .ops = seg6_genl_ops, diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index edbd5d1fbcde..77595fcc9f75 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -916,57 +916,48 @@ static const struct genl_ops l2tp_nl_ops[] = { { .cmd = L2TP_CMD_NOOP, .doit = l2tp_nl_cmd_noop, - .policy = l2tp_nl_policy, /* can be retrieved by unprivileged users */ }, { .cmd = L2TP_CMD_TUNNEL_CREATE, .doit = l2tp_nl_cmd_tunnel_create, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_DELETE, .doit = l2tp_nl_cmd_tunnel_delete, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_MODIFY, .doit = l2tp_nl_cmd_tunnel_modify, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_GET, .doit = l2tp_nl_cmd_tunnel_get, .dumpit = l2tp_nl_cmd_tunnel_dump, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_CREATE, .doit = l2tp_nl_cmd_session_create, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_DELETE, .doit = l2tp_nl_cmd_session_delete, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_MODIFY, .doit = l2tp_nl_cmd_session_modify, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_GET, .doit = l2tp_nl_cmd_session_get, .dumpit = l2tp_nl_cmd_session_dump, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, }; @@ -976,6 +967,7 @@ static struct genl_family l2tp_nl_family __ro_after_init = { .version = L2TP_GENL_VERSION, .hdrsize = 0, .maxattr = L2TP_ATTR_MAX, + .policy = l2tp_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = l2tp_nl_ops, diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index bad17bba8ba7..367b2f6513e0 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -723,38 +723,32 @@ static int ncsi_set_channel_mask_nl(struct sk_buff *msg, static const struct genl_ops ncsi_ops[] = { { .cmd = NCSI_CMD_PKG_INFO, - .policy = ncsi_genl_policy, .doit = ncsi_pkg_info_nl, .dumpit = ncsi_pkg_info_all_nl, .flags = 0, }, { .cmd = NCSI_CMD_SET_INTERFACE, - .policy = ncsi_genl_policy, .doit = ncsi_set_interface_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_CLEAR_INTERFACE, - .policy = ncsi_genl_policy, .doit = ncsi_clear_interface_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_SEND_CMD, - .policy = ncsi_genl_policy, .doit = ncsi_send_cmd_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_SET_PACKAGE_MASK, - .policy = ncsi_genl_policy, .doit = ncsi_set_package_mask_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_SET_CHANNEL_MASK, - .policy = ncsi_genl_policy, .doit = ncsi_set_channel_mask_nl, .flags = GENL_ADMIN_PERM, }, @@ -764,6 +758,7 @@ static struct genl_family ncsi_genl_family __ro_after_init = { .name = "NCSI", .version = 0, .maxattr = NCSI_ATTR_MAX, + .policy = ncsi_genl_policy, .module = THIS_MODULE, .ops = ncsi_ops, .n_ops = ARRAY_SIZE(ncsi_ops), diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 053cd96b9c76..4b933669fd83 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3775,19 +3775,16 @@ static const struct genl_ops ip_vs_genl_ops[] = { { .cmd = IPVS_CMD_NEW_SERVICE, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_SET_SERVICE, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_DEL_SERVICE, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { @@ -3795,42 +3792,35 @@ static const struct genl_ops ip_vs_genl_ops[] = { .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_get_cmd, .dumpit = ip_vs_genl_dump_services, - .policy = ip_vs_cmd_policy, }, { .cmd = IPVS_CMD_NEW_DEST, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_SET_DEST, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_DEL_DEST, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_GET_DEST, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .dumpit = ip_vs_genl_dump_dests, }, { .cmd = IPVS_CMD_NEW_DAEMON, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_daemon, }, { .cmd = IPVS_CMD_DEL_DAEMON, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_daemon, }, { @@ -3841,7 +3831,6 @@ static const struct genl_ops ip_vs_genl_ops[] = { { .cmd = IPVS_CMD_SET_CONFIG, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { @@ -3857,7 +3846,6 @@ static const struct genl_ops ip_vs_genl_ops[] = { { .cmd = IPVS_CMD_ZERO, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { @@ -3872,6 +3860,7 @@ static struct genl_family ip_vs_genl_family __ro_after_init = { .name = IPVS_GENL_NAME, .version = IPVS_GENL_VERSION, .maxattr = IPVS_CMD_ATTR_MAX, + .policy = ip_vs_cmd_policy, .netnsok = true, /* Make ipvsadm to work on netns */ .module = THIS_MODULE, .ops = ip_vs_genl_ops, diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index 4d748975117d..80184513b2b2 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -322,28 +322,24 @@ static const struct genl_ops netlbl_calipso_ops[] = { { .cmd = NLBL_CALIPSO_C_ADD, .flags = GENL_ADMIN_PERM, - .policy = calipso_genl_policy, .doit = netlbl_calipso_add, .dumpit = NULL, }, { .cmd = NLBL_CALIPSO_C_REMOVE, .flags = GENL_ADMIN_PERM, - .policy = calipso_genl_policy, .doit = netlbl_calipso_remove, .dumpit = NULL, }, { .cmd = NLBL_CALIPSO_C_LIST, .flags = 0, - .policy = calipso_genl_policy, .doit = netlbl_calipso_list, .dumpit = NULL, }, { .cmd = NLBL_CALIPSO_C_LISTALL, .flags = 0, - .policy = calipso_genl_policy, .doit = NULL, .dumpit = netlbl_calipso_listall, }, @@ -354,6 +350,7 @@ static struct genl_family netlbl_calipso_gnl_family __ro_after_init = { .name = NETLBL_NLTYPE_CALIPSO_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_CALIPSO_A_MAX, + .policy = calipso_genl_policy, .module = THIS_MODULE, .ops = netlbl_calipso_ops, .n_ops = ARRAY_SIZE(netlbl_calipso_ops), diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 9aacf2da3d98..ba7800f94ccc 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -734,28 +734,24 @@ static const struct genl_ops netlbl_cipsov4_ops[] = { { .cmd = NLBL_CIPSOV4_C_ADD, .flags = GENL_ADMIN_PERM, - .policy = netlbl_cipsov4_genl_policy, .doit = netlbl_cipsov4_add, .dumpit = NULL, }, { .cmd = NLBL_CIPSOV4_C_REMOVE, .flags = GENL_ADMIN_PERM, - .policy = netlbl_cipsov4_genl_policy, .doit = netlbl_cipsov4_remove, .dumpit = NULL, }, { .cmd = NLBL_CIPSOV4_C_LIST, .flags = 0, - .policy = netlbl_cipsov4_genl_policy, .doit = netlbl_cipsov4_list, .dumpit = NULL, }, { .cmd = NLBL_CIPSOV4_C_LISTALL, .flags = 0, - .policy = netlbl_cipsov4_genl_policy, .doit = NULL, .dumpit = netlbl_cipsov4_listall, }, @@ -766,6 +762,7 @@ static struct genl_family netlbl_cipsov4_gnl_family __ro_after_init = { .name = NETLBL_NLTYPE_CIPSOV4_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_CIPSOV4_A_MAX, + .policy = netlbl_cipsov4_genl_policy, .module = THIS_MODULE, .ops = netlbl_cipsov4_ops, .n_ops = ARRAY_SIZE(netlbl_cipsov4_ops), diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 21e0095b1d14..a16eacfb2236 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -773,56 +773,48 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = { { .cmd = NLBL_MGMT_C_ADD, .flags = GENL_ADMIN_PERM, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_add, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_REMOVE, .flags = GENL_ADMIN_PERM, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_remove, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_LISTALL, .flags = 0, - .policy = netlbl_mgmt_genl_policy, .doit = NULL, .dumpit = netlbl_mgmt_listall, }, { .cmd = NLBL_MGMT_C_ADDDEF, .flags = GENL_ADMIN_PERM, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_adddef, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_REMOVEDEF, .flags = GENL_ADMIN_PERM, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_removedef, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_LISTDEF, .flags = 0, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_listdef, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_PROTOCOLS, .flags = 0, - .policy = netlbl_mgmt_genl_policy, .doit = NULL, .dumpit = netlbl_mgmt_protocols, }, { .cmd = NLBL_MGMT_C_VERSION, .flags = 0, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_version, .dumpit = NULL, }, @@ -833,6 +825,7 @@ static struct genl_family netlbl_mgmt_gnl_family __ro_after_init = { .name = NETLBL_NLTYPE_MGMT_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_MGMT_A_MAX, + .policy = netlbl_mgmt_genl_policy, .module = THIS_MODULE, .ops = netlbl_mgmt_genl_ops, .n_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops), diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index c92894c3e40a..6b1b6c2b5141 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1318,56 +1318,48 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = { { .cmd = NLBL_UNLABEL_C_STATICADD, .flags = GENL_ADMIN_PERM, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_staticadd, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICREMOVE, .flags = GENL_ADMIN_PERM, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_staticremove, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICLIST, .flags = 0, - .policy = netlbl_unlabel_genl_policy, .doit = NULL, .dumpit = netlbl_unlabel_staticlist, }, { .cmd = NLBL_UNLABEL_C_STATICADDDEF, .flags = GENL_ADMIN_PERM, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_staticadddef, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICREMOVEDEF, .flags = GENL_ADMIN_PERM, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_staticremovedef, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICLISTDEF, .flags = 0, - .policy = netlbl_unlabel_genl_policy, .doit = NULL, .dumpit = netlbl_unlabel_staticlistdef, }, { .cmd = NLBL_UNLABEL_C_ACCEPT, .flags = GENL_ADMIN_PERM, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_accept, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_LIST, .flags = 0, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_list, .dumpit = NULL, }, @@ -1378,6 +1370,7 @@ static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = { .name = NETLBL_NLTYPE_UNLABELED_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_UNLABEL_A_MAX, + .policy = netlbl_unlabel_genl_policy, .module = THIS_MODULE, .ops = netlbl_unlabel_genl_ops, .n_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops), diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 25eeb6d2a75a..a75ea33fb5ea 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -577,7 +577,7 @@ static int genl_family_rcv_msg(const struct genl_family *family, if (attrbuf) { err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, - ops->policy, extack); + family->policy, extack); if (err < 0) goto out; } @@ -677,7 +677,7 @@ static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, op_flags |= GENL_CMD_CAP_DUMP; if (ops->doit) op_flags |= GENL_CMD_CAP_DO; - if (ops->policy) + if (family->policy) op_flags |= GENL_CMD_CAP_HASPOL; nest = nla_nest_start(skb, i + 1); @@ -939,7 +939,6 @@ static const struct genl_ops genl_ctrl_ops[] = { .cmd = CTRL_CMD_GETFAMILY, .doit = ctrl_getfamily, .dumpit = ctrl_dumpfamily, - .policy = ctrl_policy, }, }; @@ -957,6 +956,7 @@ static struct genl_family genl_ctrl __ro_after_init = { .name = "nlctrl", .version = 0x2, .maxattr = CTRL_ATTR_MAX, + .policy = ctrl_policy, .netnsok = true, }; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 376181cc1def..4d9f3ac8d562 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1670,99 +1670,80 @@ static const struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_get_device, .dumpit = nfc_genl_dump_devices, .done = nfc_genl_dump_devices_done, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DEV_UP, .doit = nfc_genl_dev_up, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DEV_DOWN, .doit = nfc_genl_dev_down, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_START_POLL, .doit = nfc_genl_start_poll, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_STOP_POLL, .doit = nfc_genl_stop_poll, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DEP_LINK_UP, .doit = nfc_genl_dep_link_up, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DEP_LINK_DOWN, .doit = nfc_genl_dep_link_down, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_GET_TARGET, .dumpit = nfc_genl_dump_targets, .done = nfc_genl_dump_targets_done, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_LLC_GET_PARAMS, .doit = nfc_genl_llc_get_params, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_LLC_SET_PARAMS, .doit = nfc_genl_llc_set_params, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_LLC_SDREQ, .doit = nfc_genl_llc_sdreq, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_FW_DOWNLOAD, .doit = nfc_genl_fw_download, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_ENABLE_SE, .doit = nfc_genl_enable_se, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DISABLE_SE, .doit = nfc_genl_disable_se, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_GET_SE, .dumpit = nfc_genl_dump_ses, .done = nfc_genl_dump_ses_done, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_SE_IO, .doit = nfc_genl_se_io, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_ACTIVATE_TARGET, .doit = nfc_genl_activate_target, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_VENDOR, .doit = nfc_genl_vendor_cmd, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DEACTIVATE_TARGET, .doit = nfc_genl_deactivate_target, - .policy = nfc_genl_policy, }, }; @@ -1771,6 +1752,7 @@ static struct genl_family nfc_genl_family __ro_after_init = { .name = NFC_GENL_NAME, .version = NFC_GENL_VERSION, .maxattr = NFC_ATTR_MAX, + .policy = nfc_genl_policy, .module = THIS_MODULE, .ops = nfc_genl_ops, .n_ops = ARRAY_SIZE(nfc_genl_ops), diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 1b6896896fff..51080004677e 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -2154,18 +2154,15 @@ static struct genl_ops ct_limit_genl_ops[] = { { .cmd = OVS_CT_LIMIT_CMD_SET, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ - .policy = ct_limit_policy, .doit = ovs_ct_limit_cmd_set, }, { .cmd = OVS_CT_LIMIT_CMD_DEL, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ - .policy = ct_limit_policy, .doit = ovs_ct_limit_cmd_del, }, { .cmd = OVS_CT_LIMIT_CMD_GET, .flags = 0, /* OK for unprivileged users. */ - .policy = ct_limit_policy, .doit = ovs_ct_limit_cmd_get, }, }; @@ -2179,6 +2176,7 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = { .name = OVS_CT_LIMIT_FAMILY, .version = OVS_CT_LIMIT_VERSION, .maxattr = OVS_CT_LIMIT_ATTR_MAX, + .policy = ct_limit_policy, .netnsok = true, .parallel_ops = true, .ops = ct_limit_genl_ops, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 9dd158ab51b3..a64d3eb1f9a9 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -639,7 +639,6 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { static const struct genl_ops dp_packet_genl_ops[] = { { .cmd = OVS_PACKET_CMD_EXECUTE, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = packet_policy, .doit = ovs_packet_cmd_execute } }; @@ -649,6 +648,7 @@ static struct genl_family dp_packet_genl_family __ro_after_init = { .name = OVS_PACKET_FAMILY, .version = OVS_PACKET_VERSION, .maxattr = OVS_PACKET_ATTR_MAX, + .policy = packet_policy, .netnsok = true, .parallel_ops = true, .ops = dp_packet_genl_ops, @@ -1424,23 +1424,19 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { static const struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_NEW, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = flow_policy, .doit = ovs_flow_cmd_new }, { .cmd = OVS_FLOW_CMD_DEL, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = flow_policy, .doit = ovs_flow_cmd_del }, { .cmd = OVS_FLOW_CMD_GET, .flags = 0, /* OK for unprivileged users. */ - .policy = flow_policy, .doit = ovs_flow_cmd_get, .dumpit = ovs_flow_cmd_dump }, { .cmd = OVS_FLOW_CMD_SET, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = flow_policy, .doit = ovs_flow_cmd_set, }, }; @@ -1450,6 +1446,7 @@ static struct genl_family dp_flow_genl_family __ro_after_init = { .name = OVS_FLOW_FAMILY, .version = OVS_FLOW_VERSION, .maxattr = OVS_FLOW_ATTR_MAX, + .policy = flow_policy, .netnsok = true, .parallel_ops = true, .ops = dp_flow_genl_ops, @@ -1817,23 +1814,19 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { static const struct genl_ops dp_datapath_genl_ops[] = { { .cmd = OVS_DP_CMD_NEW, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_new }, { .cmd = OVS_DP_CMD_DEL, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_del }, { .cmd = OVS_DP_CMD_GET, .flags = 0, /* OK for unprivileged users. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_get, .dumpit = ovs_dp_cmd_dump }, { .cmd = OVS_DP_CMD_SET, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_set, }, }; @@ -1843,6 +1836,7 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = { .name = OVS_DATAPATH_FAMILY, .version = OVS_DATAPATH_VERSION, .maxattr = OVS_DP_ATTR_MAX, + .policy = datapath_policy, .netnsok = true, .parallel_ops = true, .ops = dp_datapath_genl_ops, @@ -2260,23 +2254,19 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { static const struct genl_ops dp_vport_genl_ops[] = { { .cmd = OVS_VPORT_CMD_NEW, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = vport_policy, .doit = ovs_vport_cmd_new }, { .cmd = OVS_VPORT_CMD_DEL, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = vport_policy, .doit = ovs_vport_cmd_del }, { .cmd = OVS_VPORT_CMD_GET, .flags = 0, /* OK for unprivileged users. */ - .policy = vport_policy, .doit = ovs_vport_cmd_get, .dumpit = ovs_vport_cmd_dump }, { .cmd = OVS_VPORT_CMD_SET, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = vport_policy, .doit = ovs_vport_cmd_set, }, }; @@ -2286,6 +2276,7 @@ struct genl_family dp_vport_genl_family __ro_after_init = { .name = OVS_VPORT_FAMILY, .version = OVS_VPORT_VERSION, .maxattr = OVS_VPORT_ATTR_MAX, + .policy = vport_policy, .netnsok = true, .parallel_ops = true, .ops = dp_vport_genl_ops, diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 43849d752a1e..0be3d097ae01 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -527,26 +527,22 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, static struct genl_ops dp_meter_genl_ops[] = { { .cmd = OVS_METER_CMD_FEATURES, .flags = 0, /* OK for unprivileged users. */ - .policy = meter_policy, .doit = ovs_meter_cmd_features }, { .cmd = OVS_METER_CMD_SET, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ - .policy = meter_policy, .doit = ovs_meter_cmd_set, }, { .cmd = OVS_METER_CMD_GET, .flags = 0, /* OK for unprivileged users. */ - .policy = meter_policy, .doit = ovs_meter_cmd_get, }, { .cmd = OVS_METER_CMD_DEL, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ - .policy = meter_policy, .doit = ovs_meter_cmd_del }, }; @@ -560,6 +556,7 @@ struct genl_family dp_meter_genl_family __ro_after_init = { .name = OVS_METER_FAMILY, .version = OVS_METER_VERSION, .maxattr = OVS_METER_ATTR_MAX, + .policy = meter_policy, .netnsok = true, .parallel_ops = true, .ops = dp_meter_genl_ops, diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 8d2f6296279c..3cdf81cf97a3 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -611,7 +611,6 @@ static const struct genl_ops smc_pnet_ops[] = { { .cmd = SMC_PNETID_GET, .flags = GENL_ADMIN_PERM, - .policy = smc_pnet_policy, .doit = smc_pnet_get, .dumpit = smc_pnet_dump, .start = smc_pnet_dump_start @@ -619,19 +618,16 @@ static const struct genl_ops smc_pnet_ops[] = { { .cmd = SMC_PNETID_ADD, .flags = GENL_ADMIN_PERM, - .policy = smc_pnet_policy, .doit = smc_pnet_add }, { .cmd = SMC_PNETID_DEL, .flags = GENL_ADMIN_PERM, - .policy = smc_pnet_policy, .doit = smc_pnet_del }, { .cmd = SMC_PNETID_FLUSH, .flags = GENL_ADMIN_PERM, - .policy = smc_pnet_policy, .doit = smc_pnet_flush } }; @@ -642,6 +638,7 @@ static struct genl_family smc_pnet_nl_family __ro_after_init = { .name = SMCR_GENL_FAMILY_NAME, .version = SMCR_GENL_FAMILY_VERSION, .maxattr = SMC_PNETID_MAX, + .policy = smc_pnet_policy, .netnsok = true, .module = THIS_MODULE, .ops = smc_pnet_ops, diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 5240f64e8ccc..2d178df0a89f 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -144,114 +144,93 @@ static const struct genl_ops tipc_genl_v2_ops[] = { { .cmd = TIPC_NL_BEARER_DISABLE, .doit = tipc_nl_bearer_disable, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_ENABLE, .doit = tipc_nl_bearer_enable, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_GET, .doit = tipc_nl_bearer_get, .dumpit = tipc_nl_bearer_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_ADD, .doit = tipc_nl_bearer_add, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_SET, .doit = tipc_nl_bearer_set, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_SOCK_GET, .start = tipc_dump_start, .dumpit = tipc_nl_sk_dump, .done = tipc_dump_done, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_PUBL_GET, .dumpit = tipc_nl_publ_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_GET, .doit = tipc_nl_node_get_link, .dumpit = tipc_nl_node_dump_link, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_SET, .doit = tipc_nl_node_set_link, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_RESET_STATS, .doit = tipc_nl_node_reset_link_stats, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MEDIA_GET, .doit = tipc_nl_media_get, .dumpit = tipc_nl_media_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MEDIA_SET, .doit = tipc_nl_media_set, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NODE_GET, .dumpit = tipc_nl_node_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NET_GET, .dumpit = tipc_nl_net_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NET_SET, .doit = tipc_nl_net_set, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NAME_TABLE_GET, .dumpit = tipc_nl_name_table_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MON_SET, .doit = tipc_nl_node_set_monitor, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MON_GET, .doit = tipc_nl_node_get_monitor, .dumpit = tipc_nl_node_dump_monitor, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MON_PEER_GET, .dumpit = tipc_nl_node_dump_monitor_peer, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_PEER_REMOVE, .doit = tipc_nl_peer_rm, - .policy = tipc_nl_policy, }, #ifdef CONFIG_TIPC_MEDIA_UDP { .cmd = TIPC_NL_UDP_GET_REMOTEIP, .dumpit = tipc_udp_nl_dump_remoteip, - .policy = tipc_nl_policy, }, #endif }; @@ -261,6 +240,7 @@ struct genl_family tipc_genl_family __ro_after_init = { .version = TIPC_GENL_V2_VERSION, .hdrsize = 0, .maxattr = TIPC_NLA_MAX, + .policy = tipc_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = tipc_genl_v2_ops, diff --git a/net/wimax/stack.c b/net/wimax/stack.c index a6307813b6d5..b7f571e55448 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -420,25 +420,21 @@ static const struct genl_ops wimax_gnl_ops[] = { { .cmd = WIMAX_GNL_OP_MSG_FROM_USER, .flags = GENL_ADMIN_PERM, - .policy = wimax_gnl_policy, .doit = wimax_gnl_doit_msg_from_user, }, { .cmd = WIMAX_GNL_OP_RESET, .flags = GENL_ADMIN_PERM, - .policy = wimax_gnl_policy, .doit = wimax_gnl_doit_reset, }, { .cmd = WIMAX_GNL_OP_RFKILL, .flags = GENL_ADMIN_PERM, - .policy = wimax_gnl_policy, .doit = wimax_gnl_doit_rfkill, }, { .cmd = WIMAX_GNL_OP_STATE_GET, .flags = GENL_ADMIN_PERM, - .policy = wimax_gnl_policy, .doit = wimax_gnl_doit_state_get, }, }; @@ -582,6 +578,7 @@ struct genl_family wimax_gnl_family __ro_after_init = { .version = WIMAX_GNL_VERSION, .hdrsize = 0, .maxattr = WIMAX_GNL_ATTR_MAX, + .policy = wimax_gnl_policy, .module = THIS_MODULE, .ops = wimax_gnl_ops, .n_ops = ARRAY_SIZE(wimax_gnl_ops), diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 25a9e3b5c154..33408ba1d7ee 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13368,7 +13368,6 @@ static const struct genl_ops nl80211_ops[] = { .doit = nl80211_get_wiphy, .dumpit = nl80211_dump_wiphy, .done = nl80211_dump_wiphy_done, - .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -13376,7 +13375,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_WIPHY, .doit = nl80211_set_wiphy, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_RTNL, }, @@ -13384,7 +13382,6 @@ static const struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_INTERFACE, .doit = nl80211_get_interface, .dumpit = nl80211_dump_interface, - .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, @@ -13392,7 +13389,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_INTERFACE, .doit = nl80211_set_interface, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13400,7 +13396,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_NEW_INTERFACE, .doit = nl80211_new_interface, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -13408,7 +13403,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DEL_INTERFACE, .doit = nl80211_del_interface, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, @@ -13416,7 +13410,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_KEY, .doit = nl80211_get_key, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13424,7 +13417,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_KEY, .doit = nl80211_set_key, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13433,7 +13425,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_NEW_KEY, .doit = nl80211_new_key, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13442,14 +13433,12 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DEL_KEY, .doit = nl80211_del_key, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_BEACON, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_set_beacon, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13457,7 +13446,6 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_START_AP, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_start_ap, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13465,7 +13453,6 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_STOP_AP, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_stop_ap, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13475,14 +13462,12 @@ static const struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_STATION, .doit = nl80211_get_station, .dumpit = nl80211_dump_station, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_STATION, .doit = nl80211_set_station, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13490,7 +13475,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_NEW_STATION, .doit = nl80211_new_station, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13498,7 +13482,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DEL_STATION, .doit = nl80211_del_station, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13507,7 +13490,6 @@ static const struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_MPATH, .doit = nl80211_get_mpath, .dumpit = nl80211_dump_mpath, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13516,7 +13498,6 @@ static const struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_MPP, .doit = nl80211_get_mpp, .dumpit = nl80211_dump_mpp, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13524,7 +13505,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_MPATH, .doit = nl80211_set_mpath, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13532,7 +13512,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_NEW_MPATH, .doit = nl80211_new_mpath, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13540,7 +13519,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DEL_MPATH, .doit = nl80211_del_mpath, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13548,7 +13526,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_BSS, .doit = nl80211_set_bss, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13557,7 +13534,6 @@ static const struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_REG, .doit = nl80211_get_reg_do, .dumpit = nl80211_get_reg_dump, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_RTNL, /* can be retrieved by unprivileged users */ }, @@ -13565,7 +13541,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_REG, .doit = nl80211_set_reg, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_RTNL, }, @@ -13573,19 +13548,16 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_REQ_SET_REG, .doit = nl80211_req_set_reg, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = NL80211_CMD_RELOAD_REGDB, .doit = nl80211_reload_regdb, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = NL80211_CMD_GET_MESH_CONFIG, .doit = nl80211_get_mesh_config, - .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13593,7 +13565,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_MESH_CONFIG, .doit = nl80211_update_mesh_config, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13601,7 +13572,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_TRIGGER_SCAN, .doit = nl80211_trigger_scan, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13609,20 +13579,17 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_ABORT_SCAN, .doit = nl80211_abort_scan, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_SCAN, - .policy = nl80211_policy, .dumpit = nl80211_dump_scan, }, { .cmd = NL80211_CMD_START_SCHED_SCAN, .doit = nl80211_start_sched_scan, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13630,7 +13597,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_STOP_SCHED_SCAN, .doit = nl80211_stop_sched_scan, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13638,7 +13604,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_AUTHENTICATE, .doit = nl80211_authenticate, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13647,7 +13612,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_ASSOCIATE, .doit = nl80211_associate, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13655,7 +13619,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DEAUTHENTICATE, .doit = nl80211_deauthenticate, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13663,7 +13626,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DISASSOCIATE, .doit = nl80211_disassociate, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13671,7 +13633,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_JOIN_IBSS, .doit = nl80211_join_ibss, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13679,7 +13640,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_LEAVE_IBSS, .doit = nl80211_leave_ibss, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13689,7 +13649,6 @@ static const struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_TESTMODE, .doit = nl80211_testmode_do, .dumpit = nl80211_testmode_dump, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -13698,7 +13657,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_CONNECT, .doit = nl80211_connect, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13706,7 +13664,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, .doit = nl80211_update_connect_params, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13714,7 +13671,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DISCONNECT, .doit = nl80211_disconnect, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13722,20 +13678,17 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_WIPHY_NETNS, .doit = nl80211_wiphy_netns, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_SURVEY, - .policy = nl80211_policy, .dumpit = nl80211_dump_survey, }, { .cmd = NL80211_CMD_SET_PMKSA, .doit = nl80211_setdel_pmksa, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13743,7 +13696,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DEL_PMKSA, .doit = nl80211_setdel_pmksa, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13751,7 +13703,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_FLUSH_PMKSA, .doit = nl80211_flush_pmksa, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13759,7 +13710,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_REMAIN_ON_CHANNEL, .doit = nl80211_remain_on_channel, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13767,7 +13717,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, .doit = nl80211_cancel_remain_on_channel, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13775,7 +13724,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_TX_BITRATE_MASK, .doit = nl80211_set_tx_bitrate_mask, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13783,7 +13731,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_REGISTER_FRAME, .doit = nl80211_register_mgmt, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, @@ -13791,7 +13738,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_FRAME, .doit = nl80211_tx_mgmt, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13799,7 +13745,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_FRAME_WAIT_CANCEL, .doit = nl80211_tx_mgmt_cancel_wait, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13807,7 +13752,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_POWER_SAVE, .doit = nl80211_set_power_save, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13815,7 +13759,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_POWER_SAVE, .doit = nl80211_get_power_save, - .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13823,7 +13766,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_CQM, .doit = nl80211_set_cqm, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13831,7 +13773,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_CHANNEL, .doit = nl80211_set_channel, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13839,7 +13780,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_WDS_PEER, .doit = nl80211_set_wds_peer, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13847,7 +13787,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_JOIN_MESH, .doit = nl80211_join_mesh, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13855,7 +13794,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_LEAVE_MESH, .doit = nl80211_leave_mesh, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13863,7 +13801,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_JOIN_OCB, .doit = nl80211_join_ocb, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13871,7 +13808,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_LEAVE_OCB, .doit = nl80211_leave_ocb, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13880,7 +13816,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WOWLAN, .doit = nl80211_get_wowlan, - .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -13888,7 +13823,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_WOWLAN, .doit = nl80211_set_wowlan, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -13897,7 +13831,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_REKEY_OFFLOAD, .doit = nl80211_set_rekey_data, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13906,7 +13839,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_TDLS_MGMT, .doit = nl80211_tdls_mgmt, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13914,7 +13846,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_TDLS_OPER, .doit = nl80211_tdls_oper, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13922,7 +13853,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_UNEXPECTED_FRAME, .doit = nl80211_register_unexpected_frame, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13930,7 +13860,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_PROBE_CLIENT, .doit = nl80211_probe_client, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13938,7 +13867,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_REGISTER_BEACONS, .doit = nl80211_register_beacons, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -13946,7 +13874,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_NOACK_MAP, .doit = nl80211_set_noack_map, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13954,7 +13881,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_START_P2P_DEVICE, .doit = nl80211_start_p2p_device, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, @@ -13962,7 +13888,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_STOP_P2P_DEVICE, .doit = nl80211_stop_p2p_device, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13970,7 +13895,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_START_NAN, .doit = nl80211_start_nan, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, @@ -13978,7 +13902,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_STOP_NAN, .doit = nl80211_stop_nan, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13986,7 +13909,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_ADD_NAN_FUNCTION, .doit = nl80211_nan_add_func, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13994,7 +13916,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DEL_NAN_FUNCTION, .doit = nl80211_nan_del_func, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14002,7 +13923,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_CHANGE_NAN_CONFIG, .doit = nl80211_nan_change_config, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14010,7 +13930,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_MCAST_RATE, .doit = nl80211_set_mcast_rate, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -14018,7 +13937,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_MAC_ACL, .doit = nl80211_set_mac_acl, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -14026,7 +13944,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_RADAR_DETECT, .doit = nl80211_start_radar_detection, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14034,12 +13951,10 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES, .doit = nl80211_get_protocol_features, - .policy = nl80211_policy, }, { .cmd = NL80211_CMD_UPDATE_FT_IES, .doit = nl80211_update_ft_ies, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14047,7 +13962,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_CRIT_PROTOCOL_START, .doit = nl80211_crit_protocol_start, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14055,7 +13969,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_CRIT_PROTOCOL_STOP, .doit = nl80211_crit_protocol_stop, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14063,14 +13976,12 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_COALESCE, .doit = nl80211_get_coalesce, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_COALESCE, .doit = nl80211_set_coalesce, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -14078,7 +13989,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_CHANNEL_SWITCH, .doit = nl80211_channel_switch, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14087,7 +13997,6 @@ static const struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_VENDOR, .doit = nl80211_vendor_cmd, .dumpit = nl80211_vendor_cmd_dump, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -14095,7 +14004,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_QOS_MAP, .doit = nl80211_set_qos_map, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14103,7 +14011,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_ADD_TX_TS, .doit = nl80211_add_tx_ts, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14111,7 +14018,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DEL_TX_TS, .doit = nl80211_del_tx_ts, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14119,7 +14025,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_TDLS_CHANNEL_SWITCH, .doit = nl80211_tdls_channel_switch, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14127,7 +14032,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH, .doit = nl80211_tdls_cancel_channel_switch, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14135,7 +14039,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST, .doit = nl80211_set_multicast_to_unicast, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -14143,21 +14046,18 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_PMK, .doit = nl80211_set_pmk, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_DEL_PMK, .doit = nl80211_del_pmk, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_EXTERNAL_AUTH, .doit = nl80211_external_auth, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14165,7 +14065,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_CONTROL_PORT_FRAME, .doit = nl80211_tx_control_port, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14173,14 +14072,12 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_FTM_RESPONDER_STATS, .doit = nl80211_get_ftm_responder_stats, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_PEER_MEASUREMENT_START, .doit = nl80211_pmsr_start, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14188,7 +14085,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_NOTIFY_RADAR, .doit = nl80211_notify_radar_detection, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14200,6 +14096,7 @@ static struct genl_family nl80211_fam __ro_after_init = { .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ .maxattr = NL80211_ATTR_MAX, + .policy = nl80211_policy, .netnsok = true, .pre_doit = nl80211_pre_doit, .post_doit = nl80211_post_doit, -- cgit From 8db5da0b8618df79eceea99672e205d4a2a6309e Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Sun, 27 Jan 2019 19:03:45 -0500 Subject: x86/ima: require signed kernel modules Have the IMA architecture specific policy require signed kernel modules on systems with secure boot mode enabled; and coordinate the different signature verification methods, so only one signature is required. Requiring appended kernel module signatures may be configured, enabled on the boot command line, or with this patch enabled in secure boot mode. This patch defines set_module_sig_enforced(). To coordinate between appended kernel module signatures and IMA signatures, only define an IMA MODULE_CHECK policy rule if CONFIG_MODULE_SIG is not enabled. A custom IMA policy may still define and require an IMA signature. Signed-off-by: Mimi Zohar Reviewed-by: Luis Chamberlain Acked-by: Jessica Yu --- arch/x86/kernel/ima_arch.c | 9 ++++++++- include/linux/module.h | 5 +++++ kernel/module.c | 5 +++++ 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c index e47cd9390ab4..3fb9847f1cad 100644 --- a/arch/x86/kernel/ima_arch.c +++ b/arch/x86/kernel/ima_arch.c @@ -64,12 +64,19 @@ static const char * const sb_arch_rules[] = { "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig", #endif /* CONFIG_KEXEC_VERIFY_SIG */ "measure func=KEXEC_KERNEL_CHECK", +#if !IS_ENABLED(CONFIG_MODULE_SIG) + "appraise func=MODULE_CHECK appraise_type=imasig", +#endif + "measure func=MODULE_CHECK", NULL }; const char * const *arch_get_ima_policy(void) { - if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) + if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) { + if (IS_ENABLED(CONFIG_MODULE_SIG)) + set_module_sig_enforced(); return sb_arch_rules; + } return NULL; } diff --git a/include/linux/module.h b/include/linux/module.h index 5bf5dcd91009..73ee2b10e816 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -676,6 +676,7 @@ static inline bool is_livepatch_module(struct module *mod) #endif /* CONFIG_LIVEPATCH */ bool is_module_sig_enforced(void); +void set_module_sig_enforced(void); #else /* !CONFIG_MODULES... */ @@ -796,6 +797,10 @@ static inline bool is_module_sig_enforced(void) return false; } +static inline void set_module_sig_enforced(void) +{ +} + /* Dereference module function descriptor */ static inline void *dereference_module_function_descriptor(struct module *mod, void *ptr) diff --git a/kernel/module.c b/kernel/module.c index 0b9aa8ab89f0..985caa467aef 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -286,6 +286,11 @@ bool is_module_sig_enforced(void) } EXPORT_SYMBOL(is_module_sig_enforced); +void set_module_sig_enforced(void) +{ + sig_enforce = true; +} + /* Block module loading/unloading? */ int modules_disabled = 0; core_param(nomodule, modules_disabled, bint, 0); -- cgit From 1c7651f43777cdd59c1aaa82c87324d3e7438c7b Mon Sep 17 00:00:00 2001 From: Eugene Loh Date: Mon, 25 Feb 2019 11:59:58 -0800 Subject: kallsyms: store type information in its own array When a module is loaded, its symbols' Elf_Sym information is stored in a symtab. Further, type information is also captured. Since Elf_Sym has no type field, historically the st_info field has been hijacked for storing type: st_info was overwritten. commit 5439c985c5a83a8419f762115afdf560ab72a452 ("module: Overwrite st_size instead of st_info") changes that practice, as its one-liner indicates. Unfortunately, this change overwrites symbol size, information that a tool like DTrace expects to find. Allocate a typetab array to store type information so that no Elf_Sym field needs to be overwritten. Fixes: 5439c985c5a8 ("module: Overwrite st_size instead of st_info") Signed-off-by: Eugene Loh Reviewed-by: Nick Alcock [jeyu: renamed typeoff -> typeoffs ] Signed-off-by: Jessica Yu --- include/linux/module.h | 1 + kernel/module-internal.h | 2 +- kernel/module.c | 21 ++++++++++++++------- 3 files changed, 16 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/include/linux/module.h b/include/linux/module.h index 5bf5dcd91009..3abe8176df98 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -331,6 +331,7 @@ struct mod_kallsyms { Elf_Sym *symtab; unsigned int num_symtab; char *strtab; + char *typetab; }; #ifdef CONFIG_LIVEPATCH diff --git a/kernel/module-internal.h b/kernel/module-internal.h index 79c9be2dbbe9..d354341f8cc0 100644 --- a/kernel/module-internal.h +++ b/kernel/module-internal.h @@ -20,7 +20,7 @@ struct load_info { unsigned long len; Elf_Shdr *sechdrs; char *secstrings, *strtab; - unsigned long symoffs, stroffs; + unsigned long symoffs, stroffs, init_typeoffs, core_typeoffs; struct _ddebug *debug; unsigned int num_debug; bool sig_ok; diff --git a/kernel/module.c b/kernel/module.c index 0b9aa8ab89f0..69e52e82242a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2647,6 +2647,8 @@ static void layout_symtab(struct module *mod, struct load_info *info) info->symoffs = ALIGN(mod->core_layout.size, symsect->sh_addralign ?: 1); info->stroffs = mod->core_layout.size = info->symoffs + ndst * sizeof(Elf_Sym); mod->core_layout.size += strtab_size; + info->core_typeoffs = mod->core_layout.size; + mod->core_layout.size += ndst * sizeof(char); mod->core_layout.size = debug_align(mod->core_layout.size); /* Put string table section at end of init part of module. */ @@ -2660,6 +2662,8 @@ static void layout_symtab(struct module *mod, struct load_info *info) __alignof__(struct mod_kallsyms)); info->mod_kallsyms_init_off = mod->init_layout.size; mod->init_layout.size += sizeof(struct mod_kallsyms); + info->init_typeoffs = mod->init_layout.size; + mod->init_layout.size += nsrc * sizeof(char); mod->init_layout.size = debug_align(mod->init_layout.size); } @@ -2683,20 +2687,23 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym); /* Make sure we get permanent strtab: don't use info->strtab. */ mod->kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr; + mod->kallsyms->typetab = mod->init_layout.base + info->init_typeoffs; - /* Set types up while we still have access to sections. */ - for (i = 0; i < mod->kallsyms->num_symtab; i++) - mod->kallsyms->symtab[i].st_size - = elf_type(&mod->kallsyms->symtab[i], info); - - /* Now populate the cut down core kallsyms for after init. */ + /* + * Now populate the cut down core kallsyms for after init + * and set types up while we still have access to sections. + */ mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs; mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs; + mod->core_kallsyms.typetab = mod->core_layout.base + info->core_typeoffs; src = mod->kallsyms->symtab; for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) { + mod->kallsyms->typetab[i] = elf_type(src + i, info); if (i == 0 || is_livepatch_module(mod) || is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum, info->index.pcpu)) { + mod->core_kallsyms.typetab[ndst] = + mod->kallsyms->typetab[i]; dst[ndst] = src[i]; dst[ndst++].st_name = s - mod->core_kallsyms.strtab; s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name], @@ -4080,7 +4087,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, const Elf_Sym *sym = &kallsyms->symtab[symnum]; *value = kallsyms_symbol_value(sym); - *type = sym->st_size; + *type = kallsyms->typetab[symnum]; strlcpy(name, kallsyms_symbol_name(kallsyms, symnum), KSYM_NAME_LEN); strlcpy(module_name, mod->name, MODULE_NAME_LEN); *exported = is_exported(name, *value, mod); -- cgit From 2011fccfb61bbd1d7c8864b2b3ed7012342e9ba3 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Thu, 28 Mar 2019 18:01:57 -0700 Subject: bpf: Support variable offset stack access from helpers Currently there is a difference in how verifier checks memory access for helper arguments for PTR_TO_MAP_VALUE and PTR_TO_STACK with regard to variable part of offset. check_map_access, that is used for PTR_TO_MAP_VALUE, can handle variable offsets just fine, so that BPF program can call a helper like this: some_helper(map_value_ptr + off, size); , where offset is unknown at load time, but is checked by program to be in a safe rage (off >= 0 && off + size < map_value_size). But it's not the case for check_stack_boundary, that is used for PTR_TO_STACK, and same code with pointer to stack is rejected by verifier: some_helper(stack_value_ptr + off, size); For example: 0: (7a) *(u64 *)(r10 -16) = 0 1: (7a) *(u64 *)(r10 -8) = 0 2: (61) r2 = *(u32 *)(r1 +0) 3: (57) r2 &= 4 4: (17) r2 -= 16 5: (0f) r2 += r10 6: (18) r1 = 0xffff888111343a80 8: (85) call bpf_map_lookup_elem#1 invalid variable stack read R2 var_off=(0xfffffffffffffff0; 0x4) Add support for variable offset access to check_stack_boundary so that if offset is checked by program to be in a safe range it's accepted by verifier. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 75 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2fe89138309a..87221fda1321 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2157,6 +2157,29 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins BPF_SIZE(insn->code), BPF_WRITE, -1, true); } +static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno, + int off, int access_size, + bool zero_size_allowed) +{ + struct bpf_reg_state *reg = reg_state(env, regno); + + if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || + access_size < 0 || (access_size == 0 && !zero_size_allowed)) { + if (tnum_is_const(reg->var_off)) { + verbose(env, "invalid stack type R%d off=%d access_size=%d\n", + regno, off, access_size); + } else { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n", + regno, tn_buf, access_size); + } + return -EACCES; + } + return 0; +} + /* when register 'regno' is passed into function that will read 'access_size' * bytes from that pointer, make sure that it's within stack boundary * and all elements of stack are initialized. @@ -2169,7 +2192,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, { struct bpf_reg_state *reg = reg_state(env, regno); struct bpf_func_state *state = func(env, reg); - int off, i, slot, spi; + int err, min_off, max_off, i, slot, spi; if (reg->type != PTR_TO_STACK) { /* Allow zero-byte read from NULL, regardless of pointer type */ @@ -2183,21 +2206,23 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, return -EACCES; } - /* Only allow fixed-offset stack reads */ - if (!tnum_is_const(reg->var_off)) { - char tn_buf[48]; - - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "invalid variable stack read R%d var_off=%s\n", - regno, tn_buf); - return -EACCES; - } - off = reg->off + reg->var_off.value; - if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || - access_size < 0 || (access_size == 0 && !zero_size_allowed)) { - verbose(env, "invalid stack type R%d off=%d access_size=%d\n", - regno, off, access_size); - return -EACCES; + if (tnum_is_const(reg->var_off)) { + min_off = max_off = reg->var_off.value + reg->off; + err = __check_stack_boundary(env, regno, min_off, access_size, + zero_size_allowed); + if (err) + return err; + } else { + min_off = reg->smin_value + reg->off; + max_off = reg->umax_value + reg->off; + err = __check_stack_boundary(env, regno, min_off, access_size, + zero_size_allowed); + if (err) + return err; + err = __check_stack_boundary(env, regno, max_off, access_size, + zero_size_allowed); + if (err) + return err; } if (meta && meta->raw_mode) { @@ -2206,10 +2231,10 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, return 0; } - for (i = 0; i < access_size; i++) { + for (i = min_off; i < max_off + access_size; i++) { u8 *stype; - slot = -(off + i) - 1; + slot = -i - 1; spi = slot / BPF_REG_SIZE; if (state->allocated_stack <= slot) goto err; @@ -2222,8 +2247,16 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, goto mark; } err: - verbose(env, "invalid indirect read from stack off %d+%d size %d\n", - off, i, access_size); + if (tnum_is_const(reg->var_off)) { + verbose(env, "invalid indirect read from stack off %d+%d size %d\n", + min_off, i - min_off, access_size); + } else { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n", + tn_buf, i - min_off, access_size); + } return -EACCES; mark: /* reading any byte out of 8-byte 'spill_slot' will cause @@ -2232,7 +2265,7 @@ mark: mark_reg_read(env, &state->stack[spi].spilled_ptr, state->stack[spi].spilled_ptr.parent); } - return update_stack_depth(env, state, off); + return update_stack_depth(env, state, min_off); } static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, -- cgit From 40ed29b373381532ef222e509c5aa69a1d8561ea Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sun, 23 Sep 2018 12:11:33 +0000 Subject: ring-buffer: Fix ring buffer size in rb_write_something() 'cnt' should be used to calculate ring buffer size rather than data->cnt Link: http://lkml.kernel.org/r/1537704693-184237-1-git-send-email-yuehaibing@huawei.com Signed-off-by: YueHaibing Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 41b6f96e5366..4f33d7d841af 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4979,7 +4979,7 @@ static __init int rb_write_something(struct rb_test_data *data, bool nested) cnt = data->cnt + (nested ? 27 : 0); /* Multiply cnt by ~e, to make some unique increment */ - size = (data->cnt * 68 / 25) % (sizeof(rb_string) - 1); + size = (cnt * 68 / 25) % (sizeof(rb_string) - 1); len = size + sizeof(struct rb_item); -- cgit From f45d1225adb0479478cee989e2ae2d7d2c62b31b Mon Sep 17 00:00:00 2001 From: Divya Indi Date: Wed, 20 Mar 2019 11:28:51 -0700 Subject: tracing: Kernel access to Ftrace instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ftrace provides the feature “instances” that provides the capability to create multiple Ftrace ring buffers. However, currently these buffers are created/accessed via userspace only. The kernel APIs providing these features are not exported, hence cannot be used by other kernel components. This patch aims to extend this infrastructure to provide the flexibility to create/log/remove/ enable-disable existing trace events to these buffers from within the kernel. Link: http://lkml.kernel.org/r/1553106531-3281-2-git-send-email-divya.indi@oracle.com Signed-off-by: Divya Indi Reviewed-by: Joe Jin Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 74 ++++++++++++++++++++++++++++++--------------- kernel/trace/trace_events.c | 1 + 2 files changed, 51 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 21153e64bf1c..4384fcc386c8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3053,6 +3053,7 @@ void trace_printk_init_buffers(void) if (global_trace.trace_buffer.buffer) tracing_start_cmdline_record(); } +EXPORT_SYMBOL_GPL(trace_printk_init_buffers); void trace_printk_start_comm(void) { @@ -3213,6 +3214,7 @@ int trace_array_printk(struct trace_array *tr, va_end(ap); return ret; } +EXPORT_SYMBOL_GPL(trace_array_printk); __printf(3, 4) int trace_array_printk_buf(struct ring_buffer *buffer, @@ -8037,7 +8039,7 @@ static void update_tracer_options(struct trace_array *tr) mutex_unlock(&trace_types_lock); } -static int instance_mkdir(const char *name) +struct trace_array *trace_array_create(const char *name) { struct trace_array *tr; int ret; @@ -8101,7 +8103,7 @@ static int instance_mkdir(const char *name) mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); - return 0; + return tr; out_free_tr: free_trace_buffers(tr); @@ -8113,33 +8115,21 @@ static int instance_mkdir(const char *name) mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); - return ret; + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(trace_array_create); +static int instance_mkdir(const char *name) +{ + return PTR_ERR_OR_ZERO(trace_array_create(name)); } -static int instance_rmdir(const char *name) +static int __remove_instance(struct trace_array *tr) { - struct trace_array *tr; - int found = 0; - int ret; int i; - mutex_lock(&event_mutex); - mutex_lock(&trace_types_lock); - - ret = -ENODEV; - list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (tr->name && strcmp(tr->name, name) == 0) { - found = 1; - break; - } - } - if (!found) - goto out_unlock; - - ret = -EBUSY; if (tr->ref || (tr->current_trace && tr->current_trace->ref)) - goto out_unlock; + return -EBUSY; list_del(&tr->list); @@ -8165,10 +8155,46 @@ static int instance_rmdir(const char *name) free_cpumask_var(tr->tracing_cpumask); kfree(tr->name); kfree(tr); + tr = NULL; - ret = 0; + return 0; +} + +int trace_array_destroy(struct trace_array *tr) +{ + int ret; + + if (!tr) + return -EINVAL; + + mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); + + ret = __remove_instance(tr); + + mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(trace_array_destroy); + +static int instance_rmdir(const char *name) +{ + struct trace_array *tr; + int ret; + + mutex_lock(&event_mutex); + mutex_lock(&trace_types_lock); + + ret = -ENODEV; + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr->name && strcmp(tr->name, name) == 0) { + ret = __remove_instance(tr); + break; + } + } - out_unlock: mutex_unlock(&trace_types_lock); mutex_unlock(&event_mutex); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 5b3b0c3c8a47..81c038ed6cee 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -832,6 +832,7 @@ static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) return ret; } +EXPORT_SYMBOL_GPL(ftrace_set_clr_event); /** * trace_set_clr_event - enable or disable an event -- cgit From 8a062902be725f647dc8da532b04d836546a369a Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 31 Mar 2019 18:48:15 -0500 Subject: tracing: Add tracing error log Introduce a new ftrace file, tracing/error_log, for ftrace commands to log errors. This is useful for allowing more complex commands such as hist trigger and kprobe_event commands to point out specifically where something may have gone wrong without forcing them to resort to more ad hoc methods such as tacking error messages onto existing output files. To log a tracing error, call the event_log_err() function, passing it a location string describing where it came from e.g. kprobe_events or system:event, the command that caused the error, an array of static error strings describing errors and an index within that array which describes the specific error, along with the position to place the error caret. Reading the log displays the last (currently) 8 errors logged in the following format: [timestamp] : error: Command: ^ Memory for the error log isn't allocated unless there has been a trace event error, and the error log can be cleared and have its memory freed by writing the empty string in truncation mode to it: # echo > tracing/error_log. Link: http://lkml.kernel.org/r/0c2c82571fd38c5f3a88ca823627edff250e9416.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Suggested-by: Masami Hiramatsu Improvements-suggested-by: Steve Rostedt Acked-by: Namhyung Kim Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 218 +++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 4 + 2 files changed, 222 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4384fcc386c8..7978168f5041 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6878,6 +6878,221 @@ static const struct file_operations snapshot_raw_fops = { #endif /* CONFIG_TRACER_SNAPSHOT */ +#define TRACING_LOG_ERRS_MAX 8 +#define TRACING_LOG_LOC_MAX 128 + +#define CMD_PREFIX " Command: " + +struct err_info { + const char **errs; /* ptr to loc-specific array of err strings */ + u8 type; /* index into errs -> specific err string */ + u8 pos; /* MAX_FILTER_STR_VAL = 256 */ + u64 ts; +}; + +struct tracing_log_err { + struct list_head list; + struct err_info info; + char loc[TRACING_LOG_LOC_MAX]; /* err location */ + char cmd[MAX_FILTER_STR_VAL]; /* what caused err */ +}; + +static LIST_HEAD(tracing_err_log); +static DEFINE_MUTEX(tracing_err_log_lock); + +static unsigned int n_tracing_err_log_entries; + +struct tracing_log_err *get_tracing_log_err(void) +{ + struct tracing_log_err *err; + + if (n_tracing_err_log_entries < TRACING_LOG_ERRS_MAX) { + err = kzalloc(sizeof(*err), GFP_KERNEL); + if (!err) + err = ERR_PTR(-ENOMEM); + n_tracing_err_log_entries++; + + return err; + } + + err = list_first_entry(&tracing_err_log, struct tracing_log_err, list); + list_del(&err->list); + + return err; +} + +/** + * err_pos - find the position of a string within a command for error careting + * @cmd: The tracing command that caused the error + * @str: The string to position the caret at within @cmd + * + * Finds the position of the first occurence of @str within @cmd. The + * return value can be passed to tracing_log_err() for caret placement + * within @cmd. + * + * Returns the index within @cmd of the first occurence of @str or 0 + * if @str was not found. + */ +unsigned int err_pos(char *cmd, const char *str) +{ + char *found; + + if (WARN_ON(!strlen(cmd))) + return 0; + + found = strstr(cmd, str); + if (found) + return found - cmd; + + return 0; +} + +/** + * tracing_log_err - write an error to the tracing error log + * @loc: A string describing where the error occurred + * @cmd: The tracing command that caused the error + * @errs: The array of loc-specific static error strings + * @type: The index into errs[], which produces the specific static err string + * @pos: The position the caret should be placed in the cmd + * + * Writes an error into tracing/error_log of the form: + * + * : error: + * Command: + * ^ + * + * tracing/error_log is a small log file containing the last + * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated + * unless there has been a tracing error, and the error log can be + * cleared and have its memory freed by writing the empty string in + * truncation mode to it i.e. echo > tracing/error_log. + * + * NOTE: the @errs array along with the @type param are used to + * produce a static error string - this string is not copied and saved + * when the error is logged - only a pointer to it is saved. See + * existing callers for examples of how static strings are typically + * defined for use with tracing_log_err(). + */ +void tracing_log_err(const char *loc, const char *cmd, + const char **errs, u8 type, u8 pos) +{ + struct tracing_log_err *err; + + mutex_lock(&tracing_err_log_lock); + err = get_tracing_log_err(); + if (PTR_ERR(err) == -ENOMEM) { + mutex_unlock(&tracing_err_log_lock); + return; + } + + snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc); + snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd); + + err->info.errs = errs; + err->info.type = type; + err->info.pos = pos; + err->info.ts = local_clock(); + + list_add_tail(&err->list, &tracing_err_log); + mutex_unlock(&tracing_err_log_lock); +} + +static void clear_tracing_err_log(void) +{ + struct tracing_log_err *err, *next; + + mutex_lock(&tracing_err_log_lock); + list_for_each_entry_safe(err, next, &tracing_err_log, list) { + list_del(&err->list); + kfree(err); + } + + n_tracing_err_log_entries = 0; + mutex_unlock(&tracing_err_log_lock); +} + +static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&tracing_err_log_lock); + + return seq_list_start(&tracing_err_log, *pos); +} + +static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &tracing_err_log, pos); +} + +static void tracing_err_log_seq_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&tracing_err_log_lock); +} + +static void tracing_err_log_show_pos(struct seq_file *m, u8 pos) +{ + u8 i; + + for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++) + seq_putc(m, ' '); + for (i = 0; i < pos; i++) + seq_putc(m, ' '); + seq_puts(m, "^\n"); +} + +static int tracing_err_log_seq_show(struct seq_file *m, void *v) +{ + struct tracing_log_err *err = v; + + if (err) { + const char *err_text = err->info.errs[err->info.type]; + u64 sec = err->info.ts; + u32 nsec; + + nsec = do_div(sec, NSEC_PER_SEC); + seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000, + err->loc, err_text); + seq_printf(m, "%s", err->cmd); + tracing_err_log_show_pos(m, err->info.pos); + } + + return 0; +} + +static const struct seq_operations tracing_err_log_seq_ops = { + .start = tracing_err_log_seq_start, + .next = tracing_err_log_seq_next, + .stop = tracing_err_log_seq_stop, + .show = tracing_err_log_seq_show +}; + +static int tracing_err_log_open(struct inode *inode, struct file *file) +{ + int ret = 0; + + /* If this file was opened for write, then erase contents */ + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) + clear_tracing_err_log(); + + if (file->f_mode & FMODE_READ) + ret = seq_open(file, &tracing_err_log_seq_ops); + + return ret; +} + +static ssize_t tracing_err_log_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + return count; +} + +static const struct file_operations tracing_err_log_fops = { + .open = tracing_err_log_open, + .write = tracing_err_log_write, + .read = seq_read, + .llseek = seq_lseek, +}; + static int tracing_buffers_open(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; @@ -8284,6 +8499,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) tr, &snapshot_fops); #endif + trace_create_file("error_log", 0644, d_tracer, + tr, &tracing_err_log_fops); + for_each_tracing_cpu(cpu) tracing_init_tracefs_percpu(tr, cpu); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d80cee49e0eb..b711edbef7e7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1884,6 +1884,10 @@ extern ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, size_t count, loff_t *ppos, int (*createfn)(int, char**)); +extern unsigned int err_pos(char *cmd, const char *str); +extern void tracing_log_err(const char *loc, const char *cmd, + const char **errs, u8 type, u8 pos); + /* * Normal trace_printk() and friends allocates special buffers * to do the manipulation, as well as saves the print formats -- cgit From a1a05bb40e229d148c071fcd2ed787b21f61ff8b Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 31 Mar 2019 18:48:16 -0500 Subject: tracing: Save the last hist command's associated event name In preparation for making use of the new trace error log, save the subsystem and event name associated with the last hist command - it will be passed as the location param in the event_log_err() calls. Link: http://lkml.kernel.org/r/eb0fd1362be8f39facb86c83eecf441b7a5876f8.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 795aa2038377..0de702bf148f 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -535,15 +535,34 @@ static struct track_data *track_data_alloc(unsigned int key_len, return data; } -static char last_hist_cmd[MAX_FILTER_STR_VAL]; +static char last_cmd[MAX_FILTER_STR_VAL]; +static char last_cmd_loc[MAX_FILTER_STR_VAL]; + static char hist_err_str[MAX_FILTER_STR_VAL]; -static void last_cmd_set(char *str) +static void last_cmd_set(struct trace_event_file *file, char *str) { + const char *system = NULL, *name = NULL; + struct trace_event_call *call; + if (!str) return; - strncpy(last_hist_cmd, str, MAX_FILTER_STR_VAL - 1); + strncpy(last_cmd, str, MAX_FILTER_STR_VAL - 1); + + if (file) { + call = file->event_call; + + system = call->class->system; + if (system) { + name = trace_event_name(call); + if (!name) + system = NULL; + } + } + + if (system) + snprintf(last_cmd_loc, MAX_FILTER_STR_VAL, "hist:%s:%s", system, name); } static void hist_err(char *str, char *var) @@ -583,6 +602,8 @@ static void hist_err_event(char *str, char *system, char *event, char *var) static void hist_err_clear(void) { hist_err_str[0] = '\0'; + last_cmd[0] = '\0'; + last_cmd_loc[0] = '\0'; } static bool have_hist_err(void) @@ -5438,8 +5459,8 @@ static int hist_show(struct seq_file *m, void *v) } if (have_hist_err()) { - seq_printf(m, "\nERROR: %s\n", hist_err_str); - seq_printf(m, " Last command: %s\n", last_hist_cmd); + seq_printf(m, "\n%s: error: \n", hist_err_str); + seq_printf(m, " Last command: %s\n", last_cmd); } out_unlock: @@ -6043,8 +6064,8 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, lockdep_assert_held(&event_mutex); if (glob && strlen(glob)) { - last_cmd_set(param); hist_err_clear(); + last_cmd_set(file, param); } if (!param) -- cgit From d566c5e9d1bad6773fe9cce3d4514cca2cc32e4e Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 31 Mar 2019 18:48:17 -0500 Subject: tracing: Use tracing error_log with hist triggers Replace hist_err() and hist_err_event() with tracing_log_err() from the new tracing error_log mechanism. Also add a couple related helper functions and remove most of the old hist_err()-related code. With this change, users no longer read the hist files for hist trigger error information, but instead look at tracing/error_log for the same information. Link: http://lkml.kernel.org/r/c98f77a97c9715d18b623eeb5741057b330d5ac0.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 206 ++++++++++++++++++++------------------- 1 file changed, 104 insertions(+), 102 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 0de702bf148f..071c62cacba7 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -22,6 +22,57 @@ #define STR_VAR_LEN_MAX 32 /* must be multiple of sizeof(u64) */ +#define ERRORS \ + C(NONE, "No error"), \ + C(DUPLICATE_VAR, "Variable already defined"), \ + C(VAR_NOT_UNIQUE, "Variable name not unique, need to use fully qualified name (subsys.event.var) for variable"), \ + C(TOO_MANY_VARS, "Too many variables defined"), \ + C(MALFORMED_ASSIGNMENT, "Malformed assignment"), \ + C(NAMED_MISMATCH, "Named hist trigger doesn't match existing named trigger (includes variables)"), \ + C(TRIGGER_EEXIST, "Hist trigger already exists"), \ + C(TRIGGER_ENOENT_CLEAR, "Can't clear or continue a nonexistent hist trigger"), \ + C(SET_CLOCK_FAIL, "Couldn't set trace_clock"), \ + C(BAD_FIELD_MODIFIER, "Invalid field modifier"), \ + C(TOO_MANY_SUBEXPR, "Too many subexpressions (3 max)"), \ + C(TIMESTAMP_MISMATCH, "Timestamp units in expression don't match"), \ + C(TOO_MANY_FIELD_VARS, "Too many field variables defined"), \ + C(EVENT_FILE_NOT_FOUND, "Event file not found"), \ + C(HIST_NOT_FOUND, "Matching event histogram not found"), \ + C(HIST_CREATE_FAIL, "Couldn't create histogram for field"), \ + C(SYNTH_VAR_NOT_FOUND, "Couldn't find synthetic variable"), \ + C(SYNTH_EVENT_NOT_FOUND,"Couldn't find synthetic event"), \ + C(SYNTH_TYPE_MISMATCH, "Param type doesn't match synthetic event field type"), \ + C(SYNTH_COUNT_MISMATCH, "Param count doesn't match synthetic event field count"), \ + C(FIELD_VAR_PARSE_FAIL, "Couldn't parse field variable"), \ + C(VAR_CREATE_FIND_FAIL, "Couldn't create or find variable"), \ + C(ONX_NOT_VAR, "For onmax(x) or onchange(x), x must be a variable"), \ + C(ONX_VAR_NOT_FOUND, "Couldn't find onmax or onchange variable"), \ + C(ONX_VAR_CREATE_FAIL, "Couldn't create onmax or onchange variable"), \ + C(FIELD_VAR_CREATE_FAIL,"Couldn't create field variable"), \ + C(TOO_MANY_PARAMS, "Too many action params"), \ + C(PARAM_NOT_FOUND, "Couldn't find param"), \ + C(INVALID_PARAM, "Invalid action param"), \ + C(ACTION_NOT_FOUND, "No action found"), \ + C(NO_SAVE_PARAMS, "No params found for save()"), \ + C(TOO_MANY_SAVE_ACTIONS,"Can't have more than one save() action per hist"), \ + C(ACTION_MISMATCH, "Handler doesn't support action"), \ + C(NO_CLOSING_PAREN, "No closing paren found"), \ + C(SUBSYS_NOT_FOUND, "Missing subsystem"), \ + C(INVALID_SUBSYS_EVENT, "Invalid subsystem or event name"), \ + C(INVALID_REF_KEY, "Using variable references as keys not supported"), \ + C(VAR_NOT_FOUND, "Couldn't find variable"), \ + C(FIELD_NOT_FOUND, "Couldn't find field"), + +#undef C +#define C(a, b) HIST_ERR_##a + +enum { ERRORS }; + +#undef C +#define C(a, b) b + +static const char *err_text[] = { ERRORS }; + struct hist_field; typedef u64 (*hist_field_fn_t) (struct hist_field *field, @@ -538,7 +589,10 @@ static struct track_data *track_data_alloc(unsigned int key_len, static char last_cmd[MAX_FILTER_STR_VAL]; static char last_cmd_loc[MAX_FILTER_STR_VAL]; -static char hist_err_str[MAX_FILTER_STR_VAL]; +static int errpos(char *str) +{ + return err_pos(last_cmd, str); +} static void last_cmd_set(struct trace_event_file *file, char *str) { @@ -565,55 +619,17 @@ static void last_cmd_set(struct trace_event_file *file, char *str) snprintf(last_cmd_loc, MAX_FILTER_STR_VAL, "hist:%s:%s", system, name); } -static void hist_err(char *str, char *var) +static void hist_err(u8 err_type, u8 err_pos) { - int maxlen = MAX_FILTER_STR_VAL - 1; - - if (!str) - return; - - if (strlen(hist_err_str)) - return; - - if (!var) - var = ""; - - if (strlen(hist_err_str) + strlen(str) + strlen(var) > maxlen) - return; - - strcat(hist_err_str, str); - strcat(hist_err_str, var); -} - -static void hist_err_event(char *str, char *system, char *event, char *var) -{ - char err[MAX_FILTER_STR_VAL]; - - if (system && var) - snprintf(err, MAX_FILTER_STR_VAL, "%s.%s.%s", system, event, var); - else if (system) - snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event); - else - strscpy(err, var, MAX_FILTER_STR_VAL); - - hist_err(str, err); + tracing_log_err(last_cmd_loc, last_cmd, err_text, err_type, err_pos); } static void hist_err_clear(void) { - hist_err_str[0] = '\0'; last_cmd[0] = '\0'; last_cmd_loc[0] = '\0'; } -static bool have_hist_err(void) -{ - if (strlen(hist_err_str)) - return true; - - return false; -} - struct synth_trace_event { struct trace_entry ent; u64 fields[]; @@ -1740,7 +1756,7 @@ static struct trace_event_file *find_var_file(struct trace_array *tr, if (find_var_field(var_hist_data, var_name)) { if (found) { - hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name); + hist_err(HIST_ERR_VAR_NOT_UNIQUE, errpos(var_name)); return NULL; } @@ -1791,7 +1807,7 @@ find_match_var(struct hist_trigger_data *hist_data, char *var_name) hist_field = find_file_var(file, var_name); if (hist_field) { if (found) { - hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name); + hist_err(HIST_ERR_VAR_NOT_UNIQUE, errpos(var_name)); return ERR_PTR(-EINVAL); } @@ -2023,7 +2039,6 @@ static int parse_action(char *str, struct hist_trigger_attrs *attrs) attrs->n_actions++; ret = 0; } - return ret; } @@ -2083,7 +2098,7 @@ static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) char *assignment; if (attrs->n_assignments == TRACING_MAP_VARS_MAX) { - hist_err("Too many variables defined: ", str); + hist_err(HIST_ERR_TOO_MANY_VARS, errpos(str)); ret = -EINVAL; goto out; } @@ -2681,8 +2696,7 @@ static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data, system, event_name); if (!ref_field) - hist_err_event("Couldn't find variable: $", - system, event_name, var_name); + hist_err(HIST_ERR_VAR_NOT_FOUND, errpos(var_name)); return ref_field; } @@ -2716,7 +2730,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, else if (strcmp(modifier, "usecs") == 0) *flags |= HIST_FIELD_FL_TIMESTAMP_USECS; else { - hist_err("Invalid field modifier: ", modifier); + hist_err(HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier)); field = ERR_PTR(-EINVAL); goto out; } @@ -2732,7 +2746,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, else { field = trace_find_event_field(file->event_call, field_name); if (!field || !field->size) { - hist_err("Couldn't find field: ", field_name); + hist_err(HIST_ERR_FIELD_NOT_FOUND, errpos(field_name)); field = ERR_PTR(-EINVAL); goto out; } @@ -2843,7 +2857,7 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, /* we support only -(xxx) i.e. explicit parens required */ if (level > 3) { - hist_err("Too many subexpressions (3 max): ", str); + hist_err(HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); ret = -EINVAL; goto free; } @@ -2926,7 +2940,7 @@ static int check_expr_operands(struct hist_field *operand1, if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) { - hist_err("Timestamp units in expression don't match", NULL); + hist_err(HIST_ERR_TIMESTAMP_MISMATCH, 0); return -EINVAL; } @@ -2944,7 +2958,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, char *sep, *operand1_str; if (level > 3) { - hist_err("Too many subexpressions (3 max): ", str); + hist_err(HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); return ERR_PTR(-EINVAL); } @@ -3182,16 +3196,14 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, int ret; if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) { - hist_err_event("trace action: Too many field variables defined: ", - subsys_name, event_name, field_name); + hist_err(HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name)); return ERR_PTR(-EINVAL); } file = event_file(tr, subsys_name, event_name); if (IS_ERR(file)) { - hist_err_event("trace action: Event file not found: ", - subsys_name, event_name, field_name); + hist_err(HIST_ERR_EVENT_FILE_NOT_FOUND, errpos(field_name)); ret = PTR_ERR(file); return ERR_PTR(ret); } @@ -3204,8 +3216,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, */ hist_data = find_compatible_hist(target_hist_data, file); if (!hist_data) { - hist_err_event("trace action: Matching event histogram not found: ", - subsys_name, event_name, field_name); + hist_err(HIST_ERR_HIST_NOT_FOUND, errpos(field_name)); return ERR_PTR(-EINVAL); } @@ -3266,8 +3277,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, kfree(cmd); kfree(var_hist->cmd); kfree(var_hist); - hist_err_event("trace action: Couldn't create histogram for field: ", - subsys_name, event_name, field_name); + hist_err(HIST_ERR_HIST_CREATE_FAIL, errpos(field_name)); return ERR_PTR(ret); } @@ -3279,8 +3289,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, if (IS_ERR_OR_NULL(event_var)) { kfree(var_hist->cmd); kfree(var_hist); - hist_err_event("trace action: Couldn't find synthetic variable: ", - subsys_name, event_name, field_name); + hist_err(HIST_ERR_SYNTH_VAR_NOT_FOUND, errpos(field_name)); return ERR_PTR(-EINVAL); } @@ -3417,21 +3426,21 @@ static struct field_var *create_field_var(struct hist_trigger_data *hist_data, int ret = 0; if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) { - hist_err("Too many field variables defined: ", field_name); + hist_err(HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name)); ret = -EINVAL; goto err; } val = parse_atom(hist_data, file, field_name, &flags, NULL); if (IS_ERR(val)) { - hist_err("Couldn't parse field variable: ", field_name); + hist_err(HIST_ERR_FIELD_VAR_PARSE_FAIL, errpos(field_name)); ret = PTR_ERR(val); goto err; } var = create_var(hist_data, file, field_name, val->size, val->type); if (IS_ERR(var)) { - hist_err("Couldn't create or find variable: ", field_name); + hist_err(HIST_ERR_VAR_CREATE_FIND_FAIL, errpos(field_name)); kfree(val); ret = PTR_ERR(var); goto err; @@ -3763,14 +3772,14 @@ static int track_data_create(struct hist_trigger_data *hist_data, track_data_var_str = data->track_data.var_str; if (track_data_var_str[0] != '$') { - hist_err("For onmax(x) or onchange(x), x must be a variable: ", track_data_var_str); + hist_err(HIST_ERR_ONX_NOT_VAR, errpos(track_data_var_str)); return -EINVAL; } track_data_var_str++; var_field = find_target_event_var(hist_data, NULL, NULL, track_data_var_str); if (!var_field) { - hist_err("Couldn't find onmax or onchange variable: ", track_data_var_str); + hist_err(HIST_ERR_ONX_VAR_NOT_FOUND, errpos(track_data_var_str)); return -EINVAL; } @@ -3783,7 +3792,7 @@ static int track_data_create(struct hist_trigger_data *hist_data, if (data->handler == HANDLER_ONMAX) track_var = create_var(hist_data, file, "__max", sizeof(u64), "u64"); if (IS_ERR(track_var)) { - hist_err("Couldn't create onmax variable: ", "__max"); + hist_err(HIST_ERR_ONX_VAR_CREATE_FAIL, 0); ret = PTR_ERR(track_var); goto out; } @@ -3791,7 +3800,7 @@ static int track_data_create(struct hist_trigger_data *hist_data, if (data->handler == HANDLER_ONCHANGE) track_var = create_var(hist_data, file, "__change", sizeof(u64), "u64"); if (IS_ERR(track_var)) { - hist_err("Couldn't create onchange variable: ", "__change"); + hist_err(HIST_ERR_ONX_VAR_CREATE_FAIL, 0); ret = PTR_ERR(track_var); goto out; } @@ -3810,20 +3819,20 @@ static int parse_action_params(char *params, struct action_data *data) while (params) { if (data->n_params >= SYNTH_FIELDS_MAX) { - hist_err("Too many action params", ""); + hist_err(HIST_ERR_TOO_MANY_PARAMS, 0); goto out; } param = strsep(¶ms, ","); if (!param) { - hist_err("No action param found", ""); + hist_err(HIST_ERR_PARAM_NOT_FOUND, 0); ret = -EINVAL; goto out; } param = strstrip(param); if (strlen(param) < 2) { - hist_err("Invalid action param: ", param); + hist_err(HIST_ERR_INVALID_PARAM, errpos(param)); ret = -EINVAL; goto out; } @@ -3855,14 +3864,14 @@ static int action_parse(char *str, struct action_data *data, strsep(&str, "."); if (!str) { - hist_err("action parsing: No action found", ""); + hist_err(HIST_ERR_ACTION_NOT_FOUND, 0); ret = -EINVAL; goto out; } action_name = strsep(&str, "("); if (!action_name || !str) { - hist_err("action parsing: No action found", ""); + hist_err(HIST_ERR_ACTION_NOT_FOUND, 0); ret = -EINVAL; goto out; } @@ -3871,7 +3880,7 @@ static int action_parse(char *str, struct action_data *data, char *params = strsep(&str, ")"); if (!params) { - hist_err("action parsing: No params found for %s", "save"); + hist_err(HIST_ERR_NO_SAVE_PARAMS, 0); ret = -EINVAL; goto out; } @@ -3885,7 +3894,7 @@ static int action_parse(char *str, struct action_data *data, else if (handler == HANDLER_ONCHANGE) data->track_data.check_val = check_track_val_changed; else { - hist_err("action parsing: Handler doesn't support action: ", action_name); + hist_err(HIST_ERR_ACTION_MISMATCH, errpos(action_name)); ret = -EINVAL; goto out; } @@ -3897,7 +3906,7 @@ static int action_parse(char *str, struct action_data *data, char *params = strsep(&str, ")"); if (!str) { - hist_err("action parsing: No closing paren found: %s", params); + hist_err(HIST_ERR_NO_CLOSING_PAREN, errpos(params)); ret = -EINVAL; goto out; } @@ -3907,7 +3916,7 @@ static int action_parse(char *str, struct action_data *data, else if (handler == HANDLER_ONCHANGE) data->track_data.check_val = check_track_val_changed; else { - hist_err("action parsing: Handler doesn't support action: ", action_name); + hist_err(HIST_ERR_ACTION_MISMATCH, errpos(action_name)); ret = -EINVAL; goto out; } @@ -4060,7 +4069,7 @@ trace_action_find_var(struct hist_trigger_data *hist_data, } if (!hist_field) - hist_err_event("trace action: Couldn't find param: $", system, event, var); + hist_err(HIST_ERR_PARAM_NOT_FOUND, errpos(var)); return hist_field; } @@ -4135,7 +4144,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data, event = find_synth_event(synth_event_name); if (!event) { - hist_err("trace action: Couldn't find synthetic event: ", synth_event_name); + hist_err(HIST_ERR_SYNTH_EVENT_NOT_FOUND, errpos(synth_event_name)); return -EINVAL; } @@ -4196,15 +4205,14 @@ static int trace_action_create(struct hist_trigger_data *hist_data, continue; } - hist_err_event("trace action: Param type doesn't match synthetic event field type: ", - system, event_name, param); + hist_err(HIST_ERR_SYNTH_TYPE_MISMATCH, errpos(param)); kfree(p); ret = -EINVAL; goto err; } if (field_pos != event->n_fields) { - hist_err("trace action: Param count doesn't match synthetic event field count: ", event->name); + hist_err(HIST_ERR_SYNTH_COUNT_MISMATCH, errpos(event->name)); ret = -EINVAL; goto err; } @@ -4250,7 +4258,7 @@ static int action_create(struct hist_trigger_data *hist_data, if (data->action == ACTION_SAVE) { if (hist_data->n_save_vars) { ret = -EEXIST; - hist_err("save action: Can't have more than one save() action per hist", ""); + hist_err(HIST_ERR_TOO_MANY_SAVE_ACTIONS, 0); goto out; } @@ -4263,7 +4271,7 @@ static int action_create(struct hist_trigger_data *hist_data, field_var = create_target_field_var(hist_data, NULL, NULL, param); if (IS_ERR(field_var)) { - hist_err("save action: Couldn't create field variable: ", param); + hist_err(HIST_ERR_FIELD_VAR_CREATE_FAIL, errpos(param)); ret = PTR_ERR(field_var); kfree(param); goto out; @@ -4297,19 +4305,18 @@ static struct action_data *onmatch_parse(struct trace_array *tr, char *str) match_event = strsep(&str, ")"); if (!match_event || !str) { - hist_err("onmatch: Missing closing paren: ", match_event); + hist_err(HIST_ERR_NO_CLOSING_PAREN, errpos(match_event)); goto free; } match_event_system = strsep(&match_event, "."); if (!match_event) { - hist_err("onmatch: Missing subsystem for match event: ", match_event_system); + hist_err(HIST_ERR_SUBSYS_NOT_FOUND, errpos(match_event_system)); goto free; } if (IS_ERR(event_file(tr, match_event_system, match_event))) { - hist_err_event("onmatch: Invalid subsystem or event name: ", - match_event_system, match_event, NULL); + hist_err(HIST_ERR_INVALID_SUBSYS_EVENT, errpos(match_event)); goto free; } @@ -4400,7 +4407,7 @@ static int create_var_field(struct hist_trigger_data *hist_data, return -EINVAL; if (find_var(hist_data, file, var_name) && !hist_data->remove) { - hist_err("Variable already defined: ", var_name); + hist_err(HIST_ERR_DUPLICATE_VAR, errpos(var_name)); return -EINVAL; } @@ -4481,7 +4488,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, } if (hist_field->flags & HIST_FIELD_FL_VAR_REF) { - hist_err("Using variable references as keys not supported: ", field_str); + hist_err(HIST_ERR_INVALID_REF_KEY, errpos(field_str)); destroy_hist_field(hist_field, 0); ret = -EINVAL; goto out; @@ -4595,13 +4602,13 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) var_name = strsep(&field_str, "="); if (!var_name || !field_str) { - hist_err("Malformed assignment: ", var_name); + hist_err(HIST_ERR_MALFORMED_ASSIGNMENT, errpos(var_name)); ret = -EINVAL; goto free; } if (n_vars == TRACING_MAP_VARS_MAX) { - hist_err("Too many variables defined: ", var_name); + hist_err(HIST_ERR_TOO_MANY_VARS, errpos(var_name)); ret = -EINVAL; goto free; } @@ -5458,11 +5465,6 @@ static int hist_show(struct seq_file *m, void *v) hist_trigger_show(m, data, n++); } - if (have_hist_err()) { - seq_printf(m, "\n%s: error: \n", hist_err_str); - seq_printf(m, " Last command: %s\n", last_cmd); - } - out_unlock: mutex_unlock(&event_mutex); @@ -5834,7 +5836,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, if (named_data) { if (!hist_trigger_match(data, named_data, named_data, true)) { - hist_err("Named hist trigger doesn't match existing named trigger (includes variables): ", hist_data->attrs->name); + hist_err(HIST_ERR_NAMED_MISMATCH, errpos(hist_data->attrs->name)); ret = -EINVAL; goto out; } @@ -5855,7 +5857,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, else if (hist_data->attrs->clear) hist_clear(test); else { - hist_err("Hist trigger already exists", NULL); + hist_err(HIST_ERR_TRIGGER_EEXIST, 0); ret = -EEXIST; } goto out; @@ -5863,7 +5865,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, } new: if (hist_data->attrs->cont || hist_data->attrs->clear) { - hist_err("Can't clear or continue a nonexistent hist trigger", NULL); + hist_err(HIST_ERR_TRIGGER_ENOENT_CLEAR, 0); ret = -ENOENT; goto out; } @@ -5888,7 +5890,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, ret = tracing_set_clock(file->tr, hist_data->attrs->clock); if (ret) { - hist_err("Couldn't set trace_clock: ", clock); + hist_err(HIST_ERR_SET_CLOCK_FAIL, errpos(clock)); goto out; } -- cgit From 34f76afaac7a437a2ce381225135563928b359dd Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 31 Mar 2019 18:48:18 -0500 Subject: tracing: Use tracing error_log with trace event filters Use tracing_log_err() from the new tracing error_log mechanism to send filter parse errors to tracing/error_log. With this change, users will be able to see filter errors by looking at tracing/error_log. The same errors will also be available in the filter file, as expected. Link: http://lkml.kernel.org/r/1d942c419941539a11d78a6810fc5740a99b2974.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_filter.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 05a66493a164..290d42c59101 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -66,7 +66,8 @@ static const char * ops[] = { OPS }; C(INVALID_FILTER, "Meaningless filter expression"), \ C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \ C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \ - C(NO_FILTER, "No filter found"), + C(ERRNO, "Error"), \ + C(NO_FILTER, "No filter found") #undef C #define C(a, b) FILT_ERR_##a @@ -76,7 +77,7 @@ enum { ERRORS }; #undef C #define C(a, b) b -static char *err_text[] = { ERRORS }; +static const char *err_text[] = { ERRORS }; /* Called after a '!' character but "!=" and "!~" are not "not"s */ static bool is_not(const char *str) @@ -947,8 +948,14 @@ static void append_filter_err(struct filter_parse_error *pe, if (pe->lasterr > 0) { trace_seq_printf(s, "\n%*s", pos, "^"); trace_seq_printf(s, "\nparse_error: %s\n", err_text[pe->lasterr]); + tracing_log_err("event filter parse error", + filter->filter_string, err_text, + pe->lasterr, pe->lasterr_pos); } else { trace_seq_printf(s, "\nError: (%d)\n", pe->lasterr); + tracing_log_err("event filter parse error", + filter->filter_string, err_text, + FILT_ERR_ERRNO, 0); } trace_seq_putc(s, 0); buf = kmemdup_nul(s->buffer, s->seq.len, GFP_KERNEL); -- cgit From ab105a4fb89496c71c5a0f3222347c506c30feb0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 31 Mar 2019 18:48:19 -0500 Subject: tracing: Use tracing error_log with probe events Use tracing error_log with probe events for logging error more precisely. This also makes all parse error returns -EINVAL (except for -ENOMEM), because user can see better error message in error_log file now. Link: http://lkml.kernel.org/r/6a4d90e141d138040ea61f4776b991597077451e.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 77 ++++++++----- kernel/trace/trace_probe.c | 274 +++++++++++++++++++++++++++++++------------- kernel/trace/trace_probe.h | 77 ++++++++++++- kernel/trace/trace_uprobe.c | 44 ++++--- 4 files changed, 348 insertions(+), 124 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5d5129b05df7..7d736248a070 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -441,13 +441,8 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) else ret = register_kprobe(&tk->rp.kp); - if (ret == 0) { + if (ret == 0) tk->tp.flags |= TP_FLAG_REGISTERED; - } else if (ret == -EILSEQ) { - pr_warn("Probing address(0x%p) is not an instruction boundary.\n", - tk->rp.kp.addr); - ret = -EINVAL; - } return ret; } @@ -591,7 +586,7 @@ static int trace_kprobe_create(int argc, const char *argv[]) * Type of args: * FETCHARG:TYPE : use TYPE instead of unsigned long. */ - struct trace_kprobe *tk; + struct trace_kprobe *tk = NULL; int i, len, ret = 0; bool is_return = false; char *symbol = NULL, *tmp = NULL; @@ -615,44 +610,50 @@ static int trace_kprobe_create(int argc, const char *argv[]) if (argc < 2) return -ECANCELED; + trace_probe_log_init("trace_kprobe", argc, argv); + event = strchr(&argv[0][1], ':'); if (event) event++; if (isdigit(argv[0][1])) { if (!is_return) { - pr_info("Maxactive is not for kprobe"); - return -EINVAL; + trace_probe_log_err(1, MAXACT_NO_KPROBE); + goto parse_error; } if (event) len = event - &argv[0][1] - 1; else len = strlen(&argv[0][1]); - if (len > MAX_EVENT_NAME_LEN - 1) - return -E2BIG; + if (len > MAX_EVENT_NAME_LEN - 1) { + trace_probe_log_err(1, BAD_MAXACT); + goto parse_error; + } memcpy(buf, &argv[0][1], len); buf[len] = '\0'; ret = kstrtouint(buf, 0, &maxactive); if (ret || !maxactive) { - pr_info("Invalid maxactive number\n"); - return ret; + trace_probe_log_err(1, BAD_MAXACT); + goto parse_error; } /* kretprobes instances are iterated over via a list. The * maximum should stay reasonable. */ if (maxactive > KRETPROBE_MAXACTIVE_MAX) { - pr_info("Maxactive is too big (%d > %d).\n", - maxactive, KRETPROBE_MAXACTIVE_MAX); - return -E2BIG; + trace_probe_log_err(1, MAXACT_TOO_BIG); + goto parse_error; } } /* try to parse an address. if that fails, try to read the * input as a symbol. */ if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) { + trace_probe_log_set_index(1); /* Check whether uprobe event specified */ - if (strchr(argv[1], '/') && strchr(argv[1], ':')) - return -ECANCELED; + if (strchr(argv[1], '/') && strchr(argv[1], ':')) { + ret = -ECANCELED; + goto error; + } /* a symbol specified */ symbol = kstrdup(argv[1], GFP_KERNEL); if (!symbol) @@ -660,23 +661,23 @@ static int trace_kprobe_create(int argc, const char *argv[]) /* TODO: support .init module functions */ ret = traceprobe_split_symbol_offset(symbol, &offset); if (ret || offset < 0 || offset > UINT_MAX) { - pr_info("Failed to parse either an address or a symbol.\n"); - goto out; + trace_probe_log_err(0, BAD_PROBE_ADDR); + goto parse_error; } if (kprobe_on_func_entry(NULL, symbol, offset)) flags |= TPARG_FL_FENTRY; if (offset && is_return && !(flags & TPARG_FL_FENTRY)) { - pr_info("Given offset is not valid for return probe.\n"); - ret = -EINVAL; - goto out; + trace_probe_log_err(0, BAD_RETPROBE); + goto parse_error; } } - argc -= 2; argv += 2; + trace_probe_log_set_index(0); if (event) { - ret = traceprobe_parse_event_name(&event, &group, buf); + ret = traceprobe_parse_event_name(&event, &group, buf, + event - argv[0]); if (ret) - goto out; + goto parse_error; } else { /* Make a new event name */ if (symbol) @@ -691,13 +692,14 @@ static int trace_kprobe_create(int argc, const char *argv[]) /* setup a probe */ tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive, - argc, is_return); + argc - 2, is_return); if (IS_ERR(tk)) { ret = PTR_ERR(tk); - /* This must return -ENOMEM otherwise there is a bug */ + /* This must return -ENOMEM, else there is a bug */ WARN_ON_ONCE(ret != -ENOMEM); - goto out; + goto out; /* We know tk is not allocated */ } + argc -= 2; argv += 2; /* parse arguments */ for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { @@ -707,19 +709,32 @@ static int trace_kprobe_create(int argc, const char *argv[]) goto error; } + trace_probe_log_set_index(i + 2); ret = traceprobe_parse_probe_arg(&tk->tp, i, tmp, flags); kfree(tmp); if (ret) - goto error; + goto error; /* This can be -ENOMEM */ } ret = register_trace_kprobe(tk); - if (ret) + if (ret) { + trace_probe_log_set_index(1); + if (ret == -EILSEQ) + trace_probe_log_err(0, BAD_INSN_BNDRY); + else if (ret == -ENOENT) + trace_probe_log_err(0, BAD_PROBE_ADDR); + else if (ret != -ENOMEM) + trace_probe_log_err(0, FAIL_REG_PROBE); goto error; + } + out: + trace_probe_log_clear(); kfree(symbol); return ret; +parse_error: + ret = -EINVAL; error: free_trace_kprobe(tk); goto out; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 8f8411e7835f..e11f98c49d72 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -13,6 +13,11 @@ #include "trace_probe.h" +#undef C +#define C(a, b) b + +static const char *trace_probe_err_text[] = { ERRORS }; + static const char *reserved_field_names[] = { "common_type", "common_flags", @@ -133,6 +138,60 @@ fail: return NULL; } +static struct trace_probe_log trace_probe_log; + +void trace_probe_log_init(const char *subsystem, int argc, const char **argv) +{ + trace_probe_log.subsystem = subsystem; + trace_probe_log.argc = argc; + trace_probe_log.argv = argv; + trace_probe_log.index = 0; +} + +void trace_probe_log_clear(void) +{ + memset(&trace_probe_log, 0, sizeof(trace_probe_log)); +} + +void trace_probe_log_set_index(int index) +{ + trace_probe_log.index = index; +} + +void __trace_probe_log_err(int offset, int err_type) +{ + char *command, *p; + int i, len = 0, pos = 0; + + if (!trace_probe_log.argv) + return; + + /* Recalcurate the length and allocate buffer */ + for (i = 0; i < trace_probe_log.argc; i++) { + if (i == trace_probe_log.index) + pos = len; + len += strlen(trace_probe_log.argv[i]) + 1; + } + command = kzalloc(len, GFP_KERNEL); + if (!command) + return; + + /* And make a command string from argv array */ + p = command; + for (i = 0; i < trace_probe_log.argc; i++) { + len = strlen(trace_probe_log.argv[i]); + strcpy(p, trace_probe_log.argv[i]); + p[len] = ' '; + p += len + 1; + } + *(p - 1) = '\0'; + + tracing_log_err(trace_probe_log.subsystem, command, + trace_probe_err_text, err_type, pos + offset); + + kfree(command); +} + /* Split symbol and offset. */ int traceprobe_split_symbol_offset(char *symbol, long *offset) { @@ -156,7 +215,7 @@ int traceprobe_split_symbol_offset(char *symbol, long *offset) /* @buf must has MAX_EVENT_NAME_LEN size */ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, - char *buf) + char *buf, int offset) { const char *slash, *event = *pevent; int len; @@ -164,32 +223,33 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, slash = strchr(event, '/'); if (slash) { if (slash == event) { - pr_info("Group name is not specified\n"); + trace_probe_log_err(offset, NO_GROUP_NAME); return -EINVAL; } if (slash - event + 1 > MAX_EVENT_NAME_LEN) { - pr_info("Group name is too long\n"); - return -E2BIG; + trace_probe_log_err(offset, GROUP_TOO_LONG); + return -EINVAL; } strlcpy(buf, event, slash - event + 1); if (!is_good_name(buf)) { - pr_info("Group name must follow the same rules as C identifiers\n"); + trace_probe_log_err(offset, BAD_GROUP_NAME); return -EINVAL; } *pgroup = buf; *pevent = slash + 1; + offset += slash - event + 1; event = *pevent; } len = strlen(event); if (len == 0) { - pr_info("Event name is not specified\n"); + trace_probe_log_err(offset, NO_EVENT_NAME); return -EINVAL; } else if (len > MAX_EVENT_NAME_LEN) { - pr_info("Event name is too long\n"); - return -E2BIG; + trace_probe_log_err(offset, EVENT_TOO_LONG); + return -EINVAL; } if (!is_good_name(event)) { - pr_info("Event name must follow the same rules as C identifiers\n"); + trace_probe_log_err(offset, BAD_EVENT_NAME); return -EINVAL; } return 0; @@ -198,56 +258,67 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) static int parse_probe_vars(char *arg, const struct fetch_type *t, - struct fetch_insn *code, unsigned int flags) + struct fetch_insn *code, unsigned int flags, int offs) { unsigned long param; int ret = 0; int len; if (strcmp(arg, "retval") == 0) { - if (flags & TPARG_FL_RETURN) + if (flags & TPARG_FL_RETURN) { code->op = FETCH_OP_RETVAL; - else + } else { + trace_probe_log_err(offs, RETVAL_ON_PROBE); ret = -EINVAL; + } } else if ((len = str_has_prefix(arg, "stack"))) { if (arg[len] == '\0') { code->op = FETCH_OP_STACKP; } else if (isdigit(arg[len])) { ret = kstrtoul(arg + len, 10, ¶m); - if (ret || ((flags & TPARG_FL_KERNEL) && - param > PARAM_MAX_STACK)) + if (ret) { + goto inval_var; + } else if ((flags & TPARG_FL_KERNEL) && + param > PARAM_MAX_STACK) { + trace_probe_log_err(offs, BAD_STACK_NUM); ret = -EINVAL; - else { + } else { code->op = FETCH_OP_STACK; code->param = (unsigned int)param; } } else - ret = -EINVAL; + goto inval_var; } else if (strcmp(arg, "comm") == 0) { code->op = FETCH_OP_COMM; #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API } else if (((flags & TPARG_FL_MASK) == (TPARG_FL_KERNEL | TPARG_FL_FENTRY)) && (len = str_has_prefix(arg, "arg"))) { - if (!isdigit(arg[len])) - return -EINVAL; ret = kstrtoul(arg + len, 10, ¶m); - if (ret || !param || param > PARAM_MAX_STACK) + if (ret) { + goto inval_var; + } else if (!param || param > PARAM_MAX_STACK) { + trace_probe_log_err(offs, BAD_ARG_NUM); return -EINVAL; + } code->op = FETCH_OP_ARG; code->param = (unsigned int)param - 1; #endif } else - ret = -EINVAL; + goto inval_var; return ret; + +inval_var: + trace_probe_log_err(offs, BAD_VAR); + return -EINVAL; } /* Recursive argument parser */ static int parse_probe_arg(char *arg, const struct fetch_type *type, struct fetch_insn **pcode, struct fetch_insn *end, - unsigned int flags) + unsigned int flags, int offs) { struct fetch_insn *code = *pcode; unsigned long param; @@ -257,7 +328,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type, switch (arg[0]) { case '$': - ret = parse_probe_vars(arg + 1, type, code, flags); + ret = parse_probe_vars(arg + 1, type, code, flags, offs); break; case '%': /* named register */ @@ -266,47 +337,57 @@ parse_probe_arg(char *arg, const struct fetch_type *type, code->op = FETCH_OP_REG; code->param = (unsigned int)ret; ret = 0; - } + } else + trace_probe_log_err(offs, BAD_REG_NAME); break; case '@': /* memory, file-offset or symbol */ if (isdigit(arg[1])) { ret = kstrtoul(arg + 1, 0, ¶m); - if (ret) + if (ret) { + trace_probe_log_err(offs, BAD_MEM_ADDR); break; + } /* load address */ code->op = FETCH_OP_IMM; code->immediate = param; } else if (arg[1] == '+') { /* kprobes don't support file offsets */ - if (flags & TPARG_FL_KERNEL) + if (flags & TPARG_FL_KERNEL) { + trace_probe_log_err(offs, FILE_ON_KPROBE); return -EINVAL; - + } ret = kstrtol(arg + 2, 0, &offset); - if (ret) + if (ret) { + trace_probe_log_err(offs, BAD_FILE_OFFS); break; + } code->op = FETCH_OP_FOFFS; code->immediate = (unsigned long)offset; // imm64? } else { /* uprobes don't support symbols */ - if (!(flags & TPARG_FL_KERNEL)) + if (!(flags & TPARG_FL_KERNEL)) { + trace_probe_log_err(offs, SYM_ON_UPROBE); return -EINVAL; - + } /* Preserve symbol for updating */ code->op = FETCH_NOP_SYMBOL; code->data = kstrdup(arg + 1, GFP_KERNEL); if (!code->data) return -ENOMEM; - if (++code == end) - return -E2BIG; - + if (++code == end) { + trace_probe_log_err(offs, TOO_MANY_OPS); + return -EINVAL; + } code->op = FETCH_OP_IMM; code->immediate = 0; } /* These are fetching from memory */ - if (++code == end) - return -E2BIG; + if (++code == end) { + trace_probe_log_err(offs, TOO_MANY_OPS); + return -EINVAL; + } *pcode = code; code->op = FETCH_OP_DEREF; code->offset = offset; @@ -317,28 +398,38 @@ parse_probe_arg(char *arg, const struct fetch_type *type, /* fall through */ case '-': tmp = strchr(arg, '('); - if (!tmp) + if (!tmp) { + trace_probe_log_err(offs, DEREF_NEED_BRACE); return -EINVAL; - + } *tmp = '\0'; ret = kstrtol(arg, 0, &offset); - if (ret) + if (ret) { + trace_probe_log_err(offs, BAD_DEREF_OFFS); break; - + } + offs += (tmp + 1 - arg) + (arg[0] != '-' ? 1 : 0); arg = tmp + 1; tmp = strrchr(arg, ')'); - - if (tmp) { + if (!tmp) { + trace_probe_log_err(offs + strlen(arg), + DEREF_OPEN_BRACE); + return -EINVAL; + } else { const struct fetch_type *t2 = find_fetch_type(NULL); *tmp = '\0'; - ret = parse_probe_arg(arg, t2, &code, end, flags); + ret = parse_probe_arg(arg, t2, &code, end, flags, offs); if (ret) break; - if (code->op == FETCH_OP_COMM) + if (code->op == FETCH_OP_COMM) { + trace_probe_log_err(offs, COMM_CANT_DEREF); + return -EINVAL; + } + if (++code == end) { + trace_probe_log_err(offs, TOO_MANY_OPS); return -EINVAL; - if (++code == end) - return -E2BIG; + } *pcode = code; code->op = FETCH_OP_DEREF; @@ -348,6 +439,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type, } if (!ret && code->op == FETCH_OP_NOP) { /* Parsed, but do not find fetch method */ + trace_probe_log_err(offs, BAD_FETCH_ARG); ret = -EINVAL; } return ret; @@ -379,7 +471,7 @@ static int __parse_bitfield_probe_arg(const char *bf, return -EINVAL; code++; if (code->op != FETCH_OP_NOP) - return -E2BIG; + return -EINVAL; *pcode = code; code->op = FETCH_OP_MOD_BF; @@ -392,32 +484,53 @@ static int __parse_bitfield_probe_arg(const char *bf, /* String length checking wrapper */ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, - struct probe_arg *parg, unsigned int flags) + struct probe_arg *parg, unsigned int flags, int offset) { struct fetch_insn *code, *scode, *tmp = NULL; - char *t, *t2; + char *t, *t2, *t3; int ret, len; - if (strlen(arg) > MAX_ARGSTR_LEN) { - pr_info("Argument is too long.: %s\n", arg); - return -ENOSPC; + len = strlen(arg); + if (len > MAX_ARGSTR_LEN) { + trace_probe_log_err(offset, ARG_TOO_LONG); + return -EINVAL; + } else if (len == 0) { + trace_probe_log_err(offset, NO_ARG_BODY); + return -EINVAL; } + parg->comm = kstrdup(arg, GFP_KERNEL); - if (!parg->comm) { - pr_info("Failed to allocate memory for command '%s'.\n", arg); + if (!parg->comm) return -ENOMEM; - } + t = strchr(arg, ':'); if (t) { *t = '\0'; t2 = strchr(++t, '['); if (t2) { - *t2 = '\0'; - parg->count = simple_strtoul(t2 + 1, &t2, 0); - if (strcmp(t2, "]") || parg->count == 0) + *t2++ = '\0'; + t3 = strchr(t2, ']'); + if (!t3) { + offset += t2 + strlen(t2) - arg; + trace_probe_log_err(offset, + ARRAY_NO_CLOSE); + return -EINVAL; + } else if (t3[1] != '\0') { + trace_probe_log_err(offset + t3 + 1 - arg, + BAD_ARRAY_SUFFIX); + return -EINVAL; + } + *t3 = '\0'; + if (kstrtouint(t2, 0, &parg->count) || !parg->count) { + trace_probe_log_err(offset + t2 - arg, + BAD_ARRAY_NUM); return -EINVAL; - if (parg->count > MAX_ARRAY_LEN) - return -E2BIG; + } + if (parg->count > MAX_ARRAY_LEN) { + trace_probe_log_err(offset + t2 - arg, + ARRAY_TOO_BIG); + return -EINVAL; + } } } /* @@ -429,7 +542,7 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, else parg->type = find_fetch_type(t); if (!parg->type) { - pr_info("Unsupported type: %s\n", t); + trace_probe_log_err(offset + (t ? (t - arg) : 0), BAD_TYPE); return -EINVAL; } parg->offset = *size; @@ -450,7 +563,7 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, code[FETCH_INSN_MAX - 1].op = FETCH_OP_END; ret = parse_probe_arg(arg, parg->type, &code, &code[FETCH_INSN_MAX - 1], - flags); + flags, offset); if (ret) goto fail; @@ -458,7 +571,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, if (!strcmp(parg->type->name, "string")) { if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM) { - pr_info("string only accepts memory or address.\n"); + trace_probe_log_err(offset + (t ? (t - arg) : 0), + BAD_STRING); ret = -EINVAL; goto fail; } @@ -470,7 +584,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, */ code++; if (code->op != FETCH_OP_NOP) { - ret = -E2BIG; + trace_probe_log_err(offset, TOO_MANY_OPS); + ret = -EINVAL; goto fail; } } @@ -483,7 +598,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, } else { code++; if (code->op != FETCH_OP_NOP) { - ret = -E2BIG; + trace_probe_log_err(offset, TOO_MANY_OPS); + ret = -EINVAL; goto fail; } code->op = FETCH_OP_ST_RAW; @@ -493,20 +609,24 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, /* Modify operation */ if (t != NULL) { ret = __parse_bitfield_probe_arg(t, parg->type, &code); - if (ret) + if (ret) { + trace_probe_log_err(offset + t - arg, BAD_BITFIELD); goto fail; + } } /* Loop(Array) operation */ if (parg->count) { if (scode->op != FETCH_OP_ST_MEM && scode->op != FETCH_OP_ST_STRING) { - pr_info("array only accepts memory or address\n"); + trace_probe_log_err(offset + (t ? (t - arg) : 0), + BAD_STRING); ret = -EINVAL; goto fail; } code++; if (code->op != FETCH_OP_NOP) { - ret = -E2BIG; + trace_probe_log_err(offset, TOO_MANY_OPS); + ret = -EINVAL; goto fail; } code->op = FETCH_OP_LP_ARRAY; @@ -555,15 +675,19 @@ int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, char *arg, { struct probe_arg *parg = &tp->args[i]; char *body; - int ret; /* Increment count for freeing args in error case */ tp->nr_args++; body = strchr(arg, '='); if (body) { - if (body - arg > MAX_ARG_NAME_LEN || body == arg) + if (body - arg > MAX_ARG_NAME_LEN) { + trace_probe_log_err(0, ARG_NAME_TOO_LONG); + return -EINVAL; + } else if (body == arg) { + trace_probe_log_err(0, NO_ARG_NAME); return -EINVAL; + } parg->name = kmemdup_nul(arg, body - arg, GFP_KERNEL); body++; } else { @@ -575,22 +699,16 @@ int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, char *arg, return -ENOMEM; if (!is_good_name(parg->name)) { - pr_info("Invalid argument[%d] name: %s\n", - i, parg->name); + trace_probe_log_err(0, BAD_ARG_NAME); return -EINVAL; } - if (traceprobe_conflict_field_name(parg->name, tp->args, i)) { - pr_info("Argument[%d]: '%s' conflicts with another field.\n", - i, parg->name); + trace_probe_log_err(0, USED_ARG_NAME); return -EINVAL; } - /* Parse fetch argument */ - ret = traceprobe_parse_probe_arg_body(body, &tp->size, parg, flags); - if (ret) - pr_info("Parse error at argument[%d]. (%d)\n", i, ret); - return ret; + return traceprobe_parse_probe_arg_body(body, &tp->size, parg, flags, + body - arg); } void traceprobe_free_probe_arg(struct probe_arg *arg) diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 2177c206de15..b7737666c1a8 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -280,8 +280,8 @@ extern int traceprobe_update_arg(struct probe_arg *arg); extern void traceprobe_free_probe_arg(struct probe_arg *arg); extern int traceprobe_split_symbol_offset(char *symbol, long *offset); -extern int traceprobe_parse_event_name(const char **pevent, - const char **pgroup, char *buf); +int traceprobe_parse_event_name(const char **pevent, const char **pgroup, + char *buf, int offset); extern int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return); @@ -298,3 +298,76 @@ extern void destroy_local_trace_uprobe(struct trace_event_call *event_call); #endif extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, size_t offset, struct trace_probe *tp); + +#undef ERRORS +#define ERRORS \ + C(FILE_NOT_FOUND, "Failed to find the given file"), \ + C(NO_REGULAR_FILE, "Not a regular file"), \ + C(BAD_REFCNT, "Invalid reference counter offset"), \ + C(REFCNT_OPEN_BRACE, "Reference counter brace is not closed"), \ + C(BAD_REFCNT_SUFFIX, "Reference counter has wrong suffix"), \ + C(BAD_UPROBE_OFFS, "Invalid uprobe offset"), \ + C(MAXACT_NO_KPROBE, "Maxactive is not for kprobe"), \ + C(BAD_MAXACT, "Invalid maxactive number"), \ + C(MAXACT_TOO_BIG, "Maxactive is too big"), \ + C(BAD_PROBE_ADDR, "Invalid probed address or symbol"), \ + C(BAD_RETPROBE, "Retprobe address must be an function entry"), \ + C(NO_GROUP_NAME, "Group name is not specified"), \ + C(GROUP_TOO_LONG, "Group name is too long"), \ + C(BAD_GROUP_NAME, "Group name must follow the same rules as C identifiers"), \ + C(NO_EVENT_NAME, "Event name is not specified"), \ + C(EVENT_TOO_LONG, "Event name is too long"), \ + C(BAD_EVENT_NAME, "Event name must follow the same rules as C identifiers"), \ + C(RETVAL_ON_PROBE, "$retval is not available on probe"), \ + C(BAD_STACK_NUM, "Invalid stack number"), \ + C(BAD_ARG_NUM, "Invalid argument number"), \ + C(BAD_VAR, "Invalid $-valiable specified"), \ + C(BAD_REG_NAME, "Invalid register name"), \ + C(BAD_MEM_ADDR, "Invalid memory address"), \ + C(FILE_ON_KPROBE, "File offset is not available with kprobe"), \ + C(BAD_FILE_OFFS, "Invalid file offset value"), \ + C(SYM_ON_UPROBE, "Symbol is not available with uprobe"), \ + C(TOO_MANY_OPS, "Dereference is too much nested"), \ + C(DEREF_NEED_BRACE, "Dereference needs a brace"), \ + C(BAD_DEREF_OFFS, "Invalid dereference offset"), \ + C(DEREF_OPEN_BRACE, "Dereference brace is not closed"), \ + C(COMM_CANT_DEREF, "$comm can not be dereferenced"), \ + C(BAD_FETCH_ARG, "Invalid fetch argument"), \ + C(ARRAY_NO_CLOSE, "Array is not closed"), \ + C(BAD_ARRAY_SUFFIX, "Array has wrong suffix"), \ + C(BAD_ARRAY_NUM, "Invalid array size"), \ + C(ARRAY_TOO_BIG, "Array number is too big"), \ + C(BAD_TYPE, "Unknown type is specified"), \ + C(BAD_STRING, "String accepts only memory argument"), \ + C(BAD_BITFIELD, "Invalid bitfield"), \ + C(ARG_NAME_TOO_LONG, "Argument name is too long"), \ + C(NO_ARG_NAME, "Argument name is not specified"), \ + C(BAD_ARG_NAME, "Argument name must follow the same rules as C identifiers"), \ + C(USED_ARG_NAME, "This argument name is already used"), \ + C(ARG_TOO_LONG, "Argument expression is too long"), \ + C(NO_ARG_BODY, "No argument expression"), \ + C(BAD_INSN_BNDRY, "Probe point is not an instruction boundary"),\ + C(FAIL_REG_PROBE, "Failed to register probe event"), + +#undef C +#define C(a, b) TP_ERR_##a + +/* Define TP_ERR_ */ +enum { ERRORS }; + +/* Error text is defined in trace_probe.c */ + +struct trace_probe_log { + const char *subsystem; + const char **argv; + int argc; + int index; +}; + +void trace_probe_log_init(const char *subsystem, int argc, const char **argv); +void trace_probe_log_set_index(int index); +void trace_probe_log_clear(void); +void __trace_probe_log_err(int offset, int err); + +#define trace_probe_log_err(offs, err) \ + __trace_probe_log_err(offs, TP_ERR_##err) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index be78d99ee6bc..cd8750a72768 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -457,13 +457,19 @@ static int trace_uprobe_create(int argc, const char **argv) return -ECANCELED; } + trace_probe_log_init("trace_uprobe", argc, argv); + trace_probe_log_set_index(1); /* filename is the 2nd argument */ + *arg++ = '\0'; ret = kern_path(filename, LOOKUP_FOLLOW, &path); if (ret) { + trace_probe_log_err(0, FILE_NOT_FOUND); kfree(filename); + trace_probe_log_clear(); return ret; } if (!d_is_reg(path.dentry)) { + trace_probe_log_err(0, NO_REGULAR_FILE); ret = -EINVAL; goto fail_address_parse; } @@ -472,9 +478,16 @@ static int trace_uprobe_create(int argc, const char **argv) rctr = strchr(arg, '('); if (rctr) { rctr_end = strchr(rctr, ')'); - if (rctr > rctr_end || *(rctr_end + 1) != 0) { + if (!rctr_end) { + ret = -EINVAL; + rctr_end = rctr + strlen(rctr); + trace_probe_log_err(rctr_end - filename, + REFCNT_OPEN_BRACE); + goto fail_address_parse; + } else if (rctr_end[1] != '\0') { ret = -EINVAL; - pr_info("Invalid reference counter offset.\n"); + trace_probe_log_err(rctr_end + 1 - filename, + BAD_REFCNT_SUFFIX); goto fail_address_parse; } @@ -482,22 +495,23 @@ static int trace_uprobe_create(int argc, const char **argv) *rctr_end = '\0'; ret = kstrtoul(rctr, 0, &ref_ctr_offset); if (ret) { - pr_info("Invalid reference counter offset.\n"); + trace_probe_log_err(rctr - filename, BAD_REFCNT); goto fail_address_parse; } } /* Parse uprobe offset. */ ret = kstrtoul(arg, 0, &offset); - if (ret) + if (ret) { + trace_probe_log_err(arg - filename, BAD_UPROBE_OFFS); goto fail_address_parse; - - argc -= 2; - argv += 2; + } /* setup a probe */ + trace_probe_log_set_index(0); if (event) { - ret = traceprobe_parse_event_name(&event, &group, buf); + ret = traceprobe_parse_event_name(&event, &group, buf, + event - argv[0]); if (ret) goto fail_address_parse; } else { @@ -519,6 +533,9 @@ static int trace_uprobe_create(int argc, const char **argv) kfree(tail); } + argc -= 2; + argv += 2; + tu = alloc_trace_uprobe(group, event, argc, is_return); if (IS_ERR(tu)) { ret = PTR_ERR(tu); @@ -539,6 +556,7 @@ static int trace_uprobe_create(int argc, const char **argv) goto error; } + trace_probe_log_set_index(i + 2); ret = traceprobe_parse_probe_arg(&tu->tp, i, tmp, is_return ? TPARG_FL_RETURN : 0); kfree(tmp); @@ -547,20 +565,20 @@ static int trace_uprobe_create(int argc, const char **argv) } ret = register_trace_uprobe(tu); - if (ret) - goto error; - return 0; + if (!ret) + goto out; error: free_trace_uprobe(tu); +out: + trace_probe_log_clear(); return ret; fail_address_parse: + trace_probe_log_clear(); path_put(&path); kfree(filename); - pr_info("Failed to parse address or file.\n"); - return ret; } -- cgit From 06ee7115b0d1742de745ad143fb5e06d77d27fba Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Apr 2019 21:27:40 -0700 Subject: bpf: add verifier stats and log_level bit 2 In order to understand the verifier bottlenecks add various stats and extend log_level: log_level 1 and 2 are kept as-is: bit 0 - level=1 - print every insn and verifier state at branch points bit 1 - level=2 - print every insn and verifier state at every insn bit 2 - level=4 - print verifier error and stats at the end of verification When verifier rejects the program the libbpf is trying to load the program twice. Once with log_level=0 (no messages, only error code is reported to user space) and second time with log_level=1 to tell the user why the verifier rejected it. With introduction of bit 2 - level=4 the libbpf can choose to always use that level and load programs once, since the verification speed is not affected and in case of error the verbose message will be available. Note that the verifier stats are not part of uapi just like all other verbose messages. They're expected to change in the future. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 21 ++++++++++++ kernel/bpf/verifier.c | 76 ++++++++++++++++++++++++++++++-------------- 2 files changed, 73 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7d8228d1c898..f7e15eeb60bb 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -248,6 +248,12 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) return log->len_used >= log->len_total - 1; } +#define BPF_LOG_LEVEL1 1 +#define BPF_LOG_LEVEL2 2 +#define BPF_LOG_STATS 4 +#define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2) +#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS) + static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) { return log->level && log->ubuf && !bpf_verifier_log_full(log); @@ -284,6 +290,21 @@ struct bpf_verifier_env { struct bpf_verifier_log log; struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; u32 subprog_cnt; + /* number of instructions analyzed by the verifier */ + u32 insn_processed; + /* total verification time */ + u64 verification_time; + /* maximum number of verifier states kept in 'branching' instructions */ + u32 max_states_per_insn; + /* total number of allocated verifier states */ + u32 total_states; + /* some states are freed during program analysis. + * this is peak number of states. this number dominates kernel + * memory consumption during verification + */ + u32 peak_states; + /* longest register parentage chain walked for liveness marking */ + u32 longest_mark_read_walk; }; __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 87221fda1321..e2001c1e40b3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1092,7 +1092,7 @@ static int check_subprogs(struct bpf_verifier_env *env) */ subprog[env->subprog_cnt].start = insn_cnt; - if (env->log.level > 1) + if (env->log.level & BPF_LOG_LEVEL2) for (i = 0; i < env->subprog_cnt; i++) verbose(env, "func#%d @%d\n", i, subprog[i].start); @@ -1139,6 +1139,7 @@ static int mark_reg_read(struct bpf_verifier_env *env, struct bpf_reg_state *parent) { bool writes = parent == state->parent; /* Observe write marks */ + int cnt = 0; while (parent) { /* if read wasn't screened by an earlier write ... */ @@ -1155,7 +1156,11 @@ static int mark_reg_read(struct bpf_verifier_env *env, state = parent; parent = state->parent; writes = true; + cnt++; } + + if (env->longest_mark_read_walk < cnt) + env->longest_mark_read_walk = cnt; return 0; } @@ -1455,7 +1460,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, * need to try adding each of min_value and max_value to off * to make sure our theoretical access will be safe. */ - if (env->log.level) + if (env->log.level & BPF_LOG_LEVEL) print_verifier_state(env, state); /* The minimum value is only important with signed @@ -2938,7 +2943,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, /* and go analyze first insn of the callee */ *insn_idx = target_insn; - if (env->log.level) { + if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "caller:\n"); print_verifier_state(env, caller); verbose(env, "callee:\n"); @@ -2978,7 +2983,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) return err; *insn_idx = callee->callsite + 1; - if (env->log.level) { + if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "returning from callee:\n"); print_verifier_state(env, callee); verbose(env, "to caller at %d:\n", *insn_idx); @@ -5001,7 +5006,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, insn->dst_reg); return -EACCES; } - if (env->log.level) + if (env->log.level & BPF_LOG_LEVEL) print_verifier_state(env, this_branch->frame[this_branch->curframe]); return 0; } @@ -6181,6 +6186,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) states_cnt++; } + if (env->max_states_per_insn < states_cnt) + env->max_states_per_insn = states_cnt; + if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) return 0; @@ -6194,6 +6202,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); if (!new_sl) return -ENOMEM; + env->total_states++; + env->peak_states++; /* add new state to the head of linked list */ new = &new_sl->state; @@ -6278,8 +6288,7 @@ static int do_check(struct bpf_verifier_env *env) struct bpf_verifier_state *state; struct bpf_insn *insns = env->prog->insnsi; struct bpf_reg_state *regs; - int insn_cnt = env->prog->len, i; - int insn_processed = 0; + int insn_cnt = env->prog->len; bool do_print_state = false; env->prev_linfo = NULL; @@ -6314,10 +6323,10 @@ static int do_check(struct bpf_verifier_env *env) insn = &insns[env->insn_idx]; class = BPF_CLASS(insn->code); - if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { + if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { verbose(env, "BPF program is too large. Processed %d insn\n", - insn_processed); + env->insn_processed); return -E2BIG; } @@ -6326,7 +6335,7 @@ static int do_check(struct bpf_verifier_env *env) return err; if (err == 1) { /* found equivalent state, can prune the search */ - if (env->log.level) { + if (env->log.level & BPF_LOG_LEVEL) { if (do_print_state) verbose(env, "\nfrom %d to %d%s: safe\n", env->prev_insn_idx, env->insn_idx, @@ -6344,8 +6353,9 @@ static int do_check(struct bpf_verifier_env *env) if (need_resched()) cond_resched(); - if (env->log.level > 1 || (env->log.level && do_print_state)) { - if (env->log.level > 1) + if (env->log.level & BPF_LOG_LEVEL2 || + (env->log.level & BPF_LOG_LEVEL && do_print_state)) { + if (env->log.level & BPF_LOG_LEVEL2) verbose(env, "%d:", env->insn_idx); else verbose(env, "\nfrom %d to %d%s:", @@ -6356,7 +6366,7 @@ static int do_check(struct bpf_verifier_env *env) do_print_state = false; } - if (env->log.level) { + if (env->log.level & BPF_LOG_LEVEL) { const struct bpf_insn_cbs cbs = { .cb_print = verbose, .private_data = env, @@ -6621,16 +6631,6 @@ process_bpf_exit: env->insn_idx++; } - verbose(env, "processed %d insns (limit %d), stack depth ", - insn_processed, BPF_COMPLEXITY_LIMIT_INSNS); - for (i = 0; i < env->subprog_cnt; i++) { - u32 depth = env->subprog_info[i].stack_depth; - - verbose(env, "%d", depth); - if (i + 1 < env->subprog_cnt) - verbose(env, "+"); - } - verbose(env, "\n"); env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; return 0; } @@ -7854,9 +7854,34 @@ static void free_states(struct bpf_verifier_env *env) kfree(env->explored_states); } +static void print_verification_stats(struct bpf_verifier_env *env) +{ + int i; + + if (env->log.level & BPF_LOG_STATS) { + verbose(env, "verification time %lld usec\n", + div_u64(env->verification_time, 1000)); + verbose(env, "stack depth "); + for (i = 0; i < env->subprog_cnt; i++) { + u32 depth = env->subprog_info[i].stack_depth; + + verbose(env, "%d", depth); + if (i + 1 < env->subprog_cnt) + verbose(env, "+"); + } + verbose(env, "\n"); + } + verbose(env, "processed %d insns (limit %d) max_states_per_insn %d " + "total_states %d peak_states %d mark_read %d\n", + env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS, + env->max_states_per_insn, env->total_states, + env->peak_states, env->longest_mark_read_walk); +} + int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, union bpf_attr __user *uattr) { + u64 start_time = ktime_get_ns(); struct bpf_verifier_env *env; struct bpf_verifier_log *log; int i, len, ret = -EINVAL; @@ -7899,7 +7924,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, ret = -EINVAL; /* log attributes have to be sane */ if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 || - !log->level || !log->ubuf) + !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK) goto err_unlock; } @@ -7980,6 +8005,9 @@ skip_full_check: if (ret == 0) ret = fixup_call_args(env); + env->verification_time = ktime_get_ns() - start_time; + print_verification_stats(env); + if (log->level && bpf_verifier_log_full(log)) ret = -ENOSPC; if (log->level && !log->ubuf) { -- cgit From 9f4686c41bdff051f557accb531af79dd1773687 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Apr 2019 21:27:41 -0700 Subject: bpf: improve verification speed by droping states Branch instructions, branch targets and calls in a bpf program are the places where the verifier remembers states that led to successful verification of the program. These states are used to prune brute force program analysis. For unprivileged programs there is a limit of 64 states per such 'branching' instructions (maximum length is tracked by max_states_per_insn counter introduced in the previous patch). Simply reducing this threshold to 32 or lower increases insn_processed metric to the point that small valid programs get rejected. For root programs there is no limit and cilium programs can have max_states_per_insn to be 100 or higher. Walking 100+ states multiplied by number of 'branching' insns during verification consumes significant amount of cpu time. Turned out simple LRU-like mechanism can be used to remove states that unlikely will be helpful in future search pruning. This patch introduces hit_cnt and miss_cnt counters: hit_cnt - this many times this state successfully pruned the search miss_cnt - this many times this state was not equivalent to other states (and that other states were added to state list) The heuristic introduced in this patch is: if (sl->miss_cnt > sl->hit_cnt * 3 + 3) /* drop this state from future considerations */ Higher numbers increase max_states_per_insn (allow more states to be considered for pruning) and slow verification speed, but do not meaningfully reduce insn_processed metric. Lower numbers drop too many states and insn_processed increases too much. Many different formulas were considered. This one is simple and works well enough in practice. (the analysis was done on selftests/progs/* and on cilium programs) The end result is this heuristic improves verification speed by 10 times. Large synthetic programs that used to take a second more now take 1/10 of a second. In cases where max_states_per_insn used to be 100 or more, now it's ~10. There is a slight increase in insn_processed for cilium progs: before after bpf_lb-DLB_L3.o 1831 1838 bpf_lb-DLB_L4.o 3029 3218 bpf_lb-DUNKNOWN.o 1064 1064 bpf_lxc-DDROP_ALL.o 26309 26935 bpf_lxc-DUNKNOWN.o 33517 34439 bpf_netdev.o 9713 9721 bpf_overlay.o 6184 6184 bpf_lcx_jit.o 37335 39389 And 2-3 times improvement in the verification speed. Signed-off-by: Alexei Starovoitov Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 2 ++ kernel/bpf/verifier.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 43 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index f7e15eeb60bb..fc8254d6b569 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -207,6 +207,7 @@ struct bpf_verifier_state { struct bpf_verifier_state_list { struct bpf_verifier_state state; struct bpf_verifier_state_list *next; + int miss_cnt, hit_cnt; }; /* Possible states for alu_state member. */ @@ -280,6 +281,7 @@ struct bpf_verifier_env { bool strict_alignment; /* perform strict pointer alignment checks */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ + struct bpf_verifier_state_list *free_list; struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e2001c1e40b3..a636db4a7a4e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6152,11 +6152,13 @@ static int propagate_liveness(struct bpf_verifier_env *env, static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { struct bpf_verifier_state_list *new_sl; - struct bpf_verifier_state_list *sl; + struct bpf_verifier_state_list *sl, **pprev; struct bpf_verifier_state *cur = env->cur_state, *new; int i, j, err, states_cnt = 0; - sl = env->explored_states[insn_idx]; + pprev = &env->explored_states[insn_idx]; + sl = *pprev; + if (!sl) /* this 'insn_idx' instruction wasn't marked, so we will not * be doing state search here @@ -6167,6 +6169,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) while (sl != STATE_LIST_MARK) { if (states_equal(env, &sl->state, cur)) { + sl->hit_cnt++; /* reached equivalent register/stack state, * prune the search. * Registers read by the continuation are read by us. @@ -6182,8 +6185,35 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return err; return 1; } - sl = sl->next; states_cnt++; + sl->miss_cnt++; + /* heuristic to determine whether this state is beneficial + * to keep checking from state equivalence point of view. + * Higher numbers increase max_states_per_insn and verification time, + * but do not meaningfully decrease insn_processed. + */ + if (sl->miss_cnt > sl->hit_cnt * 3 + 3) { + /* the state is unlikely to be useful. Remove it to + * speed up verification + */ + *pprev = sl->next; + if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { + free_verifier_state(&sl->state, false); + kfree(sl); + env->peak_states--; + } else { + /* cannot free this state, since parentage chain may + * walk it later. Add it for free_list instead to + * be freed at the end of verification + */ + sl->next = env->free_list; + env->free_list = sl; + } + sl = *pprev; + continue; + } + pprev = &sl->next; + sl = *pprev; } if (env->max_states_per_insn < states_cnt) @@ -7836,6 +7866,14 @@ static void free_states(struct bpf_verifier_env *env) struct bpf_verifier_state_list *sl, *sln; int i; + sl = env->free_list; + while (sl) { + sln = sl->next; + free_verifier_state(&sl->state, false); + kfree(sl); + sl = sln; + } + if (!env->explored_states) return; -- cgit From 25af32dad8047d180e70e233c85b909dd6587cc5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Apr 2019 21:27:42 -0700 Subject: bpf: improve verification speed by not remarking live_read With large verifier speed improvement brought by the previous patch mark_reg_read() becomes the hottest function during verification. On a typical program it consumes 40% of cpu. mark_reg_read() walks parentage chain of registers to mark parents as LIVE_READ. Once the register is marked there is no need to remark it again in the future. Hence stop walking the chain once first LIVE_READ is seen. This optimization drops mark_reg_read() time from 40% of cpu to <1% and overall 2x improvement of verification speed. For some programs the longest_mark_read_walk counter improves from ~500 to ~5 Signed-off-by: Alexei Starovoitov Reviewed-by: Jakub Kicinski Reviewed-by: Edward Cree Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a636db4a7a4e..94cf6efc5df6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1151,6 +1151,15 @@ static int mark_reg_read(struct bpf_verifier_env *env, parent->var_off.value, parent->off); return -EFAULT; } + if (parent->live & REG_LIVE_READ) + /* The parentage chain never changes and + * this parent was already marked as LIVE_READ. + * There is no need to keep walking the chain again and + * keep re-marking all parents as LIVE_READ. + * This case happens when the same register is read + * multiple times without writes into it in-between. + */ + break; /* ... then we depend on parent's value */ parent->live |= REG_LIVE_READ; state = parent; -- cgit From 71dde681a8cea1ccff2c7b3be83c043ab6b2a977 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Apr 2019 21:27:43 -0700 Subject: bpf: convert temp arrays to kvcalloc Temporary arrays used during program verification need to be vmalloc-ed to support large bpf programs. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 94cf6efc5df6..ad3494a881da 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5313,13 +5313,13 @@ static int check_cfg(struct bpf_verifier_env *env) int ret = 0; int i, t; - insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); + insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); if (!insn_state) return -ENOMEM; - insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); + insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); if (!insn_stack) { - kfree(insn_state); + kvfree(insn_state); return -ENOMEM; } @@ -5417,8 +5417,8 @@ check_state: ret = 0; /* cfg looks good */ err_free: - kfree(insn_state); - kfree(insn_stack); + kvfree(insn_state); + kvfree(insn_stack); return ret; } @@ -7898,7 +7898,7 @@ static void free_states(struct bpf_verifier_env *env) } } - kfree(env->explored_states); + kvfree(env->explored_states); } static void print_verification_stats(struct bpf_verifier_env *env) @@ -7994,7 +7994,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, goto skip_full_check; } - env->explored_states = kcalloc(env->prog->len, + env->explored_states = kvcalloc(env->prog->len, sizeof(struct bpf_verifier_state_list *), GFP_USER); ret = -ENOMEM; -- cgit From 4f73379ec5c2891598aa715c6df7ac9afdc86fbf Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Apr 2019 21:27:44 -0700 Subject: bpf: verbose jump offset overflow check Larger programs may trigger 16-bit jump offset overflow check during instruction patching. Make this error verbose otherwise users cannot decipher error code without printks in the verifier. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/core.c | 11 ++++++----- kernel/bpf/verifier.c | 7 ++++++- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ff09d32a8a1b..2966cb368bf4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -438,6 +438,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, u32 insn_adj_cnt, insn_rest, insn_delta = len - 1; const u32 cnt_max = S16_MAX; struct bpf_prog *prog_adj; + int err; /* Since our patchlet doesn't expand the image, we're done. */ if (insn_delta == 0) { @@ -453,8 +454,8 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, * we afterwards may not fail anymore. */ if (insn_adj_cnt > cnt_max && - bpf_adj_branches(prog, off, off + 1, off + len, true)) - return NULL; + (err = bpf_adj_branches(prog, off, off + 1, off + len, true))) + return ERR_PTR(err); /* Several new instructions need to be inserted. Make room * for them. Likely, there's no need for a new allocation as @@ -463,7 +464,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt), GFP_USER); if (!prog_adj) - return NULL; + return ERR_PTR(-ENOMEM); prog_adj->len = insn_adj_cnt; @@ -1096,13 +1097,13 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) continue; tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten); - if (!tmp) { + if (IS_ERR(tmp)) { /* Patching may have repointed aux->prog during * realloc from the original one, so we need to * fix it up here on error. */ bpf_jit_prog_release_other(prog, clone); - return ERR_PTR(-ENOMEM); + return tmp; } clone = tmp; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ad3494a881da..6dcfeb44bb8e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6932,8 +6932,13 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of struct bpf_prog *new_prog; new_prog = bpf_patch_insn_single(env->prog, off, patch, len); - if (!new_prog) + if (IS_ERR(new_prog)) { + if (PTR_ERR(new_prog) == -ERANGE) + verbose(env, + "insn %d cannot be patched due to 16-bit range\n", + env->insn_aux_data[off].orig_idx); return NULL; + } if (adjust_insn_aux_data(env, new_prog->len, off, len)) return NULL; adjust_subprog_starts(env, off, len); -- cgit From c04c0d2b968ac45d6ef020316808ef6c82325a82 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Apr 2019 21:27:45 -0700 Subject: bpf: increase complexity limit and maximum program size Large verifier speed improvements allow to increase verifier complexity limit. Now regardless of the program composition and its size it takes little time for the verifier to hit insn_processed limit. On typical x86 machine non-debug kernel processes 1M instructions in 1/10 of a second. (before these speed improvements specially crafted programs could be hitting multi-second verification times) Full kasan kernel with debug takes ~1 second for the same 1M insns. Hence bump the BPF_COMPLEXITY_LIMIT_INSNS limit to 1M. Also increase the number of instructions per program from 4k to internal BPF_COMPLEXITY_LIMIT_INSNS limit. 4k limit was confusing to users, since small programs with hundreds of insns could be hitting BPF_COMPLEXITY_LIMIT_INSNS limit. Sometimes adding more insns and bpf_trace_printk debug statements would make the verifier accept the program while removing code would make the verifier reject it. Some user space application started to add #define MAX_FOO to their programs and do: MAX_FOO=100; again: compile with MAX_FOO; try to load; if (fails_to_load) { reduce MAX_FOO; goto again; } to be able to fit maximum amount of processing into single program. Other users artificially split their single program into a set of programs and use all 32 iterations of tail_calls to increase compute limits. And the most advanced folks used unlimited tc-bpf filter list to execute many bpf programs. Essentially the users managed to workaround 4k insn limit. This patch removes the limit for root programs from uapi. BPF_COMPLEXITY_LIMIT_INSNS is the kernel internal limit and success to load the program no longer depends on program size, but on 'smartness' of the verifier only. The verifier will continue to get smarter with every kernel release. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 1 + kernel/bpf/syscall.c | 3 ++- kernel/bpf/verifier.c | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f62897198844..a445194b5fb6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -421,6 +421,7 @@ struct bpf_array { }; }; +#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ #define MAX_TAIL_CALL_CNT 32 struct bpf_event_entry { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index afca36f53c49..1d65e56594db 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1557,7 +1557,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) /* eBPF programs must be GPL compatible to use GPL-ed functions */ is_gpl = license_is_gpl_compatible(license); - if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS) + if (attr->insn_cnt == 0 || + attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) return -E2BIG; if (type != BPF_PROG_TYPE_SOCKET_FILTER && type != BPF_PROG_TYPE_CGROUP_SKB && diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6dcfeb44bb8e..b631e89e7a51 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -176,7 +176,6 @@ struct bpf_verifier_stack_elem { struct bpf_verifier_stack_elem *next; }; -#define BPF_COMPLEXITY_LIMIT_INSNS 131072 #define BPF_COMPLEXITY_LIMIT_STACK 1024 #define BPF_COMPLEXITY_LIMIT_STATES 64 -- cgit From 7a9f5c65abcc9644b11738ca0815510cb5510eaf Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Apr 2019 21:27:46 -0700 Subject: bpf: increase verifier log limit The existing 16Mbyte verifier log limit is not enough for log_level=2 even for small programs. Increase it to 1Gbyte. Note it's not a kernel memory limit. It's an amount of memory user space provides to store the verifier log. The kernel populates it 1k at a time. Signed-off-by: Alexei Starovoitov Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b631e89e7a51..bb27b675923c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7974,7 +7974,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, ret = -EINVAL; /* log attributes have to be sane */ - if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 || + if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 || !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK) goto err_unlock; } -- cgit From d6e486ee0ef2f99a4069d9186e53dac61b28cb3c Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 3 Apr 2019 16:03:54 -0700 Subject: cgroup: remove extra cgroup_migrate_finish() call The callers of cgroup_migrate_prepare_dst() correctly call cgroup_migrate_finish() for success and failure cases both. No need to call it in cgroup_migrate_prepare_dst() in failure case. Signed-off-by: Shakeel Butt Reviewed-by: Daniel Jordan Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 3f2b4bde0f9c..f219c195a9a5 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2602,7 +2602,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp); if (!dst_cset) - goto err; + return -ENOMEM; WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset); @@ -2634,9 +2634,6 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) } return 0; -err: - cgroup_migrate_finish(mgctx); - return -ENOMEM; } /** -- cgit From 9419a3191dcb27f24478d288abaab697228d28e6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 4 Apr 2019 21:04:13 -0400 Subject: acct_on(): don't mess with freeze protection What happens there is that we are replacing file->path.mnt of a file we'd just opened with a clone and we need the write count contribution to be transferred from original mount to new one. That's it. We do *NOT* want any kind of freeze protection for the duration of switchover. IOW, we should just use __mnt_{want,drop}_write() for that switchover; no need to bother with mnt_{want,drop}_write() there. Tested-by: Amir Goldstein Reported-by: syzbot+2a73a6ea9507b7112141@syzkaller.appspotmail.com Signed-off-by: Al Viro --- fs/internal.h | 2 -- include/linux/mount.h | 2 ++ kernel/acct.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/fs/internal.h b/fs/internal.h index 6a8b71643af4..2e7362837a6e 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -89,9 +89,7 @@ extern int sb_prepare_remount_readonly(struct super_block *); extern void __init mnt_init(void); -extern int __mnt_want_write(struct vfsmount *); extern int __mnt_want_write_file(struct file *); -extern void __mnt_drop_write(struct vfsmount *); extern void __mnt_drop_write_file(struct file *); /* diff --git a/include/linux/mount.h b/include/linux/mount.h index 9197ddbf35fb..bf8cc4108b8f 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -87,6 +87,8 @@ extern bool mnt_may_suid(struct vfsmount *mnt); struct path; extern struct vfsmount *clone_private_mount(const struct path *path); +extern int __mnt_want_write(struct vfsmount *); +extern void __mnt_drop_write(struct vfsmount *); struct file_system_type; extern struct vfsmount *fc_mount(struct fs_context *fc); diff --git a/kernel/acct.c b/kernel/acct.c index addf7732fb56..81f9831a7859 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -227,7 +227,7 @@ static int acct_on(struct filename *pathname) filp_close(file, NULL); return PTR_ERR(internal); } - err = mnt_want_write(internal); + err = __mnt_want_write(internal); if (err) { mntput(internal); kfree(acct); @@ -252,7 +252,7 @@ static int acct_on(struct filename *pathname) old = xchg(&ns->bacct, &acct->pin); mutex_unlock(&acct->lock); pin_kill(old); - mnt_drop_write(mnt); + __mnt_drop_write(mnt); mntput(mnt); return 0; } -- cgit From f2bcd05ec7b839ff826d2008506ad2d2dff46a59 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 3 Apr 2019 23:22:37 -0700 Subject: bpf: Reject indirect var_off stack access in raw mode It's hard to guarantee that whole memory is marked as initialized on helper return if uninitialized stack is accessed with variable offset since specific bounds are unknown to verifier. This may cause uninitialized stack leaking. Reject such an access in check_stack_boundary to prevent possible leaking. There are no known use-cases for indirect uninitialized stack access with variable offset so it shouldn't break anything. Fixes: 2011fccfb61b ("bpf: Support variable offset stack access from helpers") Reported-by: Daniel Borkmann Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bb27b675923c..0f12fda35626 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2226,6 +2226,15 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, if (err) return err; } else { + /* Only initialized buffer on stack is allowed to be accessed + * with variable offset. With uninitialized buffer it's hard to + * guarantee that whole memory is marked as initialized on + * helper return since specific bounds are unknown what may + * cause uninitialized stack leaking. + */ + if (meta && meta->raw_mode) + meta = NULL; + min_off = reg->smin_value + reg->off; max_off = reg->umax_value + reg->off; err = __check_stack_boundary(env, regno, min_off, access_size, -- cgit From 088ec26d9c2da9d879ab73e3f4117f9df6c566ee Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 3 Apr 2019 23:22:39 -0700 Subject: bpf: Reject indirect var_off stack access in unpriv mode Proper support of indirect stack access with variable offset in unprivileged mode (!root) requires corresponding support in Spectre masking for stack ALU in retrieve_ptr_limit(). There are no use-case for variable offset in unprivileged mode though so make verifier reject such accesses for simplicity. Pointer arithmetics is one (and only?) way to cause variable offset and it's already rejected in unpriv mode so that verifier won't even get to helper function whose argument contains variable offset, e.g.: 0: (7a) *(u64 *)(r10 -16) = 0 1: (7a) *(u64 *)(r10 -8) = 0 2: (61) r2 = *(u32 *)(r1 +0) 3: (57) r2 &= 4 4: (17) r2 -= 16 5: (0f) r2 += r10 variable stack access var_off=(0xfffffffffffffff0; 0x4) off=-16 size=1R2 stack pointer arithmetic goes out of range, prohibited for !root Still it looks like a good idea to reject variable offset indirect stack access for unprivileged mode in check_stack_boundary() explicitly. Fixes: 2011fccfb61b ("bpf: Support variable offset stack access from helpers") Reported-by: Daniel Borkmann Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0f12fda35626..8400c1f33cd4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2226,6 +2226,19 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, if (err) return err; } else { + /* Variable offset is prohibited for unprivileged mode for + * simplicity since it requires corresponding support in + * Spectre masking for stack ALU. + * See also retrieve_ptr_limit(). + */ + if (!env->allow_ptr_leaks) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n", + regno, tn_buf); + return -EACCES; + } /* Only initialized buffer on stack is allowed to be accessed * with variable offset. With uninitialized buffer it's hard to * guarantee that whole memory is marked as initialized on @@ -3339,6 +3352,9 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, switch (ptr_reg->type) { case PTR_TO_STACK: + /* Indirect variable offset stack access is prohibited in + * unprivileged mode so it's not handled here. + */ off = ptr_reg->off + ptr_reg->var_off.value; if (mask_to_left) *ptr_limit = MAX_BPF_STACK + off; -- cgit From 107c26a70ca81bfc33657366ad69d02fdc9efc9d Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 3 Apr 2019 23:22:41 -0700 Subject: bpf: Sanity check max value for var_off stack access As discussed in [1] max value of variable offset has to be checked for overflow on stack access otherwise verifier would accept code like this: 0: (b7) r2 = 6 1: (b7) r3 = 28 2: (7a) *(u64 *)(r10 -16) = 0 3: (7a) *(u64 *)(r10 -8) = 0 4: (79) r4 = *(u64 *)(r1 +168) 5: (c5) if r4 s< 0x0 goto pc+4 R1=ctx(id=0,off=0,imm=0) R2=inv6 R3=inv28 R4=inv(id=0,umax_value=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0,call_-1 fp-8=mmmmmmmm fp-16=mmmmmmmm 6: (17) r4 -= 16 7: (0f) r4 += r10 8: (b7) r5 = 8 9: (85) call bpf_getsockopt#57 10: (b7) r0 = 0 11: (95) exit , where R4 obviosly has unbounded max value. Fix it by checking that reg->smax_value is inside (-BPF_MAX_VAR_OFF; BPF_MAX_VAR_OFF) range. reg->smax_value is used instead of reg->umax_value because stack pointers are calculated using negative offset from fp. This is opposite to e.g. map access where offset must be non-negative and where umax_value is used. Also dedicated verbose logs are added for both min and max bound check failures to have diagnostics consistent with variable offset handling in check_map_access(). [1] https://marc.info/?l=linux-netdev&m=155433357510597&w=2 Fixes: 2011fccfb61b ("bpf: Support variable offset stack access from helpers") Reported-by: Daniel Borkmann Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8400c1f33cd4..f2d600199e66 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2248,16 +2248,28 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, if (meta && meta->raw_mode) meta = NULL; + if (reg->smax_value >= BPF_MAX_VAR_OFF || + reg->smax_value <= -BPF_MAX_VAR_OFF) { + verbose(env, "R%d unbounded indirect variable offset stack access\n", + regno); + return -EACCES; + } min_off = reg->smin_value + reg->off; - max_off = reg->umax_value + reg->off; + max_off = reg->smax_value + reg->off; err = __check_stack_boundary(env, regno, min_off, access_size, zero_size_allowed); - if (err) + if (err) { + verbose(env, "R%d min value is outside of stack bound\n", + regno); return err; + } err = __check_stack_boundary(env, regno, max_off, access_size, zero_size_allowed); - if (err) + if (err) { + verbose(env, "R%d max value is outside of stack bound\n", + regno); return err; + } } if (meta && meta->raw_mode) { -- cgit From 1fbd20f8b77b366ea4aeb92ade72daa7f36a7e3b Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 3 Apr 2019 23:22:43 -0700 Subject: bpf: Add missed newline in verifier verbose log check_stack_access() that prints verbose log is used in adjust_ptr_min_max_vals() that prints its own verbose log and now they stick together, e.g.: variable stack access var_off=(0xfffffffffffffff0; 0x4) off=-16 size=1R2 stack pointer arithmetic goes out of range, prohibited for !root Add missing newline so that log is more readable: variable stack access var_off=(0xfffffffffffffff0; 0x4) off=-16 size=1 R2 stack pointer arithmetic goes out of range, prohibited for !root Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f2d600199e66..48718e1da16d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1426,7 +1426,7 @@ static int check_stack_access(struct bpf_verifier_env *env, char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "variable stack access var_off=%s off=%d size=%d", + verbose(env, "variable stack access var_off=%s off=%d size=%d\n", tn_buf, off, size); return -EACCES; } -- cgit From 12f2639038ef420fe796171ffb810b30d1ac0619 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 5 Apr 2019 21:46:12 +0200 Subject: tracing: stop making gpio tracing configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gpio tracing was made configurable in 4.4-rc1 (commit ddd70280bf0e ("tracing: gpio: Add Kconfig option for enabling/disabling trace events")). Since then it is the only event type that can be compiled conditionally. Given that there is only little overhead I don't understand the reasoning and I was annoyed more than once that gpio events were not available without recompiling. So drop the Kconfig symbol and make gpio events available unconditionally. Signed-off-by: Uwe Kleine-König Signed-off-by: Linus Walleij --- arch/arm/configs/aspeed_g4_defconfig | 1 - arch/arm/configs/aspeed_g5_defconfig | 1 - include/trace/events/gpio.h | 4 ---- kernel/trace/Kconfig | 7 ------- 4 files changed, 13 deletions(-) (limited to 'kernel') diff --git a/arch/arm/configs/aspeed_g4_defconfig b/arch/arm/configs/aspeed_g4_defconfig index 1446262921b4..0f3638dd576b 100644 --- a/arch/arm/configs/aspeed_g4_defconfig +++ b/arch/arm/configs/aspeed_g4_defconfig @@ -248,7 +248,6 @@ CONFIG_PANIC_TIMEOUT=-1 # CONFIG_SCHED_DEBUG is not set CONFIG_SCHED_STACK_END_CHECK=y CONFIG_FUNCTION_TRACER=y -# CONFIG_TRACING_EVENTS_GPIO is not set # CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_DEBUG_WX=y CONFIG_DEBUG_USER=y diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig index 02fa3a41add5..eaab776c4ac2 100644 --- a/arch/arm/configs/aspeed_g5_defconfig +++ b/arch/arm/configs/aspeed_g5_defconfig @@ -248,7 +248,6 @@ CONFIG_PANIC_TIMEOUT=-1 # CONFIG_SCHED_DEBUG is not set CONFIG_SCHED_STACK_END_CHECK=y CONFIG_FUNCTION_TRACER=y -# CONFIG_TRACING_EVENTS_GPIO is not set # CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_DEBUG_WX=y CONFIG_DEBUG_USER=y diff --git a/include/trace/events/gpio.h b/include/trace/events/gpio.h index 5c189a22c489..3aa9fd86d748 100644 --- a/include/trace/events/gpio.h +++ b/include/trace/events/gpio.h @@ -2,10 +2,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM gpio -#ifndef CONFIG_TRACING_EVENTS_GPIO -#define NOTRACE -#endif - #if !defined(_TRACE_GPIO_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_GPIO_H diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8bd1d6d001d7..5d965cef6c77 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -774,13 +774,6 @@ config TRACE_EVAL_MAP_FILE If unsure, say N -config TRACING_EVENTS_GPIO - bool "Trace gpio events" - depends on GPIOLIB - default y - help - Enable tracing events for gpio subsystem - config GCOV_PROFILE_FTRACE bool "Enable GCOV profiling on ftrace subsystem" depends on GCOV_KERNEL -- cgit From 1e144d73f7295f766568c357448a11eb12868e29 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 1 Apr 2019 16:07:48 -0400 Subject: tracing: Add trace_array parameter to create_event_filter() Pass in the trace_array that represents the instance the filter being changed is in to create_event_filter(). This will allow for error messages that happen when writing to the filter can be displayed in the proper instance "error_log" file. Note, for calls to create_filter() (that was also modified to support create_event_filter()), that changes filters that do not exist in a instance (for perf for example), NULL may be passed in, which means that there will not be any message to log for that filter. Reviewed-by: Masami Hiramatsu Reviewed-by: Tom Zanussi Tested-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.h | 3 ++- kernel/trace/trace_events_filter.c | 25 ++++++++++++++----------- kernel/trace/trace_events_trigger.c | 3 ++- 3 files changed, 18 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b711edbef7e7..809c5d7f0064 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1553,7 +1553,8 @@ extern int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, extern void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s); extern int filter_assign_type(const char *type); -extern int create_event_filter(struct trace_event_call *call, +extern int create_event_filter(struct trace_array *tr, + struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp); extern void free_event_filter(struct event_filter *filter); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 290d42c59101..2b63930cd3e6 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -920,7 +920,8 @@ static void remove_filter_string(struct event_filter *filter) filter->filter_string = NULL; } -static void append_filter_err(struct filter_parse_error *pe, +static void append_filter_err(struct trace_array *tr, + struct filter_parse_error *pe, struct event_filter *filter) { struct trace_seq *s; @@ -1607,7 +1608,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir, if (err) { filter_disable(file); parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0); - append_filter_err(pe, filter); + append_filter_err(tr, pe, filter); } else event_set_filtered_flag(file); @@ -1719,7 +1720,8 @@ static void create_filter_finish(struct filter_parse_error *pe) * information if @set_str is %true and the caller is responsible for * freeing it. */ -static int create_filter(struct trace_event_call *call, +static int create_filter(struct trace_array *tr, + struct trace_event_call *call, char *filter_string, bool set_str, struct event_filter **filterp) { @@ -1736,17 +1738,18 @@ static int create_filter(struct trace_event_call *call, err = process_preds(call, filter_string, *filterp, pe); if (err && set_str) - append_filter_err(pe, *filterp); + append_filter_err(tr, pe, *filterp); create_filter_finish(pe); return err; } -int create_event_filter(struct trace_event_call *call, +int create_event_filter(struct trace_array *tr, + struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp) { - return create_filter(call, filter_str, set_str, filterp); + return create_filter(tr, call, filter_str, set_str, filterp); } /** @@ -1773,7 +1776,7 @@ static int create_system_filter(struct trace_subsystem_dir *dir, kfree((*filterp)->filter_string); (*filterp)->filter_string = NULL; } else { - append_filter_err(pe, *filterp); + append_filter_err(tr, pe, *filterp); } } create_filter_finish(pe); @@ -1804,7 +1807,7 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string) return 0; } - err = create_filter(call, filter_string, true, &filter); + err = create_filter(file->tr, call, filter_string, true, &filter); /* * Always swap the call filter with the new filter @@ -2060,7 +2063,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, if (event->filter) goto out_unlock; - err = create_filter(call, filter_str, false, &filter); + err = create_filter(NULL, call, filter_str, false, &filter); if (err) goto free_filter; @@ -2209,8 +2212,8 @@ static __init int ftrace_test_event_filter(void) struct test_filter_data_t *d = &test_filter_data[i]; int err; - err = create_filter(&event_ftrace_test_filter, d->filter, - false, &filter); + err = create_filter(NULL, &event_ftrace_test_filter, + d->filter, false, &filter); if (err) { printk(KERN_INFO "Failed to get filter for '%s', err %d\n", diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index cd12ecb66eb9..2a2912cb4533 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -731,7 +731,8 @@ int set_trigger_filter(char *filter_str, goto out; /* The filter is for the 'trigger' event, not the triggered event */ - ret = create_event_filter(file->event_call, filter_str, false, &filter); + ret = create_event_filter(file->tr, file->event_call, + filter_str, false, &filter); /* * If create_event_filter() fails, filter still needs to be freed. * Which the calling code will do with data->filter. -- cgit From d0cd871ba0d613e09366e4b6a17946dfcf51db7c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 1 Apr 2019 22:30:22 -0400 Subject: tracing: Have histogram code pass around trace_array for error handling Have the trace_array that associates the trace instance of the histogram passed around to functions so that error handling can display the error message in the proper instance. Reviewed-by: Masami Hiramatsu Reviewed-by: Tom Zanussi Tested-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 142 ++++++++++++++++++++++----------------- 1 file changed, 80 insertions(+), 62 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 071c62cacba7..a167e439e9a1 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -619,7 +619,7 @@ static void last_cmd_set(struct trace_event_file *file, char *str) snprintf(last_cmd_loc, MAX_FILTER_STR_VAL, "hist:%s:%s", system, name); } -static void hist_err(u8 err_type, u8 err_pos) +static void hist_err(struct trace_array *tr, u8 err_type, u8 err_pos) { tracing_log_err(last_cmd_loc, last_cmd, err_text, err_type, err_pos); } @@ -1756,7 +1756,7 @@ static struct trace_event_file *find_var_file(struct trace_array *tr, if (find_var_field(var_hist_data, var_name)) { if (found) { - hist_err(HIST_ERR_VAR_NOT_UNIQUE, errpos(var_name)); + hist_err(tr, HIST_ERR_VAR_NOT_UNIQUE, errpos(var_name)); return NULL; } @@ -1807,7 +1807,8 @@ find_match_var(struct hist_trigger_data *hist_data, char *var_name) hist_field = find_file_var(file, var_name); if (hist_field) { if (found) { - hist_err(HIST_ERR_VAR_NOT_UNIQUE, errpos(var_name)); + hist_err(tr, HIST_ERR_VAR_NOT_UNIQUE, + errpos(var_name)); return ERR_PTR(-EINVAL); } @@ -2042,7 +2043,8 @@ static int parse_action(char *str, struct hist_trigger_attrs *attrs) return ret; } -static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) +static int parse_assignment(struct trace_array *tr, + char *str, struct hist_trigger_attrs *attrs) { int ret = 0; @@ -2098,7 +2100,7 @@ static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) char *assignment; if (attrs->n_assignments == TRACING_MAP_VARS_MAX) { - hist_err(HIST_ERR_TOO_MANY_VARS, errpos(str)); + hist_err(tr, HIST_ERR_TOO_MANY_VARS, errpos(str)); ret = -EINVAL; goto out; } @@ -2115,7 +2117,8 @@ static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) return ret; } -static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str) +static struct hist_trigger_attrs * +parse_hist_trigger_attrs(struct trace_array *tr, char *trigger_str) { struct hist_trigger_attrs *attrs; int ret = 0; @@ -2128,7 +2131,7 @@ static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str) char *str = strsep(&trigger_str, ":"); if (strchr(str, '=')) { - ret = parse_assignment(str, attrs); + ret = parse_assignment(tr, str, attrs); if (ret) goto free; } else if (strcmp(str, "pause") == 0) @@ -2684,6 +2687,7 @@ static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data, char *var_name) { struct hist_field *var_field = NULL, *ref_field = NULL; + struct trace_array *tr = hist_data->event_file->tr; if (!is_var_ref(var_name)) return NULL; @@ -2696,7 +2700,7 @@ static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data, system, event_name); if (!ref_field) - hist_err(HIST_ERR_VAR_NOT_FOUND, errpos(var_name)); + hist_err(tr, HIST_ERR_VAR_NOT_FOUND, errpos(var_name)); return ref_field; } @@ -2707,6 +2711,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, { struct ftrace_event_field *field = NULL; char *field_name, *modifier, *str; + struct trace_array *tr = file->tr; modifier = str = kstrdup(field_str, GFP_KERNEL); if (!modifier) @@ -2730,7 +2735,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, else if (strcmp(modifier, "usecs") == 0) *flags |= HIST_FIELD_FL_TIMESTAMP_USECS; else { - hist_err(HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier)); + hist_err(tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier)); field = ERR_PTR(-EINVAL); goto out; } @@ -2746,7 +2751,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, else { field = trace_find_event_field(file->event_call, field_name); if (!field || !field->size) { - hist_err(HIST_ERR_FIELD_NOT_FOUND, errpos(field_name)); + hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, errpos(field_name)); field = ERR_PTR(-EINVAL); goto out; } @@ -2808,7 +2813,8 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, s = local_field_var_ref(hist_data, ref_system, ref_event, ref_var); if (!s) { - hist_field = parse_var_ref(hist_data, ref_system, ref_event, ref_var); + hist_field = parse_var_ref(hist_data, ref_system, + ref_event, ref_var); if (hist_field) { if (var_name) { hist_field = create_alias(hist_data, hist_field, var_name); @@ -2857,7 +2863,7 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, /* we support only -(xxx) i.e. explicit parens required */ if (level > 3) { - hist_err(HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); + hist_err(file->tr, HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); ret = -EINVAL; goto free; } @@ -2912,7 +2918,8 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, return ERR_PTR(ret); } -static int check_expr_operands(struct hist_field *operand1, +static int check_expr_operands(struct trace_array *tr, + struct hist_field *operand1, struct hist_field *operand2) { unsigned long operand1_flags = operand1->flags; @@ -2940,7 +2947,7 @@ static int check_expr_operands(struct hist_field *operand1, if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) { - hist_err(HIST_ERR_TIMESTAMP_MISMATCH, 0); + hist_err(tr, HIST_ERR_TIMESTAMP_MISMATCH, 0); return -EINVAL; } @@ -2958,7 +2965,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, char *sep, *operand1_str; if (level > 3) { - hist_err(HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); + hist_err(file->tr, HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); return ERR_PTR(-EINVAL); } @@ -3003,7 +3010,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, goto free; } - ret = check_expr_operands(operand1, operand2); + ret = check_expr_operands(file->tr, operand1, operand2); if (ret) goto free; @@ -3196,14 +3203,14 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, int ret; if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) { - hist_err(HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name)); + hist_err(tr, HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name)); return ERR_PTR(-EINVAL); } file = event_file(tr, subsys_name, event_name); if (IS_ERR(file)) { - hist_err(HIST_ERR_EVENT_FILE_NOT_FOUND, errpos(field_name)); + hist_err(tr, HIST_ERR_EVENT_FILE_NOT_FOUND, errpos(field_name)); ret = PTR_ERR(file); return ERR_PTR(ret); } @@ -3216,7 +3223,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, */ hist_data = find_compatible_hist(target_hist_data, file); if (!hist_data) { - hist_err(HIST_ERR_HIST_NOT_FOUND, errpos(field_name)); + hist_err(tr, HIST_ERR_HIST_NOT_FOUND, errpos(field_name)); return ERR_PTR(-EINVAL); } @@ -3277,7 +3284,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, kfree(cmd); kfree(var_hist->cmd); kfree(var_hist); - hist_err(HIST_ERR_HIST_CREATE_FAIL, errpos(field_name)); + hist_err(tr, HIST_ERR_HIST_CREATE_FAIL, errpos(field_name)); return ERR_PTR(ret); } @@ -3289,7 +3296,7 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data, if (IS_ERR_OR_NULL(event_var)) { kfree(var_hist->cmd); kfree(var_hist); - hist_err(HIST_ERR_SYNTH_VAR_NOT_FOUND, errpos(field_name)); + hist_err(tr, HIST_ERR_SYNTH_VAR_NOT_FOUND, errpos(field_name)); return ERR_PTR(-EINVAL); } @@ -3422,25 +3429,26 @@ static struct field_var *create_field_var(struct hist_trigger_data *hist_data, { struct hist_field *val = NULL, *var = NULL; unsigned long flags = HIST_FIELD_FL_VAR; + struct trace_array *tr = file->tr; struct field_var *field_var; int ret = 0; if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) { - hist_err(HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name)); + hist_err(tr, HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name)); ret = -EINVAL; goto err; } val = parse_atom(hist_data, file, field_name, &flags, NULL); if (IS_ERR(val)) { - hist_err(HIST_ERR_FIELD_VAR_PARSE_FAIL, errpos(field_name)); + hist_err(tr, HIST_ERR_FIELD_VAR_PARSE_FAIL, errpos(field_name)); ret = PTR_ERR(val); goto err; } var = create_var(hist_data, file, field_name, val->size, val->type); if (IS_ERR(var)) { - hist_err(HIST_ERR_VAR_CREATE_FIND_FAIL, errpos(field_name)); + hist_err(tr, HIST_ERR_VAR_CREATE_FIND_FAIL, errpos(field_name)); kfree(val); ret = PTR_ERR(var); goto err; @@ -3767,19 +3775,20 @@ static int track_data_create(struct hist_trigger_data *hist_data, { struct hist_field *var_field, *ref_field, *track_var = NULL; struct trace_event_file *file = hist_data->event_file; + struct trace_array *tr = file->tr; char *track_data_var_str; int ret = 0; track_data_var_str = data->track_data.var_str; if (track_data_var_str[0] != '$') { - hist_err(HIST_ERR_ONX_NOT_VAR, errpos(track_data_var_str)); + hist_err(tr, HIST_ERR_ONX_NOT_VAR, errpos(track_data_var_str)); return -EINVAL; } track_data_var_str++; var_field = find_target_event_var(hist_data, NULL, NULL, track_data_var_str); if (!var_field) { - hist_err(HIST_ERR_ONX_VAR_NOT_FOUND, errpos(track_data_var_str)); + hist_err(tr, HIST_ERR_ONX_VAR_NOT_FOUND, errpos(track_data_var_str)); return -EINVAL; } @@ -3792,7 +3801,7 @@ static int track_data_create(struct hist_trigger_data *hist_data, if (data->handler == HANDLER_ONMAX) track_var = create_var(hist_data, file, "__max", sizeof(u64), "u64"); if (IS_ERR(track_var)) { - hist_err(HIST_ERR_ONX_VAR_CREATE_FAIL, 0); + hist_err(tr, HIST_ERR_ONX_VAR_CREATE_FAIL, 0); ret = PTR_ERR(track_var); goto out; } @@ -3800,7 +3809,7 @@ static int track_data_create(struct hist_trigger_data *hist_data, if (data->handler == HANDLER_ONCHANGE) track_var = create_var(hist_data, file, "__change", sizeof(u64), "u64"); if (IS_ERR(track_var)) { - hist_err(HIST_ERR_ONX_VAR_CREATE_FAIL, 0); + hist_err(tr, HIST_ERR_ONX_VAR_CREATE_FAIL, 0); ret = PTR_ERR(track_var); goto out; } @@ -3811,7 +3820,8 @@ static int track_data_create(struct hist_trigger_data *hist_data, return ret; } -static int parse_action_params(char *params, struct action_data *data) +static int parse_action_params(struct trace_array *tr, char *params, + struct action_data *data) { char *param, *saved_param; bool first_param = true; @@ -3819,20 +3829,20 @@ static int parse_action_params(char *params, struct action_data *data) while (params) { if (data->n_params >= SYNTH_FIELDS_MAX) { - hist_err(HIST_ERR_TOO_MANY_PARAMS, 0); + hist_err(tr, HIST_ERR_TOO_MANY_PARAMS, 0); goto out; } param = strsep(¶ms, ","); if (!param) { - hist_err(HIST_ERR_PARAM_NOT_FOUND, 0); + hist_err(tr, HIST_ERR_PARAM_NOT_FOUND, 0); ret = -EINVAL; goto out; } param = strstrip(param); if (strlen(param) < 2) { - hist_err(HIST_ERR_INVALID_PARAM, errpos(param)); + hist_err(tr, HIST_ERR_INVALID_PARAM, errpos(param)); ret = -EINVAL; goto out; } @@ -3856,7 +3866,7 @@ static int parse_action_params(char *params, struct action_data *data) return ret; } -static int action_parse(char *str, struct action_data *data, +static int action_parse(struct trace_array *tr, char *str, struct action_data *data, enum handler_id handler) { char *action_name; @@ -3864,14 +3874,14 @@ static int action_parse(char *str, struct action_data *data, strsep(&str, "."); if (!str) { - hist_err(HIST_ERR_ACTION_NOT_FOUND, 0); + hist_err(tr, HIST_ERR_ACTION_NOT_FOUND, 0); ret = -EINVAL; goto out; } action_name = strsep(&str, "("); if (!action_name || !str) { - hist_err(HIST_ERR_ACTION_NOT_FOUND, 0); + hist_err(tr, HIST_ERR_ACTION_NOT_FOUND, 0); ret = -EINVAL; goto out; } @@ -3880,12 +3890,12 @@ static int action_parse(char *str, struct action_data *data, char *params = strsep(&str, ")"); if (!params) { - hist_err(HIST_ERR_NO_SAVE_PARAMS, 0); + hist_err(tr, HIST_ERR_NO_SAVE_PARAMS, 0); ret = -EINVAL; goto out; } - ret = parse_action_params(params, data); + ret = parse_action_params(tr, params, data); if (ret) goto out; @@ -3894,7 +3904,7 @@ static int action_parse(char *str, struct action_data *data, else if (handler == HANDLER_ONCHANGE) data->track_data.check_val = check_track_val_changed; else { - hist_err(HIST_ERR_ACTION_MISMATCH, errpos(action_name)); + hist_err(tr, HIST_ERR_ACTION_MISMATCH, errpos(action_name)); ret = -EINVAL; goto out; } @@ -3906,7 +3916,7 @@ static int action_parse(char *str, struct action_data *data, char *params = strsep(&str, ")"); if (!str) { - hist_err(HIST_ERR_NO_CLOSING_PAREN, errpos(params)); + hist_err(tr, HIST_ERR_NO_CLOSING_PAREN, errpos(params)); ret = -EINVAL; goto out; } @@ -3916,7 +3926,7 @@ static int action_parse(char *str, struct action_data *data, else if (handler == HANDLER_ONCHANGE) data->track_data.check_val = check_track_val_changed; else { - hist_err(HIST_ERR_ACTION_MISMATCH, errpos(action_name)); + hist_err(tr, HIST_ERR_ACTION_MISMATCH, errpos(action_name)); ret = -EINVAL; goto out; } @@ -3931,7 +3941,7 @@ static int action_parse(char *str, struct action_data *data, data->use_trace_keyword = true; if (params) { - ret = parse_action_params(params, data); + ret = parse_action_params(tr, params, data); if (ret) goto out; } @@ -3984,7 +3994,7 @@ static struct action_data *track_data_parse(struct hist_trigger_data *hist_data, goto free; } - ret = action_parse(str, data, handler); + ret = action_parse(hist_data->event_file->tr, str, data, handler); if (ret) goto free; out: @@ -4054,6 +4064,7 @@ trace_action_find_var(struct hist_trigger_data *hist_data, struct action_data *data, char *system, char *event, char *var) { + struct trace_array *tr = hist_data->event_file->tr; struct hist_field *hist_field; var++; /* skip '$' */ @@ -4069,7 +4080,7 @@ trace_action_find_var(struct hist_trigger_data *hist_data, } if (!hist_field) - hist_err(HIST_ERR_PARAM_NOT_FOUND, errpos(var)); + hist_err(tr, HIST_ERR_PARAM_NOT_FOUND, errpos(var)); return hist_field; } @@ -4127,6 +4138,7 @@ trace_action_create_field_var(struct hist_trigger_data *hist_data, static int trace_action_create(struct hist_trigger_data *hist_data, struct action_data *data) { + struct trace_array *tr = hist_data->event_file->tr; char *event_name, *param, *system = NULL; struct hist_field *hist_field, *var_ref; unsigned int i, var_ref_idx; @@ -4144,7 +4156,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data, event = find_synth_event(synth_event_name); if (!event) { - hist_err(HIST_ERR_SYNTH_EVENT_NOT_FOUND, errpos(synth_event_name)); + hist_err(tr, HIST_ERR_SYNTH_EVENT_NOT_FOUND, errpos(synth_event_name)); return -EINVAL; } @@ -4205,14 +4217,14 @@ static int trace_action_create(struct hist_trigger_data *hist_data, continue; } - hist_err(HIST_ERR_SYNTH_TYPE_MISMATCH, errpos(param)); + hist_err(tr, HIST_ERR_SYNTH_TYPE_MISMATCH, errpos(param)); kfree(p); ret = -EINVAL; goto err; } if (field_pos != event->n_fields) { - hist_err(HIST_ERR_SYNTH_COUNT_MISMATCH, errpos(event->name)); + hist_err(tr, HIST_ERR_SYNTH_COUNT_MISMATCH, errpos(event->name)); ret = -EINVAL; goto err; } @@ -4231,6 +4243,7 @@ static int action_create(struct hist_trigger_data *hist_data, struct action_data *data) { struct trace_event_file *file = hist_data->event_file; + struct trace_array *tr = file->tr; struct track_data *track_data; struct field_var *field_var; unsigned int i; @@ -4258,7 +4271,7 @@ static int action_create(struct hist_trigger_data *hist_data, if (data->action == ACTION_SAVE) { if (hist_data->n_save_vars) { ret = -EEXIST; - hist_err(HIST_ERR_TOO_MANY_SAVE_ACTIONS, 0); + hist_err(tr, HIST_ERR_TOO_MANY_SAVE_ACTIONS, 0); goto out; } @@ -4271,7 +4284,8 @@ static int action_create(struct hist_trigger_data *hist_data, field_var = create_target_field_var(hist_data, NULL, NULL, param); if (IS_ERR(field_var)) { - hist_err(HIST_ERR_FIELD_VAR_CREATE_FAIL, errpos(param)); + hist_err(tr, HIST_ERR_FIELD_VAR_CREATE_FAIL, + errpos(param)); ret = PTR_ERR(field_var); kfree(param); goto out; @@ -4305,18 +4319,18 @@ static struct action_data *onmatch_parse(struct trace_array *tr, char *str) match_event = strsep(&str, ")"); if (!match_event || !str) { - hist_err(HIST_ERR_NO_CLOSING_PAREN, errpos(match_event)); + hist_err(tr, HIST_ERR_NO_CLOSING_PAREN, errpos(match_event)); goto free; } match_event_system = strsep(&match_event, "."); if (!match_event) { - hist_err(HIST_ERR_SUBSYS_NOT_FOUND, errpos(match_event_system)); + hist_err(tr, HIST_ERR_SUBSYS_NOT_FOUND, errpos(match_event_system)); goto free; } if (IS_ERR(event_file(tr, match_event_system, match_event))) { - hist_err(HIST_ERR_INVALID_SUBSYS_EVENT, errpos(match_event)); + hist_err(tr, HIST_ERR_INVALID_SUBSYS_EVENT, errpos(match_event)); goto free; } @@ -4332,7 +4346,7 @@ static struct action_data *onmatch_parse(struct trace_array *tr, char *str) goto free; } - ret = action_parse(str, data, HANDLER_ONMATCH); + ret = action_parse(tr, str, data, HANDLER_ONMATCH); if (ret) goto free; out: @@ -4401,13 +4415,14 @@ static int create_var_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *var_name, char *expr_str) { + struct trace_array *tr = hist_data->event_file->tr; unsigned long flags = 0; if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX)) return -EINVAL; if (find_var(hist_data, file, var_name) && !hist_data->remove) { - hist_err(HIST_ERR_DUPLICATE_VAR, errpos(var_name)); + hist_err(tr, HIST_ERR_DUPLICATE_VAR, errpos(var_name)); return -EINVAL; } @@ -4464,8 +4479,8 @@ static int create_key_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *field_str) { + struct trace_array *tr = hist_data->event_file->tr; struct hist_field *hist_field = NULL; - unsigned long flags = 0; unsigned int key_size; int ret = 0; @@ -4488,7 +4503,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, } if (hist_field->flags & HIST_FIELD_FL_VAR_REF) { - hist_err(HIST_ERR_INVALID_REF_KEY, errpos(field_str)); + hist_err(tr, HIST_ERR_INVALID_REF_KEY, errpos(field_str)); destroy_hist_field(hist_field, 0); ret = -EINVAL; goto out; @@ -4589,6 +4604,7 @@ static void free_var_defs(struct hist_trigger_data *hist_data) static int parse_var_defs(struct hist_trigger_data *hist_data) { + struct trace_array *tr = hist_data->event_file->tr; char *s, *str, *var_name, *field_str; unsigned int i, j, n_vars = 0; int ret = 0; @@ -4602,13 +4618,14 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) var_name = strsep(&field_str, "="); if (!var_name || !field_str) { - hist_err(HIST_ERR_MALFORMED_ASSIGNMENT, errpos(var_name)); + hist_err(tr, HIST_ERR_MALFORMED_ASSIGNMENT, + errpos(var_name)); ret = -EINVAL; goto free; } if (n_vars == TRACING_MAP_VARS_MAX) { - hist_err(HIST_ERR_TOO_MANY_VARS, errpos(var_name)); + hist_err(tr, HIST_ERR_TOO_MANY_VARS, errpos(var_name)); ret = -EINVAL; goto free; } @@ -5829,6 +5846,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, { struct hist_trigger_data *hist_data = data->private_data; struct event_trigger_data *test, *named_data = NULL; + struct trace_array *tr = file->tr; int ret = 0; if (hist_data->attrs->name) { @@ -5836,7 +5854,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, if (named_data) { if (!hist_trigger_match(data, named_data, named_data, true)) { - hist_err(HIST_ERR_NAMED_MISMATCH, errpos(hist_data->attrs->name)); + hist_err(tr, HIST_ERR_NAMED_MISMATCH, errpos(hist_data->attrs->name)); ret = -EINVAL; goto out; } @@ -5857,7 +5875,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, else if (hist_data->attrs->clear) hist_clear(test); else { - hist_err(HIST_ERR_TRIGGER_EEXIST, 0); + hist_err(tr, HIST_ERR_TRIGGER_EEXIST, 0); ret = -EEXIST; } goto out; @@ -5865,7 +5883,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, } new: if (hist_data->attrs->cont || hist_data->attrs->clear) { - hist_err(HIST_ERR_TRIGGER_ENOENT_CLEAR, 0); + hist_err(tr, HIST_ERR_TRIGGER_ENOENT_CLEAR, 0); ret = -ENOENT; goto out; } @@ -5890,7 +5908,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, ret = tracing_set_clock(file->tr, hist_data->attrs->clock); if (ret) { - hist_err(HIST_ERR_SET_CLOCK_FAIL, errpos(clock)); + hist_err(tr, HIST_ERR_SET_CLOCK_FAIL, errpos(clock)); goto out; } @@ -6108,7 +6126,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, trigger = strstrip(trigger); } - attrs = parse_hist_trigger_attrs(trigger); + attrs = parse_hist_trigger_attrs(file->tr, trigger); if (IS_ERR(attrs)) return PTR_ERR(attrs); -- cgit From 2f754e771b1a6feba670782e82c45555984ac43b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 1 Apr 2019 22:52:21 -0400 Subject: tracing: Have the error logs show up in the proper instances As each instance has their own error_log file, it makes more sense that the instances show the errors of their own instead of all error_logs having the same data. Make it that the errors show up in the instance error_log file that the error happens in. If no instance trace_array is available, then NULL can be passed in which will create the error in the top level instance (the one at the top of the tracefs directory). Reviewed-by: Masami Hiramatsu Reviewed-by: Tom Zanussi Tested-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 55 +++++++++++++++++++++++++------------- kernel/trace/trace.h | 5 +++- kernel/trace/trace_events_filter.c | 4 +-- kernel/trace/trace_events_hist.c | 3 ++- kernel/trace/trace_probe.c | 2 +- 5 files changed, 46 insertions(+), 23 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7978168f5041..3d55e9daae8c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6897,25 +6897,22 @@ struct tracing_log_err { char cmd[MAX_FILTER_STR_VAL]; /* what caused err */ }; -static LIST_HEAD(tracing_err_log); static DEFINE_MUTEX(tracing_err_log_lock); -static unsigned int n_tracing_err_log_entries; - -struct tracing_log_err *get_tracing_log_err(void) +struct tracing_log_err *get_tracing_log_err(struct trace_array *tr) { struct tracing_log_err *err; - if (n_tracing_err_log_entries < TRACING_LOG_ERRS_MAX) { + if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) { err = kzalloc(sizeof(*err), GFP_KERNEL); if (!err) err = ERR_PTR(-ENOMEM); - n_tracing_err_log_entries++; + tr->n_err_log_entries++; return err; } - err = list_first_entry(&tracing_err_log, struct tracing_log_err, list); + err = list_first_entry(&tr->err_log, struct tracing_log_err, list); list_del(&err->list); return err; @@ -6949,6 +6946,7 @@ unsigned int err_pos(char *cmd, const char *str) /** * tracing_log_err - write an error to the tracing error log + * @tr: The associated trace array for the error (NULL for top level array) * @loc: A string describing where the error occurred * @cmd: The tracing command that caused the error * @errs: The array of loc-specific static error strings @@ -6973,13 +6971,17 @@ unsigned int err_pos(char *cmd, const char *str) * existing callers for examples of how static strings are typically * defined for use with tracing_log_err(). */ -void tracing_log_err(const char *loc, const char *cmd, +void tracing_log_err(struct trace_array *tr, + const char *loc, const char *cmd, const char **errs, u8 type, u8 pos) { struct tracing_log_err *err; + if (!tr) + tr = &global_trace; + mutex_lock(&tracing_err_log_lock); - err = get_tracing_log_err(); + err = get_tracing_log_err(tr); if (PTR_ERR(err) == -ENOMEM) { mutex_unlock(&tracing_err_log_lock); return; @@ -6993,34 +6995,38 @@ void tracing_log_err(const char *loc, const char *cmd, err->info.pos = pos; err->info.ts = local_clock(); - list_add_tail(&err->list, &tracing_err_log); + list_add_tail(&err->list, &tr->err_log); mutex_unlock(&tracing_err_log_lock); } -static void clear_tracing_err_log(void) +static void clear_tracing_err_log(struct trace_array *tr) { struct tracing_log_err *err, *next; mutex_lock(&tracing_err_log_lock); - list_for_each_entry_safe(err, next, &tracing_err_log, list) { + list_for_each_entry_safe(err, next, &tr->err_log, list) { list_del(&err->list); kfree(err); } - n_tracing_err_log_entries = 0; + tr->n_err_log_entries = 0; mutex_unlock(&tracing_err_log_lock); } static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos) { + struct trace_array *tr = m->private; + mutex_lock(&tracing_err_log_lock); - return seq_list_start(&tracing_err_log, *pos); + return seq_list_start(&tr->err_log, *pos); } static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos) { - return seq_list_next(v, &tracing_err_log, pos); + struct trace_array *tr = m->private; + + return seq_list_next(v, &tr->err_log, pos); } static void tracing_err_log_seq_stop(struct seq_file *m, void *v) @@ -7067,15 +7073,25 @@ static const struct seq_operations tracing_err_log_seq_ops = { static int tracing_err_log_open(struct inode *inode, struct file *file) { + struct trace_array *tr = inode->i_private; int ret = 0; + if (trace_array_get(tr) < 0) + return -ENODEV; + /* If this file was opened for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) - clear_tracing_err_log(); + clear_tracing_err_log(tr); - if (file->f_mode & FMODE_READ) + if (file->f_mode & FMODE_READ) { ret = seq_open(file, &tracing_err_log_seq_ops); - + if (!ret) { + struct seq_file *m = file->private_data; + m->private = tr; + } else { + trace_array_put(tr); + } + } return ret; } @@ -7091,6 +7107,7 @@ static const struct file_operations tracing_err_log_fops = { .write = tracing_err_log_write, .read = seq_read, .llseek = seq_lseek, + .release = tracing_release_generic_tr, }; static int tracing_buffers_open(struct inode *inode, struct file *filp) @@ -8293,6 +8310,7 @@ struct trace_array *trace_array_create(const char *name) INIT_LIST_HEAD(&tr->systems); INIT_LIST_HEAD(&tr->events); INIT_LIST_HEAD(&tr->hist_vars); + INIT_LIST_HEAD(&tr->err_log); if (allocate_trace_buffers(tr, trace_buf_size) < 0) goto out_free_tr; @@ -9087,6 +9105,7 @@ __init static int tracer_alloc_buffers(void) INIT_LIST_HEAD(&global_trace.systems); INIT_LIST_HEAD(&global_trace.events); INIT_LIST_HEAD(&global_trace.hist_vars); + INIT_LIST_HEAD(&global_trace.err_log); list_add(&global_trace.list, &ftrace_trace_arrays); apply_trace_boot_options(); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 809c5d7f0064..da00a3d508c1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -293,11 +293,13 @@ struct trace_array { int nr_topts; bool clear_trace; int buffer_percent; + unsigned int n_err_log_entries; struct tracer *current_trace; unsigned int trace_flags; unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE]; unsigned int flags; raw_spinlock_t start_lock; + struct list_head err_log; struct dentry *dir; struct dentry *options; struct dentry *percpu_dir; @@ -1886,7 +1888,8 @@ extern ssize_t trace_parse_run_command(struct file *file, int (*createfn)(int, char**)); extern unsigned int err_pos(char *cmd, const char *str); -extern void tracing_log_err(const char *loc, const char *cmd, +extern void tracing_log_err(struct trace_array *tr, + const char *loc, const char *cmd, const char **errs, u8 type, u8 pos); /* diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 2b63930cd3e6..180ecb390baa 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -949,12 +949,12 @@ static void append_filter_err(struct trace_array *tr, if (pe->lasterr > 0) { trace_seq_printf(s, "\n%*s", pos, "^"); trace_seq_printf(s, "\nparse_error: %s\n", err_text[pe->lasterr]); - tracing_log_err("event filter parse error", + tracing_log_err(tr, "event filter parse error", filter->filter_string, err_text, pe->lasterr, pe->lasterr_pos); } else { trace_seq_printf(s, "\nError: (%d)\n", pe->lasterr); - tracing_log_err("event filter parse error", + tracing_log_err(tr, "event filter parse error", filter->filter_string, err_text, FILT_ERR_ERRNO, 0); } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index a167e439e9a1..a1136e043f17 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -621,7 +621,8 @@ static void last_cmd_set(struct trace_event_file *file, char *str) static void hist_err(struct trace_array *tr, u8 err_type, u8 err_pos) { - tracing_log_err(last_cmd_loc, last_cmd, err_text, err_type, err_pos); + tracing_log_err(tr, last_cmd_loc, last_cmd, err_text, + err_type, err_pos); } static void hist_err_clear(void) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index e11f98c49d72..4cc2d467d34c 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -186,7 +186,7 @@ void __trace_probe_log_err(int offset, int err_type) } *(p - 1) = '\0'; - tracing_log_err(trace_probe_log.subsystem, command, + tracing_log_err(NULL, trace_probe_log.subsystem, command, trace_probe_err_text, err_type, pos + offset); kfree(command); -- cgit From a8d655792a32312f6715ac789b860fee50168106 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 31 Mar 2019 18:48:25 -0500 Subject: tracing: Add error_log to README Add brief blurb about error_log to the 'Important files' section. Link: http://lkml.kernel.org/r/c81e60f9aded495081231a32d2d1023c4d043a7a.1554072478.git.tom.zanussi@linux.intel.com Acked-by: Masami Hiramatsu Acked-by: Namhyung Kim Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3d55e9daae8c..2bc18de7f0dc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4702,6 +4702,7 @@ static const char readme_msg[] = " trace_pipe\t\t- A consuming read to see the contents of the buffer\n" " current_tracer\t- function and latency tracers\n" " available_tracers\t- list of configured tracers for current_tracer\n" + " error_log\t- error log for failed commands (that support it)\n" " buffer_size_kb\t- view and modify size of per cpu buffer\n" " buffer_total_size_kb - view total size of all cpu buffers\n\n" " trace_clock\t\t-change the clock used to order events\n" -- cgit From 4f5fbd78a7b40bab538ae0d316363530da751e42 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 26 Mar 2019 20:13:11 +0800 Subject: rcu: validate arguments for rcu tracepoints When CONFIG_RCU_TRACE is not set, all these tracepoints are defined as do-nothing macro. We'd better make those inline functions that take proper arguments. As RCU_TRACE() is defined as do-nothing marco as well when CONFIG_RCU_TRACE is not set, so we can clean it up. Link: http://lkml.kernel.org/r/1553602391-11926-4-git-send-email-laoar.shao@gmail.com Reviewed-by: Paul E. McKenney Signed-off-by: Yafang Shao Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/rcu.h | 81 ++++++++++++++-------------------------------- kernel/rcu/rcu.h | 9 ++---- kernel/rcu/tree.c | 8 ++--- 3 files changed, 31 insertions(+), 67 deletions(-) (limited to 'kernel') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index f0c4d10e614b..e3f357b89432 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -7,6 +7,12 @@ #include +#ifdef CONFIG_RCU_TRACE +#define TRACE_EVENT_RCU TRACE_EVENT +#else +#define TRACE_EVENT_RCU TRACE_EVENT_NOP +#endif + /* * Tracepoint for start/end markers used for utilization calculations. * By convention, the string is of the following forms: @@ -35,8 +41,6 @@ TRACE_EVENT(rcu_utilization, TP_printk("%s", __entry->s) ); -#ifdef CONFIG_RCU_TRACE - #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) /* @@ -62,7 +66,7 @@ TRACE_EVENT(rcu_utilization, * "end": End a grace period. * "cpuend": CPU first notices a grace-period end. */ -TRACE_EVENT(rcu_grace_period, +TRACE_EVENT_RCU(rcu_grace_period, TP_PROTO(const char *rcuname, unsigned long gp_seq, const char *gpevent), @@ -101,7 +105,7 @@ TRACE_EVENT(rcu_grace_period, * "Cleanup": Clean up rcu_node structure after previous GP. * "CleanupMore": Clean up, and another GP is needed. */ -TRACE_EVENT(rcu_future_grace_period, +TRACE_EVENT_RCU(rcu_future_grace_period, TP_PROTO(const char *rcuname, unsigned long gp_seq, unsigned long gp_seq_req, u8 level, int grplo, int grphi, @@ -141,7 +145,7 @@ TRACE_EVENT(rcu_future_grace_period, * rcu_node structure, and the mask of CPUs that will be waited for. * All but the type of RCU are extracted from the rcu_node structure. */ -TRACE_EVENT(rcu_grace_period_init, +TRACE_EVENT_RCU(rcu_grace_period_init, TP_PROTO(const char *rcuname, unsigned long gp_seq, u8 level, int grplo, int grphi, unsigned long qsmask), @@ -186,7 +190,7 @@ TRACE_EVENT(rcu_grace_period_init, * "endwake": Woke piggybackers up. * "done": Someone else did the expedited grace period for us. */ -TRACE_EVENT(rcu_exp_grace_period, +TRACE_EVENT_RCU(rcu_exp_grace_period, TP_PROTO(const char *rcuname, unsigned long gpseq, const char *gpevent), @@ -218,7 +222,7 @@ TRACE_EVENT(rcu_exp_grace_period, * "nxtlvl": Advance to next level of rcu_node funnel * "wait": Wait for someone else to do expedited GP */ -TRACE_EVENT(rcu_exp_funnel_lock, +TRACE_EVENT_RCU(rcu_exp_funnel_lock, TP_PROTO(const char *rcuname, u8 level, int grplo, int grphi, const char *gpevent), @@ -269,7 +273,7 @@ TRACE_EVENT(rcu_exp_funnel_lock, * "WaitQueue": Enqueue partially done, timed wait for it to complete. * "WokeQueue": Partial enqueue now complete. */ -TRACE_EVENT(rcu_nocb_wake, +TRACE_EVENT_RCU(rcu_nocb_wake, TP_PROTO(const char *rcuname, int cpu, const char *reason), @@ -297,7 +301,7 @@ TRACE_EVENT(rcu_nocb_wake, * include SRCU), the grace-period number that the task is blocking * (the current or the next), and the task's PID. */ -TRACE_EVENT(rcu_preempt_task, +TRACE_EVENT_RCU(rcu_preempt_task, TP_PROTO(const char *rcuname, int pid, unsigned long gp_seq), @@ -324,7 +328,7 @@ TRACE_EVENT(rcu_preempt_task, * read-side critical section exiting that critical section. Track the * type of RCU (which one day might include SRCU) and the task's PID. */ -TRACE_EVENT(rcu_unlock_preempted_task, +TRACE_EVENT_RCU(rcu_unlock_preempted_task, TP_PROTO(const char *rcuname, unsigned long gp_seq, int pid), @@ -353,7 +357,7 @@ TRACE_EVENT(rcu_unlock_preempted_task, * whether there are any blocked tasks blocking the current grace period. * All but the type of RCU are extracted from the rcu_node structure. */ -TRACE_EVENT(rcu_quiescent_state_report, +TRACE_EVENT_RCU(rcu_quiescent_state_report, TP_PROTO(const char *rcuname, unsigned long gp_seq, unsigned long mask, unsigned long qsmask, @@ -396,7 +400,7 @@ TRACE_EVENT(rcu_quiescent_state_report, * state, which can be "dti" for dyntick-idle mode or "kick" when kicking * a CPU that has been in dyntick-idle mode for too long. */ -TRACE_EVENT(rcu_fqs, +TRACE_EVENT_RCU(rcu_fqs, TP_PROTO(const char *rcuname, unsigned long gp_seq, int cpu, const char *qsevent), @@ -436,7 +440,7 @@ TRACE_EVENT(rcu_fqs, * events use two separate counters, and that the "++=" and "--=" events * for irq/NMI will change the counter by two, otherwise by one. */ -TRACE_EVENT(rcu_dyntick, +TRACE_EVENT_RCU(rcu_dyntick, TP_PROTO(const char *polarity, long oldnesting, long newnesting, atomic_t dynticks), @@ -468,7 +472,7 @@ TRACE_EVENT(rcu_dyntick, * number of lazy callbacks queued, and the fourth element is the * total number of callbacks queued. */ -TRACE_EVENT(rcu_callback, +TRACE_EVENT_RCU(rcu_callback, TP_PROTO(const char *rcuname, struct rcu_head *rhp, long qlen_lazy, long qlen), @@ -504,7 +508,7 @@ TRACE_EVENT(rcu_callback, * the fourth argument is the number of lazy callbacks queued, and the * fifth argument is the total number of callbacks queued. */ -TRACE_EVENT(rcu_kfree_callback, +TRACE_EVENT_RCU(rcu_kfree_callback, TP_PROTO(const char *rcuname, struct rcu_head *rhp, unsigned long offset, long qlen_lazy, long qlen), @@ -539,7 +543,7 @@ TRACE_EVENT(rcu_kfree_callback, * the total number of callbacks queued, and the fourth argument is * the current RCU-callback batch limit. */ -TRACE_EVENT(rcu_batch_start, +TRACE_EVENT_RCU(rcu_batch_start, TP_PROTO(const char *rcuname, long qlen_lazy, long qlen, long blimit), @@ -569,7 +573,7 @@ TRACE_EVENT(rcu_batch_start, * The first argument is the type of RCU, and the second argument is * a pointer to the RCU callback itself. */ -TRACE_EVENT(rcu_invoke_callback, +TRACE_EVENT_RCU(rcu_invoke_callback, TP_PROTO(const char *rcuname, struct rcu_head *rhp), @@ -598,7 +602,7 @@ TRACE_EVENT(rcu_invoke_callback, * is the offset of the callback within the enclosing RCU-protected * data structure. */ -TRACE_EVENT(rcu_invoke_kfree_callback, +TRACE_EVENT_RCU(rcu_invoke_kfree_callback, TP_PROTO(const char *rcuname, struct rcu_head *rhp, unsigned long offset), @@ -631,7 +635,7 @@ TRACE_EVENT(rcu_invoke_kfree_callback, * and the sixth argument (risk) is the return value from * rcu_is_callbacks_kthread(). */ -TRACE_EVENT(rcu_batch_end, +TRACE_EVENT_RCU(rcu_batch_end, TP_PROTO(const char *rcuname, int callbacks_invoked, char cb, char nr, char iit, char risk), @@ -673,7 +677,7 @@ TRACE_EVENT(rcu_batch_end, * callback address can be NULL. */ #define RCUTORTURENAME_LEN 8 -TRACE_EVENT(rcu_torture_read, +TRACE_EVENT_RCU(rcu_torture_read, TP_PROTO(const char *rcutorturename, struct rcu_head *rhp, unsigned long secs, unsigned long c_old, unsigned long c), @@ -721,7 +725,7 @@ TRACE_EVENT(rcu_torture_read, * The "cpu" argument is the CPU or -1 if meaningless, the "cnt" argument * is the count of remaining callbacks, and "done" is the piggybacking count. */ -TRACE_EVENT(rcu_barrier, +TRACE_EVENT_RCU(rcu_barrier, TP_PROTO(const char *rcuname, const char *s, int cpu, int cnt, unsigned long done), @@ -748,41 +752,6 @@ TRACE_EVENT(rcu_barrier, __entry->done) ); -#else /* #ifdef CONFIG_RCU_TRACE */ - -#define trace_rcu_grace_period(rcuname, gp_seq, gpevent) do { } while (0) -#define trace_rcu_future_grace_period(rcuname, gp_seq, gp_seq_req, \ - level, grplo, grphi, event) \ - do { } while (0) -#define trace_rcu_grace_period_init(rcuname, gp_seq, level, grplo, grphi, \ - qsmask) do { } while (0) -#define trace_rcu_exp_grace_period(rcuname, gqseq, gpevent) \ - do { } while (0) -#define trace_rcu_exp_funnel_lock(rcuname, level, grplo, grphi, gpevent) \ - do { } while (0) -#define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0) -#define trace_rcu_preempt_task(rcuname, pid, gp_seq) do { } while (0) -#define trace_rcu_unlock_preempted_task(rcuname, gp_seq, pid) do { } while (0) -#define trace_rcu_quiescent_state_report(rcuname, gp_seq, mask, qsmask, level, \ - grplo, grphi, gp_tasks) do { } \ - while (0) -#define trace_rcu_fqs(rcuname, gp_seq, cpu, qsevent) do { } while (0) -#define trace_rcu_dyntick(polarity, oldnesting, newnesting, dyntick) do { } while (0) -#define trace_rcu_callback(rcuname, rhp, qlen_lazy, qlen) do { } while (0) -#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen_lazy, qlen) \ - do { } while (0) -#define trace_rcu_batch_start(rcuname, qlen_lazy, qlen, blimit) \ - do { } while (0) -#define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) -#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) -#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ - do { } while (0) -#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \ - do { } while (0) -#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0) - -#endif /* #else #ifdef CONFIG_RCU_TRACE */ - #endif /* _TRACE_RCU_H */ /* This part must be outside protection */ diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index acee72c0b24b..442ace406ac9 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -11,11 +11,6 @@ #define __LINUX_RCU_H #include -#ifdef CONFIG_RCU_TRACE -#define RCU_TRACE(stmt) stmt -#else /* #ifdef CONFIG_RCU_TRACE */ -#define RCU_TRACE(stmt) -#endif /* #else #ifdef CONFIG_RCU_TRACE */ /* Offset to allow distinguishing irq vs. task-based idle entry/exit. */ #define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) @@ -216,12 +211,12 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) rcu_lock_acquire(&rcu_callback_map); if (__is_kfree_rcu_offset(offset)) { - RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset);) + trace_rcu_invoke_kfree_callback(rn, head, offset); kfree((void *)head - offset); rcu_lock_release(&rcu_callback_map); return true; } else { - RCU_TRACE(trace_rcu_invoke_callback(rn, head);) + trace_rcu_invoke_callback(rn, head); f = head->func; WRITE_ONCE(head->func, (rcu_callback_t)0L); f(head); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index acd6ccf56faf..906563a1cdea 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2352,14 +2352,14 @@ rcu_check_quiescent_state(struct rcu_data *rdp) */ int rcutree_dying_cpu(unsigned int cpu) { - RCU_TRACE(bool blkd;) - RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(&rcu_data);) - RCU_TRACE(struct rcu_node *rnp = rdp->mynode;) + bool blkd; + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + struct rcu_node *rnp = rdp->mynode; if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) return 0; - RCU_TRACE(blkd = !!(rnp->qsmask & rdp->grpmask);) + blkd = !!(rnp->qsmask & rdp->grpmask); trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, blkd ? TPS("cpuofl") : TPS("cpuofl-bgp")); return 0; -- cgit From c13edf8106f6ad1edb9b7e011351fbaf83ceb992 Mon Sep 17 00:00:00 2001 From: Clément Leger Date: Wed, 27 Mar 2019 14:06:27 +0100 Subject: dma: select GENERIC_ALLOCATOR for DMA_REMAP When DMA_REMAP is enabled, code in remap.c needs generic allocator. It currently worked since few architectures uses it (arm64, csky) and they both select GENERIC_ALLOCATOR. Select it when using DMA_REMAP to have correct dependencies. Signed-off-by: Clement Leger Signed-off-by: Christoph Hellwig --- kernel/dma/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index a06ba3013b3b..52b704e2b97a 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -57,6 +57,7 @@ config SWIOTLB config DMA_REMAP depends on MMU + select GENERIC_ALLOCATOR bool config DMA_DIRECT_REMAP -- cgit From d7e02a931235de0779d44c6f8d211df0eca304b8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Mar 2019 18:45:21 +0100 Subject: dma-mapping: remove leftover NULL device support Most dma_map_ops implementations already had some issues with a NULL device, or did simply crash if one was fed to them. Now that we have cleaned up all the obvious offenders we can stop to pretend we support this mode. Signed-off-by: Christoph Hellwig --- Documentation/DMA-API-HOWTO.txt | 13 ++++++------- include/linux/dma-mapping.h | 6 +++--- kernel/dma/direct.c | 2 +- 3 files changed, 10 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt index 1a721d0f35c8..0e98cf7466dd 100644 --- a/Documentation/DMA-API-HOWTO.txt +++ b/Documentation/DMA-API-HOWTO.txt @@ -365,13 +365,12 @@ __get_free_pages() (but takes size instead of a page order). If your driver needs regions sized smaller than a page, you may prefer using the dma_pool interface, described below. -The consistent DMA mapping interfaces, for non-NULL dev, will by -default return a DMA address which is 32-bit addressable. Even if the -device indicates (via DMA mask) that it may address the upper 32-bits, -consistent allocation will only return > 32-bit addresses for DMA if -the consistent DMA mask has been explicitly changed via -dma_set_coherent_mask(). This is true of the dma_pool interface as -well. +The consistent DMA mapping interfaces, will by default return a DMA address +which is 32-bit addressable. Even if the device indicates (via the DMA mask) +that it may address the upper 32-bits, consistent allocation will only +return > 32-bit addresses for DMA if the consistent DMA mask has been +explicitly changed via dma_set_coherent_mask(). This is true of the +dma_pool interface as well. dma_alloc_coherent() returns two values: the virtual address which you can use to access it from the CPU and dma_handle which you pass to the diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 75e60be91e5f..6309a721394b 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -267,9 +267,9 @@ size_t dma_direct_max_mapping_size(struct device *dev); static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { - if (dev && dev->dma_ops) + if (dev->dma_ops) return dev->dma_ops; - return get_arch_dma_ops(dev ? dev->bus : NULL); + return get_arch_dma_ops(dev->bus); } static inline void set_dma_ops(struct device *dev, @@ -650,7 +650,7 @@ static inline void dma_free_coherent(struct device *dev, size_t size, static inline u64 dma_get_mask(struct device *dev) { - if (dev && dev->dma_mask && *dev->dma_mask) + if (dev->dma_mask && *dev->dma_mask) return *dev->dma_mask; return DMA_BIT_MASK(32); } diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index fcdb23e8d2fc..2c2772e9702a 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -311,7 +311,7 @@ static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr, size_t size) { return swiotlb_force != SWIOTLB_FORCE && - (!dev || dma_capable(dev, dma_addr, size)); + dma_capable(dev, dma_addr, size); } dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, -- cgit From e43e2657fe77a37b13643e2469670ecdb0ba5e10 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Dec 2018 14:32:02 +0100 Subject: x86/dma: Remove the x86_dma_fallback_dev hack Now that we removed support for the NULL device argument in the DMA API, there is no need to cater for that in the x86 code. Signed-off-by: Christoph Hellwig --- arch/x86/include/asm/dma-mapping.h | 10 ---------- arch/x86/kernel/amd_gart_64.c | 6 ------ arch/x86/kernel/pci-dma.c | 20 -------------------- kernel/dma/mapping.c | 7 ------- 4 files changed, 43 deletions(-) (limited to 'kernel') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index ce4d176b3d13..6b15a24930e0 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -13,14 +13,7 @@ #include #include -#ifdef CONFIG_ISA -# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) -#else -# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) -#endif - extern int iommu_merge; -extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; extern const struct dma_map_ops *dma_ops; @@ -30,7 +23,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return dma_ops; } -bool arch_dma_alloc_attrs(struct device **dev); -#define arch_dma_alloc_attrs arch_dma_alloc_attrs - #endif diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 2c0aa34af69c..bf7f13ea3c64 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -233,9 +233,6 @@ static dma_addr_t gart_map_page(struct device *dev, struct page *page, unsigned long bus; phys_addr_t paddr = page_to_phys(page) + offset; - if (!dev) - dev = &x86_dma_fallback_dev; - if (!need_iommu(dev, paddr, size)) return paddr; @@ -392,9 +389,6 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (nents == 0) return 0; - if (!dev) - dev = &x86_dma_fallback_dev; - out = 0; start = 0; start_sg = sg; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index d460998ae828..dcd272dbd0a9 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -51,14 +51,6 @@ int iommu_pass_through __read_mostly; extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; -/* Dummy device used for NULL arguments (normally ISA). */ -struct device x86_dma_fallback_dev = { - .init_name = "fallback device", - .coherent_dma_mask = ISA_DMA_BIT_MASK, - .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, -}; -EXPORT_SYMBOL(x86_dma_fallback_dev); - void __init pci_iommu_alloc(void) { struct iommu_table_entry *p; @@ -77,18 +69,6 @@ void __init pci_iommu_alloc(void) } } -bool arch_dma_alloc_attrs(struct device **dev) -{ - if (!*dev) - *dev = &x86_dma_fallback_dev; - - if (!is_device_dma_capable(*dev)) - return false; - return true; - -} -EXPORT_SYMBOL(arch_dma_alloc_attrs); - /* * See for the iommu kernel * parameter documentation. diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index c000906348c9..685a53f2a793 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -238,10 +238,6 @@ u64 dma_get_required_mask(struct device *dev) } EXPORT_SYMBOL_GPL(dma_get_required_mask); -#ifndef arch_dma_alloc_attrs -#define arch_dma_alloc_attrs(dev) (true) -#endif - void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) { @@ -256,9 +252,6 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, /* let the implementation decide on the zone to allocate from: */ flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); - if (!arch_dma_alloc_attrs(&dev)) - return NULL; - if (dma_is_direct(ops)) cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs); else if (ops->alloc) -- cgit From 24acfb71822566e4d469b4992a7b3b9f873e0083 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Mar 2019 17:55:47 +0100 Subject: workqueue: Use normal rcu There is no need for sched_rcu. The undocumented reason why sched_rcu is used is to avoid a few explicit rcu_read_lock()/unlock() pairs by the fact that sched_rcu reader side critical sections are also protected by preempt or irq disabled regions. Replace rcu_read_lock_sched with rcu_read_lock and acquire the RCU lock where it is not yet explicit acquired. Replace local_irq_disable() with rcu_read_lock(). Update asserts. Signed-off-by: Thomas Gleixner [bigeasy: mangle changelog a little] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Tejun Heo --- kernel/workqueue.c | 93 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 42 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 21721faa923c..37a32884986b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -127,16 +127,16 @@ enum { * * PL: wq_pool_mutex protected. * - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads. + * PR: wq_pool_mutex protected for writes. RCU protected for reads. * * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads. * * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or - * sched-RCU for reads. + * RCU for reads. * * WQ: wq->mutex protected. * - * WR: wq->mutex protected for writes. Sched-RCU protected for reads. + * WR: wq->mutex protected for writes. RCU protected for reads. * * MD: wq_mayday_lock protected. */ @@ -183,7 +183,7 @@ struct worker_pool { atomic_t nr_running ____cacheline_aligned_in_smp; /* - * Destruction of pool is sched-RCU protected to allow dereferences + * Destruction of pool is RCU protected to allow dereferences * from get_work_pool(). */ struct rcu_head rcu; @@ -212,7 +212,7 @@ struct pool_workqueue { /* * Release of unbound pwq is punted to system_wq. See put_pwq() * and pwq_unbound_release_workfn() for details. pool_workqueue - * itself is also sched-RCU protected so that the first pwq can be + * itself is also RCU protected so that the first pwq can be * determined without grabbing wq->mutex. */ struct work_struct unbound_release_work; @@ -266,8 +266,8 @@ struct workqueue_struct { char name[WQ_NAME_LEN]; /* I: workqueue name */ /* - * Destruction of workqueue_struct is sched-RCU protected to allow - * walking the workqueues list without grabbing wq_pool_mutex. + * Destruction of workqueue_struct is RCU protected to allow walking + * the workqueues list without grabbing wq_pool_mutex. * This is used to dump all workqueues from sysrq. */ struct rcu_head rcu; @@ -359,20 +359,20 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); #include #define assert_rcu_or_pool_mutex() \ - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ !lockdep_is_held(&wq_pool_mutex), \ - "sched RCU or wq_pool_mutex should be held") + "RCU or wq_pool_mutex should be held") #define assert_rcu_or_wq_mutex(wq) \ - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ !lockdep_is_held(&wq->mutex), \ - "sched RCU or wq->mutex should be held") + "RCU or wq->mutex should be held") #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ !lockdep_is_held(&wq->mutex) && \ !lockdep_is_held(&wq_pool_mutex), \ - "sched RCU, wq->mutex or wq_pool_mutex should be held") + "RCU, wq->mutex or wq_pool_mutex should be held") #define for_each_cpu_worker_pool(pool, cpu) \ for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ @@ -384,7 +384,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); * @pool: iteration cursor * @pi: integer used for iteration * - * This must be called either with wq_pool_mutex held or sched RCU read + * This must be called either with wq_pool_mutex held or RCU read * locked. If the pool needs to be used beyond the locking in effect, the * caller is responsible for guaranteeing that the pool stays online. * @@ -416,7 +416,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); * @pwq: iteration cursor * @wq: the target workqueue * - * This must be called either with wq->mutex held or sched RCU read locked. + * This must be called either with wq->mutex held or RCU read locked. * If the pwq needs to be used beyond the locking in effect, the caller is * responsible for guaranteeing that the pwq stays online. * @@ -552,7 +552,7 @@ static int worker_pool_assign_id(struct worker_pool *pool) * @wq: the target workqueue * @node: the node ID * - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU + * This must be called with any of wq_pool_mutex, wq->mutex or RCU * read locked. * If the pwq needs to be used beyond the locking in effect, the caller is * responsible for guaranteeing that the pwq stays online. @@ -696,8 +696,8 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work) * @work: the work item of interest * * Pools are created and destroyed under wq_pool_mutex, and allows read - * access under sched-RCU read lock. As such, this function should be - * called under wq_pool_mutex or with preemption disabled. + * access under RCU read lock. As such, this function should be + * called under wq_pool_mutex or inside of a rcu_read_lock() region. * * All fields of the returned pool are accessible as long as the above * mentioned locking is in effect. If the returned pool needs to be used @@ -1133,7 +1133,7 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq) { if (pwq) { /* - * As both pwqs and pools are sched-RCU protected, the + * As both pwqs and pools are RCU protected, the * following lock operations are safe. */ spin_lock_irq(&pwq->pool->lock); @@ -1261,6 +1261,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) return 0; + rcu_read_lock(); /* * The queueing is in progress, or it is already queued. Try to * steal it from ->worklist without clearing WORK_STRUCT_PENDING. @@ -1299,10 +1300,12 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, set_work_pool_and_keep_pending(work, pool->id); spin_unlock(&pool->lock); + rcu_read_unlock(); return 1; } spin_unlock(&pool->lock); fail: + rcu_read_unlock(); local_irq_restore(*flags); if (work_is_canceling(work)) return -ENOENT; @@ -1416,6 +1419,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, if (unlikely(wq->flags & __WQ_DRAINING) && WARN_ON_ONCE(!is_chained_work(wq))) return; + rcu_read_lock(); retry: if (req_cpu == WORK_CPU_UNBOUND) cpu = wq_select_unbound_cpu(raw_smp_processor_id()); @@ -1472,10 +1476,8 @@ retry: /* pwq determined, queue */ trace_workqueue_queue_work(req_cpu, pwq, work); - if (WARN_ON(!list_empty(&work->entry))) { - spin_unlock(&pwq->pool->lock); - return; - } + if (WARN_ON(!list_empty(&work->entry))) + goto out; pwq->nr_in_flight[pwq->work_color]++; work_flags = work_color_to_flags(pwq->work_color); @@ -1493,7 +1495,9 @@ retry: insert_work(pwq, work, worklist, work_flags); +out: spin_unlock(&pwq->pool->lock); + rcu_read_unlock(); } /** @@ -2975,14 +2979,14 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, might_sleep(); - local_irq_disable(); + rcu_read_lock(); pool = get_work_pool(work); if (!pool) { - local_irq_enable(); + rcu_read_unlock(); return false; } - spin_lock(&pool->lock); + spin_lock_irq(&pool->lock); /* see the comment in try_to_grab_pending() with the same code */ pwq = get_work_pwq(work); if (pwq) { @@ -3014,10 +3018,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, lock_map_acquire(&pwq->wq->lockdep_map); lock_map_release(&pwq->wq->lockdep_map); } - + rcu_read_unlock(); return true; already_gone: spin_unlock_irq(&pool->lock); + rcu_read_unlock(); return false; } @@ -3504,7 +3509,7 @@ static void rcu_free_pool(struct rcu_head *rcu) * put_unbound_pool - put a worker_pool * @pool: worker_pool to put * - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU * safe manner. get_unbound_pool() calls this function on its failure path * and this function should be able to release pools which went through, * successfully or not, init_worker_pool(). @@ -3558,7 +3563,7 @@ static void put_unbound_pool(struct worker_pool *pool) del_timer_sync(&pool->idle_timer); del_timer_sync(&pool->mayday_timer); - /* sched-RCU protected to allow dereferences from get_work_pool() */ + /* RCU protected to allow dereferences from get_work_pool() */ call_rcu(&pool->rcu, rcu_free_pool); } @@ -4472,7 +4477,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) struct pool_workqueue *pwq; bool ret; - rcu_read_lock_sched(); + rcu_read_lock(); + preempt_disable(); if (cpu == WORK_CPU_UNBOUND) cpu = smp_processor_id(); @@ -4483,7 +4489,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); ret = !list_empty(&pwq->delayed_works); - rcu_read_unlock_sched(); + preempt_enable(); + rcu_read_unlock(); return ret; } @@ -4509,15 +4516,15 @@ unsigned int work_busy(struct work_struct *work) if (work_pending(work)) ret |= WORK_BUSY_PENDING; - local_irq_save(flags); + rcu_read_lock(); pool = get_work_pool(work); if (pool) { - spin_lock(&pool->lock); + spin_lock_irqsave(&pool->lock, flags); if (find_worker_executing_work(pool, work)) ret |= WORK_BUSY_RUNNING; - spin_unlock(&pool->lock); + spin_unlock_irqrestore(&pool->lock, flags); } - local_irq_restore(flags); + rcu_read_unlock(); return ret; } @@ -4701,7 +4708,7 @@ void show_workqueue_state(void) unsigned long flags; int pi; - rcu_read_lock_sched(); + rcu_read_lock(); pr_info("Showing busy workqueues and worker pools:\n"); @@ -4766,7 +4773,7 @@ void show_workqueue_state(void) touch_nmi_watchdog(); } - rcu_read_unlock_sched(); + rcu_read_unlock(); } /* used to show worker information through /proc/PID/{comm,stat,status} */ @@ -5153,16 +5160,16 @@ bool freeze_workqueues_busy(void) * nr_active is monotonically decreasing. It's safe * to peek without lock. */ - rcu_read_lock_sched(); + rcu_read_lock(); for_each_pwq(pwq, wq) { WARN_ON_ONCE(pwq->nr_active < 0); if (pwq->nr_active) { busy = true; - rcu_read_unlock_sched(); + rcu_read_unlock(); goto out_unlock; } } - rcu_read_unlock_sched(); + rcu_read_unlock(); } out_unlock: mutex_unlock(&wq_pool_mutex); @@ -5357,7 +5364,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, const char *delim = ""; int node, written = 0; - rcu_read_lock_sched(); + get_online_cpus(); + rcu_read_lock(); for_each_node(node) { written += scnprintf(buf + written, PAGE_SIZE - written, "%s%d:%d", delim, node, @@ -5365,7 +5373,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, delim = " "; } written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); - rcu_read_unlock_sched(); + rcu_read_unlock(); + put_online_cpus(); return written; } -- cgit From 3b15d09f7e6db44065aaba5fd16dc7420035c5ad Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 28 Feb 2019 13:13:26 +0800 Subject: time: Introduce jiffies64_to_msecs() there is a similar helper in net/netfilter/nf_tables_api.c, this maybe become a common request someday, so move it to time.c Signed-off-by: Zhang Yu Signed-off-by: Li RongQing Acked-by: John Stultz Signed-off-by: Pablo Neira Ayuso --- include/linux/jiffies.h | 1 + kernel/time/time.c | 10 ++++++++++ net/netfilter/nf_tables_api.c | 4 +--- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index fa928242567d..1b6d31da7cbc 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -297,6 +297,7 @@ static inline u64 jiffies_to_nsecs(const unsigned long j) } extern u64 jiffies64_to_nsecs(u64 j); +extern u64 jiffies64_to_msecs(u64 j); extern unsigned long __msecs_to_jiffies(const unsigned int m); #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) diff --git a/kernel/time/time.c b/kernel/time/time.c index c3f756f8534b..9e3f79d4f5a8 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -783,6 +783,16 @@ u64 jiffies64_to_nsecs(u64 j) } EXPORT_SYMBOL(jiffies64_to_nsecs); +u64 jiffies64_to_msecs(const u64 j) +{ +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) + return (MSEC_PER_SEC / HZ) * j; +#else + return div_u64(j * HZ_TO_MSEC_NUM, HZ_TO_MSEC_DEN); +#endif +} +EXPORT_SYMBOL(jiffies64_to_msecs); + /** * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64 * diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 90e6b09ef2af..ee1b0d1445aa 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3193,9 +3193,7 @@ static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) static __be64 nf_jiffies64_to_msecs(u64 input) { - u64 ms = jiffies64_to_nsecs(input); - - return cpu_to_be64(div_u64(ms, NSEC_PER_MSEC)); + return cpu_to_be64(jiffies64_to_msecs(input)); } static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, -- cgit From 699c1868a743f530081f429058616a2dd5d8a4b2 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Mon, 8 Apr 2019 12:50:57 -0400 Subject: audit: purge unnecessary list_empty calls The original conditions that led to the use of list_empty() to optimize list_for_each_entry_rcu() in auditfilter.c and auditsc.c code have been removed without removing the list_empty() call, but this code example has been copied several times. Remove the unnecessary list_empty() calls. Please see upstream github issue https://github.com/linux-audit/audit-kernel/issues/112 Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/auditfilter.c | 2 -- kernel/auditsc.c | 64 ++++++++++++++++++++++------------------------------ 2 files changed, 27 insertions(+), 39 deletions(-) (limited to 'kernel') diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 63f8b3f26fab..2c3c2f349b23 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1315,8 +1315,6 @@ int audit_filter(int msgtype, unsigned int listtype) int ret = 1; /* Audit by default */ rcu_read_lock(); - if (list_empty(&audit_filter_list[listtype])) - goto unlock_and_return; list_for_each_entry_rcu(e, &audit_filter_list[listtype], list) { int i, result = 0; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 98a98e6dca05..51a2ceb3a1ca 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -771,15 +771,13 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, return AUDIT_DISABLED; rcu_read_lock(); - if (!list_empty(list)) { - list_for_each_entry_rcu(e, list, list) { - if (audit_in_mask(&e->rule, ctx->major) && - audit_filter_rules(tsk, &e->rule, ctx, NULL, - &state, false)) { - rcu_read_unlock(); - ctx->current_state = state; - return state; - } + list_for_each_entry_rcu(e, list, list) { + if (audit_in_mask(&e->rule, ctx->major) && + audit_filter_rules(tsk, &e->rule, ctx, NULL, + &state, false)) { + rcu_read_unlock(); + ctx->current_state = state; + return state; } } rcu_read_unlock(); @@ -798,9 +796,6 @@ static int audit_filter_inode_name(struct task_struct *tsk, struct audit_entry *e; enum audit_state state; - if (list_empty(list)) - return 0; - list_for_each_entry_rcu(e, list, list) { if (audit_in_mask(&e->rule, ctx->major) && audit_filter_rules(tsk, &e->rule, ctx, n, &state, false)) { @@ -808,7 +803,6 @@ static int audit_filter_inode_name(struct task_struct *tsk, return 1; } } - return 0; } @@ -1945,18 +1939,16 @@ void __audit_inode(struct filename *name, const struct dentry *dentry, return; rcu_read_lock(); - if (!list_empty(list)) { - list_for_each_entry_rcu(e, list, list) { - for (i = 0; i < e->rule.field_count; i++) { - struct audit_field *f = &e->rule.fields[i]; - - if (f->type == AUDIT_FSTYPE - && audit_comparator(inode->i_sb->s_magic, - f->op, f->val) - && e->rule.action == AUDIT_NEVER) { - rcu_read_unlock(); - return; - } + list_for_each_entry_rcu(e, list, list) { + for (i = 0; i < e->rule.field_count; i++) { + struct audit_field *f = &e->rule.fields[i]; + + if (f->type == AUDIT_FSTYPE + && audit_comparator(inode->i_sb->s_magic, + f->op, f->val) + && e->rule.action == AUDIT_NEVER) { + rcu_read_unlock(); + return; } } } @@ -2065,18 +2057,16 @@ void __audit_inode_child(struct inode *parent, return; rcu_read_lock(); - if (!list_empty(list)) { - list_for_each_entry_rcu(e, list, list) { - for (i = 0; i < e->rule.field_count; i++) { - struct audit_field *f = &e->rule.fields[i]; - - if (f->type == AUDIT_FSTYPE - && audit_comparator(parent->i_sb->s_magic, - f->op, f->val) - && e->rule.action == AUDIT_NEVER) { - rcu_read_unlock(); - return; - } + list_for_each_entry_rcu(e, list, list) { + for (i = 0; i < e->rule.field_count; i++) { + struct audit_field *f = &e->rule.fields[i]; + + if (f->type == AUDIT_FSTYPE + && audit_comparator(parent->i_sb->s_magic, + f->op, f->val) + && e->rule.action == AUDIT_NEVER) { + rcu_read_unlock(); + return; } } } -- cgit From d75f773c86a2b8b7278e2c33343b46a4024bc002 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 25 Mar 2019 21:32:28 +0200 Subject: treewide: Switch printk users from %pf and %pF to %ps and %pS, respectively %pF and %pf are functionally equivalent to %pS and %ps conversion specifiers. The former are deprecated, therefore switch the current users to use the preferred variant. The changes have been produced by the following command: git grep -l '%p[fF]' | grep -v '^\(tools\|Documentation\)/' | \ while read i; do perl -i -pe 's/%pf/%ps/g; s/%pF/%pS/g;' $i; done And verifying the result. Link: http://lkml.kernel.org/r/20190325193229.23390-1-sakari.ailus@linux.intel.com Cc: Andy Shevchenko Cc: linux-arm-kernel@lists.infradead.org Cc: sparclinux@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: xen-devel@lists.xenproject.org Cc: linux-acpi@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: drbd-dev@lists.linbit.com Cc: linux-block@vger.kernel.org Cc: linux-mmc@vger.kernel.org Cc: linux-nvdimm@lists.01.org Cc: linux-pci@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: linux-btrfs@vger.kernel.org Cc: linux-f2fs-devel@lists.sourceforge.net Cc: linux-mm@kvack.org Cc: ceph-devel@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Sakari Ailus Acked-by: David Sterba (for btrfs) Acked-by: Mike Rapoport (for mm/memblock.c) Acked-by: Bjorn Helgaas (for drivers/pci) Acked-by: Rafael J. Wysocki Signed-off-by: Petr Mladek --- arch/alpha/kernel/pci_iommu.c | 20 ++++++++++---------- arch/arm/mach-imx/pm-imx6.c | 2 +- arch/arm/mm/alignment.c | 2 +- arch/arm/nwfpe/fpmodule.c | 2 +- arch/microblaze/mm/pgtable.c | 2 +- arch/sparc/kernel/ds.c | 2 +- arch/um/kernel/sysrq.c | 2 +- arch/x86/include/asm/trace/exceptions.h | 2 +- arch/x86/kernel/irq_64.c | 2 +- arch/x86/mm/extable.c | 4 ++-- arch/x86/xen/multicalls.c | 2 +- drivers/acpi/device_pm.c | 2 +- drivers/base/power/main.c | 6 +++--- drivers/base/syscore.c | 12 ++++++------ drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/floppy.c | 10 +++++----- drivers/cpufreq/cpufreq.c | 2 +- drivers/mmc/core/quirks.h | 2 +- drivers/nvdimm/bus.c | 2 +- drivers/nvdimm/dimm_devs.c | 2 +- drivers/pci/pci-driver.c | 14 +++++++------- drivers/pci/quirks.c | 4 ++-- drivers/pnp/quirks.c | 2 +- drivers/scsi/esp_scsi.c | 2 +- fs/btrfs/tests/free-space-tree-tests.c | 4 ++-- fs/f2fs/f2fs.h | 2 +- fs/pstore/inode.c | 2 +- include/trace/events/btrfs.h | 2 +- include/trace/events/cpuhp.h | 4 ++-- include/trace/events/preemptirq.h | 2 +- include/trace/events/rcu.h | 4 ++-- include/trace/events/sunrpc.h | 2 +- include/trace/events/vmscan.h | 4 ++-- include/trace/events/workqueue.h | 4 ++-- include/trace/events/xen.h | 2 +- init/main.c | 6 +++--- kernel/async.c | 4 ++-- kernel/events/uprobes.c | 2 +- kernel/fail_function.c | 2 +- kernel/irq/debugfs.c | 2 +- kernel/irq/handle.c | 2 +- kernel/irq/manage.c | 2 +- kernel/irq/spurious.c | 4 ++-- kernel/rcu/tree.c | 2 +- kernel/stop_machine.c | 2 +- kernel/time/sched_clock.c | 2 +- kernel/time/timer.c | 2 +- kernel/workqueue.c | 12 ++++++------ lib/error-inject.c | 2 +- lib/percpu-refcount.c | 4 ++-- mm/memblock.c | 14 +++++++------- mm/memory.c | 2 +- mm/vmscan.c | 2 +- net/ceph/osd_client.c | 2 +- net/core/net-procfs.c | 2 +- net/core/netpoll.c | 4 ++-- 56 files changed, 106 insertions(+), 106 deletions(-) (limited to 'kernel') diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index aa0f50d0f823..4a2ae862b19e 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -237,7 +237,7 @@ static int pci_dac_dma_supported(struct pci_dev *dev, u64 mask) ok = 0; /* If both conditions above are met, we are fine. */ - DBGA("pci_dac_dma_supported %s from %pf\n", + DBGA("pci_dac_dma_supported %s from %ps\n", ok ? "yes" : "no", __builtin_return_address(0)); return ok; @@ -269,7 +269,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size, && paddr + size <= __direct_map_size) { ret = paddr + __direct_map_base; - DBGA2("pci_map_single: [%p,%zx] -> direct %llx from %pf\n", + DBGA2("pci_map_single: [%p,%zx] -> direct %llx from %ps\n", cpu_addr, size, ret, __builtin_return_address(0)); return ret; @@ -280,7 +280,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size, if (dac_allowed) { ret = paddr + alpha_mv.pci_dac_offset; - DBGA2("pci_map_single: [%p,%zx] -> DAC %llx from %pf\n", + DBGA2("pci_map_single: [%p,%zx] -> DAC %llx from %ps\n", cpu_addr, size, ret, __builtin_return_address(0)); return ret; @@ -317,7 +317,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size, ret = arena->dma_base + dma_ofs * PAGE_SIZE; ret += (unsigned long)cpu_addr & ~PAGE_MASK; - DBGA2("pci_map_single: [%p,%zx] np %ld -> sg %llx from %pf\n", + DBGA2("pci_map_single: [%p,%zx] np %ld -> sg %llx from %ps\n", cpu_addr, size, npages, ret, __builtin_return_address(0)); return ret; @@ -384,14 +384,14 @@ static void alpha_pci_unmap_page(struct device *dev, dma_addr_t dma_addr, && dma_addr < __direct_map_base + __direct_map_size) { /* Nothing to do. */ - DBGA2("pci_unmap_single: direct [%llx,%zx] from %pf\n", + DBGA2("pci_unmap_single: direct [%llx,%zx] from %ps\n", dma_addr, size, __builtin_return_address(0)); return; } if (dma_addr > 0xffffffff) { - DBGA2("pci64_unmap_single: DAC [%llx,%zx] from %pf\n", + DBGA2("pci64_unmap_single: DAC [%llx,%zx] from %ps\n", dma_addr, size, __builtin_return_address(0)); return; } @@ -423,7 +423,7 @@ static void alpha_pci_unmap_page(struct device *dev, dma_addr_t dma_addr, spin_unlock_irqrestore(&arena->lock, flags); - DBGA2("pci_unmap_single: sg [%llx,%zx] np %ld from %pf\n", + DBGA2("pci_unmap_single: sg [%llx,%zx] np %ld from %ps\n", dma_addr, size, npages, __builtin_return_address(0)); } @@ -446,7 +446,7 @@ try_again: cpu_addr = (void *)__get_free_pages(gfp | __GFP_ZERO, order); if (! cpu_addr) { printk(KERN_INFO "pci_alloc_consistent: " - "get_free_pages failed from %pf\n", + "get_free_pages failed from %ps\n", __builtin_return_address(0)); /* ??? Really atomic allocation? Otherwise we could play with vmalloc and sg if we can't find contiguous memory. */ @@ -465,7 +465,7 @@ try_again: goto try_again; } - DBGA2("pci_alloc_consistent: %zx -> [%p,%llx] from %pf\n", + DBGA2("pci_alloc_consistent: %zx -> [%p,%llx] from %ps\n", size, cpu_addr, *dma_addrp, __builtin_return_address(0)); return cpu_addr; @@ -485,7 +485,7 @@ static void alpha_pci_free_coherent(struct device *dev, size_t size, pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); free_pages((unsigned long)cpu_addr, get_order(size)); - DBGA2("pci_free_consistent: [%llx,%zx] from %pf\n", + DBGA2("pci_free_consistent: [%llx,%zx] from %ps\n", dma_addr, size, __builtin_return_address(0)); } diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index 87f45b926c78..e67e0b2d4ce0 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -631,7 +631,7 @@ static void imx6_pm_stby_poweroff(void) static int imx6_pm_stby_poweroff_probe(void) { if (pm_power_off) { - pr_warn("%s: pm_power_off already claimed %p %pf!\n", + pr_warn("%s: pm_power_off already claimed %p %ps!\n", __func__, pm_power_off, pm_power_off); return -EBUSY; } diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index b54f8f8def36..e376883ab35b 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -133,7 +133,7 @@ static const char *usermode_action[] = { static int alignment_proc_show(struct seq_file *m, void *v) { seq_printf(m, "User:\t\t%lu\n", ai_user); - seq_printf(m, "System:\t\t%lu (%pF)\n", ai_sys, ai_sys_last_pc); + seq_printf(m, "System:\t\t%lu (%pS)\n", ai_sys, ai_sys_last_pc); seq_printf(m, "Skipped:\t%lu\n", ai_skipped); seq_printf(m, "Half:\t\t%lu\n", ai_half); seq_printf(m, "Word:\t\t%lu\n", ai_word); diff --git a/arch/arm/nwfpe/fpmodule.c b/arch/arm/nwfpe/fpmodule.c index 1365e8650843..ee34c76e6624 100644 --- a/arch/arm/nwfpe/fpmodule.c +++ b/arch/arm/nwfpe/fpmodule.c @@ -147,7 +147,7 @@ void float_raise(signed char flags) #ifdef CONFIG_DEBUG_USER if (flags & debug) printk(KERN_DEBUG - "NWFPE: %s[%d] takes exception %08x at %pf from %08lx\n", + "NWFPE: %s[%d] takes exception %08x at %ps from %08lx\n", current->comm, current->pid, flags, __builtin_return_address(0), GET_USERREG()->ARM_pc); #endif diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index c2ce1e42b888..8fe54fda31dc 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -75,7 +75,7 @@ static void __iomem *__ioremap(phys_addr_t addr, unsigned long size, p >= memory_start && p < virt_to_phys(high_memory) && !(p >= __virt_to_phys((phys_addr_t)__bss_stop) && p < __virt_to_phys((phys_addr_t)__bss_stop))) { - pr_warn("__ioremap(): phys addr "PTE_FMT" is RAM lr %pf\n", + pr_warn("__ioremap(): phys addr "PTE_FMT" is RAM lr %ps\n", (unsigned long)p, __builtin_return_address(0)); return NULL; } diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index f87265afb175..cad08ccce625 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -876,7 +876,7 @@ void ldom_power_off(void) static void ds_conn_reset(struct ds_info *dp) { - printk(KERN_ERR "ds-%llu: ds_conn_reset() from %pf\n", + printk(KERN_ERR "ds-%llu: ds_conn_reset() from %ps\n", dp->id, __builtin_return_address(0)); } diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index 6b995e870d55..05585eef11d9 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -20,7 +20,7 @@ static void _print_addr(void *data, unsigned long address, int reliable) { - pr_info(" [<%08lx>] %s%pF\n", address, reliable ? "" : "? ", + pr_info(" [<%08lx>] %s%pS\n", address, reliable ? "" : "? ", (void *)address); } diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h index e0e6d7f21399..6b1e87194809 100644 --- a/arch/x86/include/asm/trace/exceptions.h +++ b/arch/x86/include/asm/trace/exceptions.h @@ -30,7 +30,7 @@ DECLARE_EVENT_CLASS(x86_exceptions, __entry->error_code = error_code; ), - TP_printk("address=%pf ip=%pf error_code=0x%lx", + TP_printk("address=%ps ip=%ps error_code=0x%lx", (void *)__entry->address, (void *)__entry->ip, __entry->error_code) ); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 0469cd078db1..4dff56658427 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -58,7 +58,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (regs->sp >= estack_top && regs->sp <= estack_bottom) return; - WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n", + WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pS)\n", current->comm, curbase, regs->sp, irq_stack_top, irq_stack_bottom, estack_top, estack_bottom, (void *)regs->ip); diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 3c4568f8fb28..b0a2de8d2f9e 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -145,7 +145,7 @@ __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup unsigned long error_code, unsigned long fault_addr) { - if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n", + if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, regs->ip, (void *)regs->ip)) show_stack_regs(regs); @@ -162,7 +162,7 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup unsigned long error_code, unsigned long fault_addr) { - if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n", + if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, regs->ip, (void *)regs->ip)) show_stack_regs(regs); diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 0766a08bdf45..07054572297f 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -105,7 +105,7 @@ void xen_mc_flush(void) for (i = 0; i < b->mcidx; i++) { if (b->entries[i].result < 0) { #if MC_DEBUG - pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pF\n", + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pS\n", i + 1, b->debug[i].op, b->debug[i].args[0], diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 824ae985ad93..1aa0d014dc34 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -414,7 +414,7 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used) if (adev->wakeup.flags.notifier_present) { pm_wakeup_ws_event(adev->wakeup.ws, 0, acpi_s2idle_wakeup()); if (adev->wakeup.context.func) { - acpi_handle_debug(handle, "Running %pF for %s\n", + acpi_handle_debug(handle, "Running %pS for %s\n", adev->wakeup.context.func, dev_name(adev->wakeup.context.dev)); adev->wakeup.context.func(&adev->wakeup.context); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 5a8149829ab3..c88f56b9ae5b 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -205,7 +205,7 @@ static ktime_t initcall_debug_start(struct device *dev, void *cb) if (!pm_print_times_enabled) return 0; - dev_info(dev, "calling %pF @ %i, parent: %s\n", cb, + dev_info(dev, "calling %pS @ %i, parent: %s\n", cb, task_pid_nr(current), dev->parent ? dev_name(dev->parent) : "none"); return ktime_get(); @@ -223,7 +223,7 @@ static void initcall_debug_report(struct device *dev, ktime_t calltime, rettime = ktime_get(); nsecs = (s64) ktime_to_ns(ktime_sub(rettime, calltime)); - dev_info(dev, "%pF returned %d after %Ld usecs\n", cb, error, + dev_info(dev, "%pS returned %d after %Ld usecs\n", cb, error, (unsigned long long)nsecs >> 10); } @@ -2062,7 +2062,7 @@ EXPORT_SYMBOL_GPL(dpm_suspend_start); void __suspend_report_result(const char *function, void *fn, int ret) { if (ret) - printk(KERN_ERR "%s(): %pF returns %d\n", function, fn, ret); + printk(KERN_ERR "%s(): %pS returns %d\n", function, fn, ret); } EXPORT_SYMBOL_GPL(__suspend_report_result); diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c index 6e076f359dcc..0d346a307140 100644 --- a/drivers/base/syscore.c +++ b/drivers/base/syscore.c @@ -62,19 +62,19 @@ int syscore_suspend(void) list_for_each_entry_reverse(ops, &syscore_ops_list, node) if (ops->suspend) { if (initcall_debug) - pr_info("PM: Calling %pF\n", ops->suspend); + pr_info("PM: Calling %pS\n", ops->suspend); ret = ops->suspend(); if (ret) goto err_out; WARN_ONCE(!irqs_disabled(), - "Interrupts enabled after %pF\n", ops->suspend); + "Interrupts enabled after %pS\n", ops->suspend); } trace_suspend_resume(TPS("syscore_suspend"), 0, false); return 0; err_out: - pr_err("PM: System core suspend callback %pF failed.\n", ops->suspend); + pr_err("PM: System core suspend callback %pS failed.\n", ops->suspend); list_for_each_entry_continue(ops, &syscore_ops_list, node) if (ops->resume) @@ -100,10 +100,10 @@ void syscore_resume(void) list_for_each_entry(ops, &syscore_ops_list, node) if (ops->resume) { if (initcall_debug) - pr_info("PM: Calling %pF\n", ops->resume); + pr_info("PM: Calling %pS\n", ops->resume); ops->resume(); WARN_ONCE(!irqs_disabled(), - "Interrupts enabled after %pF\n", ops->resume); + "Interrupts enabled after %pS\n", ops->resume); } trace_suspend_resume(TPS("syscore_resume"), 0, false); } @@ -122,7 +122,7 @@ void syscore_shutdown(void) list_for_each_entry_reverse(ops, &syscore_ops_list, node) if (ops->shutdown) { if (initcall_debug) - pr_info("PM: Calling %pF\n", ops->shutdown); + pr_info("PM: Calling %pS\n", ops->shutdown); ops->shutdown(); } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c7ad88d91a09..3e5fd97a3b4d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -6116,7 +6116,7 @@ int drbd_ack_receiver(struct drbd_thread *thi) err = cmd->fn(connection, &pi); if (err) { - drbd_err(connection, "%pf failed\n", cmd->fn); + drbd_err(connection, "%ps failed\n", cmd->fn); goto reconnect; } diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 95f608d1a098..49f89db0766f 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1693,7 +1693,7 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id) /* we don't even know which FDC is the culprit */ pr_info("DOR0=%x\n", fdc_state[0].dor); pr_info("floppy interrupt on bizarre fdc %d\n", fdc); - pr_info("handler=%pf\n", handler); + pr_info("handler=%ps\n", handler); is_alive(__func__, "bizarre fdc"); return IRQ_NONE; } @@ -1752,7 +1752,7 @@ static void reset_interrupt(void) debugt(__func__, ""); result(); /* get the status ready for set_fdc */ if (FDCS->reset) { - pr_info("reset set in interrupt, calling %pf\n", cont->error); + pr_info("reset set in interrupt, calling %ps\n", cont->error); cont->error(); /* a reset just after a reset. BAD! */ } cont->redo(); @@ -1793,7 +1793,7 @@ static void show_floppy(void) pr_info("\n"); pr_info("floppy driver state\n"); pr_info("-------------------\n"); - pr_info("now=%lu last interrupt=%lu diff=%lu last called handler=%pf\n", + pr_info("now=%lu last interrupt=%lu diff=%lu last called handler=%ps\n", jiffies, interruptjiffies, jiffies - interruptjiffies, lasthandler); @@ -1812,9 +1812,9 @@ static void show_floppy(void) pr_info("status=%x\n", fd_inb(FD_STATUS)); pr_info("fdc_busy=%lu\n", fdc_busy); if (do_floppy) - pr_info("do_floppy=%pf\n", do_floppy); + pr_info("do_floppy=%ps\n", do_floppy); if (work_pending(&floppy_work)) - pr_info("floppy_work.func=%pf\n", floppy_work.func); + pr_info("floppy_work.func=%ps\n", floppy_work.func); if (delayed_work_pending(&fd_timer)) pr_info("delayed work.function=%p expires=%ld\n", fd_timer.work.func, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0e626b00053b..d87ebc2d2e68 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -432,7 +432,7 @@ static void cpufreq_list_transition_notifiers(void) mutex_lock(&cpufreq_transition_notifier_list.mutex); for (nb = cpufreq_transition_notifier_list.head; nb; nb = nb->next) - pr_info("%pF\n", nb->notifier_call); + pr_info("%pS\n", nb->notifier_call); mutex_unlock(&cpufreq_transition_notifier_list.mutex); } diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index dd2f73af8f2c..2d2d9ea8be4f 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -159,7 +159,7 @@ static inline void mmc_fixup_device(struct mmc_card *card, (f->ext_csd_rev == EXT_CSD_REV_ANY || f->ext_csd_rev == card->ext_csd.rev) && rev >= f->rev_start && rev <= f->rev_end) { - dev_dbg(&card->dev, "calling %pf\n", f->vendor_fixup); + dev_dbg(&card->dev, "calling %ps\n", f->vendor_fixup); f->vendor_fixup(card, f->data); } } diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 7bbff0af29b2..7ff684159f29 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -581,7 +581,7 @@ int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner, struct device_driver *drv = &nd_drv->drv; if (!nd_drv->type) { - pr_debug("driver type bitmask not set (%pf)\n", + pr_debug("driver type bitmask not set (%ps)\n", __builtin_return_address(0)); return -EINVAL; } diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index efe412a6b5b9..06f5087547ea 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -53,7 +53,7 @@ static int validate_dimm(struct nvdimm_drvdata *ndd) rc = nvdimm_check_config_data(ndd->dev); if (rc) - dev_dbg(ndd->dev, "%pf: %s error: %d\n", + dev_dbg(ndd->dev, "%ps: %s error: %d\n", __builtin_return_address(0), __func__, rc); return rc; } diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 79b1610a8beb..11a877461584 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -578,7 +578,7 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0 && pci_dev->current_state != PCI_UNKNOWN) { WARN_ONCE(pci_dev->current_state != prev, - "PCI PM: Device state not saved by %pF\n", + "PCI PM: Device state not saved by %pS\n", drv->suspend); } } @@ -605,7 +605,7 @@ static int pci_legacy_suspend_late(struct device *dev, pm_message_t state) if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0 && pci_dev->current_state != PCI_UNKNOWN) { WARN_ONCE(pci_dev->current_state != prev, - "PCI PM: Device state not saved by %pF\n", + "PCI PM: Device state not saved by %pS\n", drv->suspend_late); goto Fixup; } @@ -773,7 +773,7 @@ static int pci_pm_suspend(struct device *dev) if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0 && pci_dev->current_state != PCI_UNKNOWN) { WARN_ONCE(pci_dev->current_state != prev, - "PCI PM: State of device not saved by %pF\n", + "PCI PM: State of device not saved by %pS\n", pm->suspend); } } @@ -821,7 +821,7 @@ static int pci_pm_suspend_noirq(struct device *dev) if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0 && pci_dev->current_state != PCI_UNKNOWN) { WARN_ONCE(pci_dev->current_state != prev, - "PCI PM: State of device not saved by %pF\n", + "PCI PM: State of device not saved by %pS\n", pm->suspend_noirq); goto Fixup; } @@ -1260,11 +1260,11 @@ static int pci_pm_runtime_suspend(struct device *dev) * log level. */ if (error == -EBUSY || error == -EAGAIN) { - dev_dbg(dev, "can't suspend now (%pf returned %d)\n", + dev_dbg(dev, "can't suspend now (%ps returned %d)\n", pm->runtime_suspend, error); return error; } else if (error) { - dev_err(dev, "can't suspend (%pf returned %d)\n", + dev_err(dev, "can't suspend (%ps returned %d)\n", pm->runtime_suspend, error); return error; } @@ -1276,7 +1276,7 @@ static int pci_pm_runtime_suspend(struct device *dev) && !pci_dev->state_saved && pci_dev->current_state != PCI_D0 && pci_dev->current_state != PCI_UNKNOWN) { WARN_ONCE(pci_dev->current_state != prev, - "PCI PM: State of device not saved by %pF\n", + "PCI PM: State of device not saved by %pS\n", pm->runtime_suspend); return 0; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e2a879e93d86..56cd4c4a170c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -36,7 +36,7 @@ static ktime_t fixup_debug_start(struct pci_dev *dev, void (*fn)(struct pci_dev *dev)) { if (initcall_debug) - pci_info(dev, "calling %pF @ %i\n", fn, task_pid_nr(current)); + pci_info(dev, "calling %pS @ %i\n", fn, task_pid_nr(current)); return ktime_get(); } @@ -51,7 +51,7 @@ static void fixup_debug_report(struct pci_dev *dev, ktime_t calltime, delta = ktime_sub(rettime, calltime); duration = (unsigned long long) ktime_to_ns(delta) >> 10; if (initcall_debug || duration > 10000) - pci_info(dev, "%pF took %lld usecs\n", fn, duration); + pci_info(dev, "%pS took %lld usecs\n", fn, duration); } static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c index 803666ae3635..de99f371d362 100644 --- a/drivers/pnp/quirks.c +++ b/drivers/pnp/quirks.c @@ -458,7 +458,7 @@ void pnp_fixup_device(struct pnp_dev *dev) for (f = pnp_fixups; *f->id; f++) { if (!compare_pnp_id(dev->id, f->id)) continue; - pnp_dbg(&dev->dev, "%s: calling %pF\n", f->id, + pnp_dbg(&dev->dev, "%s: calling %pS\n", f->id, f->quirk_function); f->quirk_function(dev); } diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c index 465df475f753..76fd02ccbf49 100644 --- a/drivers/scsi/esp_scsi.c +++ b/drivers/scsi/esp_scsi.c @@ -1031,7 +1031,7 @@ static int esp_check_spur_intr(struct esp *esp) static void esp_schedule_reset(struct esp *esp) { - esp_log_reset("esp_schedule_reset() from %pf\n", + esp_log_reset("esp_schedule_reset() from %ps\n", __builtin_return_address(0)); esp->flags |= ESP_FLAG_RESETTING; esp_event(esp, ESP_EVENT_RESET); diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 89346da890cf..f7a969b986eb 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -539,7 +539,7 @@ static int run_test_both_formats(test_func_t test_func, u32 sectorsize, ret = run_test(test_func, 0, sectorsize, nodesize, alignment); if (ret) { test_err( - "%pf failed with extents, sectorsize=%u, nodesize=%u, alignment=%u", + "%ps failed with extents, sectorsize=%u, nodesize=%u, alignment=%u", test_func, sectorsize, nodesize, alignment); test_ret = ret; } @@ -547,7 +547,7 @@ static int run_test_both_formats(test_func_t test_func, u32 sectorsize, ret = run_test(test_func, 1, sectorsize, nodesize, alignment); if (ret) { test_err( - "%pf failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u", + "%ps failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u", test_func, sectorsize, nodesize, alignment); test_ret = ret; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8f23ee6e8eb9..02d0cd199828 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1337,7 +1337,7 @@ struct f2fs_private_dio { #ifdef CONFIG_F2FS_FAULT_INJECTION #define f2fs_show_injection_info(type) \ - printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \ + printk_ratelimited("%sF2FS-fs : inject %s in %s of %pS\n", \ KERN_INFO, f2fs_fault_name[type], \ __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index c60ee46f3e39..29e94e0b6d73 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -115,7 +115,7 @@ static int pstore_ftrace_seq_show(struct seq_file *s, void *v) rec = (struct pstore_ftrace_record *)(ps->record->buf + data->off); - seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %pf <- %pF\n", + seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %ps <- %pS\n", pstore_ftrace_decode_cpu(rec), pstore_ftrace_read_timestamp(rec), rec->ip, rec->parent_ip, (void *)rec->ip, diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index ab1cc33adbac..b9b7465be5eb 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1345,7 +1345,7 @@ DECLARE_EVENT_CLASS(btrfs__work, __entry->normal_work = &work->normal_work; ), - TP_printk_btrfs("work=%p (normal_work=%p) wq=%p func=%pf ordered_func=%p " + TP_printk_btrfs("work=%p (normal_work=%p) wq=%p func=%ps ordered_func=%p " "ordered_free=%p", __entry->work, __entry->normal_work, __entry->wq, __entry->func, __entry->ordered_func, __entry->ordered_free) diff --git a/include/trace/events/cpuhp.h b/include/trace/events/cpuhp.h index fe1d6e8cd99d..ad16f77310c6 100644 --- a/include/trace/events/cpuhp.h +++ b/include/trace/events/cpuhp.h @@ -30,7 +30,7 @@ TRACE_EVENT(cpuhp_enter, __entry->fun = fun; ), - TP_printk("cpu: %04u target: %3d step: %3d (%pf)", + TP_printk("cpu: %04u target: %3d step: %3d (%ps)", __entry->cpu, __entry->target, __entry->idx, __entry->fun) ); @@ -58,7 +58,7 @@ TRACE_EVENT(cpuhp_multi_enter, __entry->fun = fun; ), - TP_printk("cpu: %04u target: %3d step: %3d (%pf)", + TP_printk("cpu: %04u target: %3d step: %3d (%ps)", __entry->cpu, __entry->target, __entry->idx, __entry->fun) ); diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h index 9a0d4ceeb166..95fba0471e5b 100644 --- a/include/trace/events/preemptirq.h +++ b/include/trace/events/preemptirq.h @@ -27,7 +27,7 @@ DECLARE_EVENT_CLASS(preemptirq_template, __entry->parent_offs = (u32)(parent_ip - (unsigned long)_stext); ), - TP_printk("caller=%pF parent=%pF", + TP_printk("caller=%pS parent=%pS", (void *)((unsigned long)(_stext) + __entry->caller_offs), (void *)((unsigned long)(_stext) + __entry->parent_offs)) ); diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index f0c4d10e614b..80339fd14c1c 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -491,7 +491,7 @@ TRACE_EVENT(rcu_callback, __entry->qlen = qlen; ), - TP_printk("%s rhp=%p func=%pf %ld/%ld", + TP_printk("%s rhp=%p func=%ps %ld/%ld", __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen_lazy, __entry->qlen) ); @@ -587,7 +587,7 @@ TRACE_EVENT(rcu_invoke_callback, __entry->func = rhp->func; ), - TP_printk("%s rhp=%p func=%pf", + TP_printk("%s rhp=%p func=%ps", __entry->rcuname, __entry->rhp, __entry->func) ); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 0d5d0d91f861..7b8b987d332e 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -102,7 +102,7 @@ DECLARE_EVENT_CLASS(rpc_task_running, __entry->flags = task->tk_flags; ), - TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d action=%pf", + TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d action=%ps", __entry->task_id, __entry->client_id, __entry->flags, __entry->runstate, diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index a1cb91342231..252327dbfa51 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -226,7 +226,7 @@ TRACE_EVENT(mm_shrink_slab_start, __entry->priority = priority; ), - TP_printk("%pF %p: nid: %d objects to shrink %ld gfp_flags %s cache items %ld delta %lld total_scan %ld priority %d", + TP_printk("%pS %p: nid: %d objects to shrink %ld gfp_flags %s cache items %ld delta %lld total_scan %ld priority %d", __entry->shrink, __entry->shr, __entry->nid, @@ -265,7 +265,7 @@ TRACE_EVENT(mm_shrink_slab_end, __entry->total_scan = total_scan; ), - TP_printk("%pF %p: nid: %d unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d", + TP_printk("%pS %p: nid: %d unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d", __entry->shrink, __entry->shr, __entry->nid, diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index 9a761bc6a251..e172549283be 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -60,7 +60,7 @@ TRACE_EVENT(workqueue_queue_work, __entry->cpu = pwq->pool->cpu; ), - TP_printk("work struct=%p function=%pf workqueue=%p req_cpu=%u cpu=%u", + TP_printk("work struct=%p function=%ps workqueue=%p req_cpu=%u cpu=%u", __entry->work, __entry->function, __entry->workqueue, __entry->req_cpu, __entry->cpu) ); @@ -102,7 +102,7 @@ TRACE_EVENT(workqueue_execute_start, __entry->function = work->func; ), - TP_printk("work struct %p: function %pf", __entry->work, __entry->function) + TP_printk("work struct %p: function %ps", __entry->work, __entry->function) ); /** diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index fdcf88bcf0ea..9a0e8af21310 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -73,7 +73,7 @@ TRACE_EVENT(xen_mc_callback, __entry->fn = fn; __entry->data = data; ), - TP_printk("callback %pf, data %p", + TP_printk("callback %ps, data %p", __entry->fn, __entry->data) ); diff --git a/init/main.c b/init/main.c index c86a1c8f19f4..3192c6cc5382 100644 --- a/init/main.c +++ b/init/main.c @@ -826,7 +826,7 @@ trace_initcall_start_cb(void *data, initcall_t fn) { ktime_t *calltime = (ktime_t *)data; - printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current)); + printk(KERN_DEBUG "calling %pS @ %i\n", fn, task_pid_nr(current)); *calltime = ktime_get(); } @@ -840,7 +840,7 @@ trace_initcall_finish_cb(void *data, initcall_t fn, int ret) rettime = ktime_get(); delta = ktime_sub(rettime, *calltime); duration = (unsigned long long) ktime_to_ns(delta) >> 10; - printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n", + printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", fn, ret, duration); } @@ -897,7 +897,7 @@ int __init_or_module do_one_initcall(initcall_t fn) strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf)); local_irq_enable(); } - WARN(msgbuf[0], "initcall %pF returned with %s\n", fn, msgbuf); + WARN(msgbuf[0], "initcall %pS returned with %s\n", fn, msgbuf); add_latent_entropy(); return ret; diff --git a/kernel/async.c b/kernel/async.c index f6bd0d9885e1..12c332e4e13e 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -119,7 +119,7 @@ static void async_run_entry_fn(struct work_struct *work) /* 1) run (and print duration) */ if (initcall_debug && system_state < SYSTEM_RUNNING) { - pr_debug("calling %lli_%pF @ %i\n", + pr_debug("calling %lli_%pS @ %i\n", (long long)entry->cookie, entry->func, task_pid_nr(current)); calltime = ktime_get(); @@ -128,7 +128,7 @@ static void async_run_entry_fn(struct work_struct *work) if (initcall_debug && system_state < SYSTEM_RUNNING) { rettime = ktime_get(); delta = ktime_sub(rettime, calltime); - pr_debug("initcall %lli_%pF returned 0 after %lld usecs\n", + pr_debug("initcall %lli_%pS returned 0 after %lld usecs\n", (long long)entry->cookie, entry->func, (long long)ktime_to_ns(delta) >> 10); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index affa830a198c..48abc0f18eae 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -2028,7 +2028,7 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) if (uc->handler) { rc = uc->handler(uc, regs); WARN(rc & ~UPROBE_HANDLER_MASK, - "bad rc=0x%x from %pf()\n", rc, uc->handler); + "bad rc=0x%x from %ps()\n", rc, uc->handler); } if (uc->ret_handler) diff --git a/kernel/fail_function.c b/kernel/fail_function.c index 17f75b545f66..feb80712b913 100644 --- a/kernel/fail_function.c +++ b/kernel/fail_function.c @@ -210,7 +210,7 @@ static int fei_seq_show(struct seq_file *m, void *v) { struct fei_attr *attr = list_entry(v, struct fei_attr, list); - seq_printf(m, "%pf\n", attr->kp.addr); + seq_printf(m, "%ps\n", attr->kp.addr); return 0; } diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 516c00a5e867..c1eccd4f6520 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -152,7 +152,7 @@ static int irq_debug_show(struct seq_file *m, void *p) raw_spin_lock_irq(&desc->lock); data = irq_desc_get_irq_data(desc); - seq_printf(m, "handler: %pf\n", desc->handle_irq); + seq_printf(m, "handler: %ps\n", desc->handle_irq); seq_printf(m, "device: %s\n", desc->dev_name); seq_printf(m, "status: 0x%08x\n", desc->status_use_accessors); irq_debug_show_bits(m, 0, desc->status_use_accessors, irqdesc_states, diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 6df5ddfdb0f8..a4ace611f47f 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -149,7 +149,7 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags res = action->handler(irq, action->dev_id); trace_irq_handler_exit(irq, action, res); - if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n", + if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n", irq, action->handler)) local_irq_disable(); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 9ec34a2a6638..ec43ab2fdfda 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -778,7 +778,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) ret = 0; break; default: - pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n", + pr_err("Setting trigger mode %lu for irq %u failed (%pS)\n", flags, irq_desc_get_irq(desc), chip->irq_set_type); } if (unmask) diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 6d2fa6914b30..2ed97a7c9b2a 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -212,9 +212,9 @@ static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) */ raw_spin_lock_irqsave(&desc->lock, flags); for_each_action_of_desc(desc, action) { - printk(KERN_ERR "[<%p>] %pf", action->handler, action->handler); + printk(KERN_ERR "[<%p>] %ps", action->handler, action->handler); if (action->thread_fn) - printk(KERN_CONT " threaded [<%p>] %pf", + printk(KERN_CONT " threaded [<%p>] %ps", action->thread_fn, action->thread_fn); printk(KERN_CONT "\n"); } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index acd6ccf56faf..8eee921b384d 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2870,7 +2870,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, int cpu, bool lazy) * Use rcu:rcu_callback trace event to find the previous * time callback was passed to __call_rcu(). */ - WARN_ONCE(1, "__call_rcu(): Double-freed CB %p->%pF()!!!\n", + WARN_ONCE(1, "__call_rcu(): Double-freed CB %p->%pS()!!!\n", head, head->func); WRITE_ONCE(head->func, rcu_leak_callback); return; diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 067cb83f37ea..7231fb5953fc 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -513,7 +513,7 @@ repeat: } preempt_count_dec(); WARN_ONCE(preempt_count(), - "cpu_stop: %pf(%p) leaked preempt count\n", fn, arg); + "cpu_stop: %ps(%p) leaked preempt count\n", fn, arg); goto repeat; } } diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 094b82ca95e5..1002cf61700a 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -231,7 +231,7 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) enable_sched_clock_irqtime(); - pr_debug("Registered %pF as sched_clock source\n", read); + pr_debug("Registered %pS as sched_clock source\n", read); } void __init generic_sched_clock_init(void) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 2fce056f8a49..6502c3ed317e 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1328,7 +1328,7 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list lock_map_release(&lockdep_map); if (count != preempt_count()) { - WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n", + WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n", fn, count, preempt_count()); /* * Restore the preempt count. That gives us a decent diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7abbeed13421..8ea9e4fb8cc6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2277,7 +2277,7 @@ __acquires(&pool->lock) if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n" - " last function: %pf\n", + " last function: %ps\n", current->comm, preempt_count(), task_pid_nr(current), worker->current_func); debug_show_held_locks(current); @@ -2596,11 +2596,11 @@ static void check_flush_dependency(struct workqueue_struct *target_wq, worker = current_wq_worker(); WARN_ONCE(current->flags & PF_MEMALLOC, - "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf", + "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps", current->pid, current->comm, target_wq->name, target_func); WARN_ONCE(worker && ((worker->current_pwq->wq->flags & (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM), - "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf", + "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps", worker->current_pwq->wq->name, worker->current_func, target_wq->name, target_func); } @@ -4582,7 +4582,7 @@ void print_worker_info(const char *log_lvl, struct task_struct *task) probe_kernel_read(desc, worker->desc, sizeof(desc) - 1); if (fn || name[0] || desc[0]) { - printk("%sWorkqueue: %s %pf", log_lvl, name, fn); + printk("%sWorkqueue: %s %ps", log_lvl, name, fn); if (strcmp(name, desc)) pr_cont(" (%s)", desc); pr_cont("\n"); @@ -4607,7 +4607,7 @@ static void pr_cont_work(bool comma, struct work_struct *work) pr_cont("%s BAR(%d)", comma ? "," : "", task_pid_nr(barr->task)); } else { - pr_cont("%s %pf", comma ? "," : "", work->func); + pr_cont("%s %ps", comma ? "," : "", work->func); } } @@ -4639,7 +4639,7 @@ static void show_pwq(struct pool_workqueue *pwq) if (worker->current_pwq != pwq) continue; - pr_cont("%s %d%s:%pf", comma ? "," : "", + pr_cont("%s %d%s:%ps", comma ? "," : "", task_pid_nr(worker->task), worker == pwq->wq->rescuer ? "(RESCUER)" : "", worker->current_func); diff --git a/lib/error-inject.c b/lib/error-inject.c index c0d4600f4896..aa63751c916f 100644 --- a/lib/error-inject.c +++ b/lib/error-inject.c @@ -189,7 +189,7 @@ static int ei_seq_show(struct seq_file *m, void *v) { struct ei_entry *ent = list_entry(v, struct ei_entry, list); - seq_printf(m, "%pf\t%s\n", (void *)ent->start_addr, + seq_printf(m, "%ps\t%s\n", (void *)ent->start_addr, error_type_string(ent->etype)); return 0; } diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 9877682e49c7..da54318d3b55 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -151,7 +151,7 @@ static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu) atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count); WARN_ONCE(atomic_long_read(&ref->count) <= 0, - "percpu ref (%pf) <= 0 (%ld) after switching to atomic", + "percpu ref (%ps) <= 0 (%ld) after switching to atomic", ref->release, atomic_long_read(&ref->count)); /* @ref is viewed as dead on all CPUs, send out switch confirmation */ @@ -333,7 +333,7 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, spin_lock_irqsave(&percpu_ref_switch_lock, flags); WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD, - "%s called more than once on %pf!", __func__, ref->release); + "%s called more than once on %ps!", __func__, ref->release); ref->percpu_count_ptr |= __PERCPU_REF_DEAD; __percpu_ref_switch_mode(ref, confirm_kill); diff --git a/mm/memblock.c b/mm/memblock.c index 470601115892..9db5e51babd2 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -701,7 +701,7 @@ int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) { phys_addr_t end = base + size - 1; - memblock_dbg("memblock_add: [%pa-%pa] %pF\n", + memblock_dbg("memblock_add: [%pa-%pa] %pS\n", &base, &end, (void *)_RET_IP_); return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0); @@ -820,7 +820,7 @@ int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) { phys_addr_t end = base + size - 1; - memblock_dbg(" memblock_free: [%pa-%pa] %pF\n", + memblock_dbg(" memblock_free: [%pa-%pa] %pS\n", &base, &end, (void *)_RET_IP_); kmemleak_free_part_phys(base, size); @@ -831,7 +831,7 @@ int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) { phys_addr_t end = base + size - 1; - memblock_dbg("memblock_reserve: [%pa-%pa] %pF\n", + memblock_dbg("memblock_reserve: [%pa-%pa] %pS\n", &base, &end, (void *)_RET_IP_); return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0); @@ -1466,7 +1466,7 @@ void * __init memblock_alloc_try_nid_raw( { void *ptr; - memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n", + memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr, (void *)_RET_IP_); @@ -1502,7 +1502,7 @@ void * __init memblock_alloc_try_nid_nopanic( { void *ptr; - memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n", + memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr, (void *)_RET_IP_); @@ -1538,7 +1538,7 @@ void * __init memblock_alloc_try_nid( { void *ptr; - memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n", + memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n", __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr, (void *)_RET_IP_); ptr = memblock_alloc_internal(size, align, @@ -1567,7 +1567,7 @@ void __init __memblock_free_late(phys_addr_t base, phys_addr_t size) phys_addr_t cursor, end; end = base + size - 1; - memblock_dbg("%s: [%pa-%pa] %pF\n", + memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, &base, &end, (void *)_RET_IP_); kmemleak_free_part_phys(base, size); cursor = PFN_UP(base); diff --git a/mm/memory.c b/mm/memory.c index 47fe250307c7..3541a15067f2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -519,7 +519,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, dump_page(page, "bad pte"); pr_alert("addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n", (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); - pr_alert("file:%pD fault:%pf mmap:%pf readpage:%pf\n", + pr_alert("file:%pD fault:%ps mmap:%ps readpage:%ps\n", vma->vm_file, vma->vm_ops ? vma->vm_ops->fault : NULL, vma->vm_file ? vma->vm_file->f_op->mmap : NULL, diff --git a/mm/vmscan.c b/mm/vmscan.c index a5ad0b35ab8e..90648187f622 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -493,7 +493,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, total_scan += delta; if (total_scan < 0) { - pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n", + pr_err("shrink_slab: %pS negative objects to delete nr=%ld\n", shrinker->scan_objects, total_scan); total_scan = freeable; next_deferred = nr; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index fa9530dd876e..6f739de28918 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2398,7 +2398,7 @@ static void finish_request(struct ceph_osd_request *req) static void __complete_request(struct ceph_osd_request *req) { - dout("%s req %p tid %llu cb %pf result %d\n", __func__, req, + dout("%s req %p tid %llu cb %ps result %d\n", __func__, req, req->r_tid, req->r_callback, req->r_result); if (req->r_callback) diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 63881f72ef71..36347933ec3a 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -258,7 +258,7 @@ static int ptype_seq_show(struct seq_file *seq, void *v) else seq_printf(seq, "%04x", ntohs(pt->type)); - seq_printf(seq, " %-8s %pf\n", + seq_printf(seq, " %-8s %ps\n", pt->dev ? pt->dev->name : "", pt->func); } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 361aabffb8c0..bf5446192d6a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -149,7 +149,7 @@ static void poll_one_napi(struct napi_struct *napi) * indicate that we are clearing the Tx path only. */ work = napi->poll(napi, 0); - WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll); + WARN_ONCE(work, "%pS exceeded budget in poll\n", napi->poll); trace_napi_poll(napi, work, 0); clear_bit(NAPI_STATE_NPSVC, &napi->state); @@ -346,7 +346,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, } WARN_ONCE(!irqs_disabled(), - "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n", + "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pS)\n", dev->name, dev->netdev_ops->ndo_start_xmit); } -- cgit From d8eca5bbb2be9bc7546f9e733786fa2f1a594c67 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Apr 2019 23:20:03 +0200 Subject: bpf: implement lookup-free direct value access for maps This generic extension to BPF maps allows for directly loading an address residing inside a BPF map value as a single BPF ldimm64 instruction! The idea is similar to what BPF_PSEUDO_MAP_FD does today, which is a special src_reg flag for ldimm64 instruction that indicates that inside the first part of the double insns's imm field is a file descriptor which the verifier then replaces as a full 64bit address of the map into both imm parts. For the newly added BPF_PSEUDO_MAP_VALUE src_reg flag, the idea is the following: the first part of the double insns's imm field is again a file descriptor corresponding to the map, and the second part of the imm field is an offset into the value. The verifier will then replace both imm parts with an address that points into the BPF map value at the given value offset for maps that support this operation. Currently supported is array map with single entry. It is possible to support more than just single map element by reusing both 16bit off fields of the insns as a map index, so full array map lookup could be expressed that way. It hasn't been implemented here due to lack of concrete use case, but could easily be done so in future in a compatible way, since both off fields right now have to be 0 and would correctly denote a map index 0. The BPF_PSEUDO_MAP_VALUE is a distinct flag as otherwise with BPF_PSEUDO_MAP_FD we could not differ offset 0 between load of map pointer versus load of map's value at offset 0, and changing BPF_PSEUDO_MAP_FD's encoding into off by one to differ between regular map pointer and map value pointer would add unnecessary complexity and increases barrier for debugability thus less suitable. Using the second part of the imm field as an offset into the value does /not/ come with limitations since maximum possible value size is in u32 universe anyway. This optimization allows for efficiently retrieving an address to a map value memory area without having to issue a helper call which needs to prepare registers according to calling convention, etc, without needing the extra NULL test, and without having to add the offset in an additional instruction to the value base pointer. The verifier then treats the destination register as PTR_TO_MAP_VALUE with constant reg->off from the user passed offset from the second imm field, and guarantees that this is within bounds of the map value. Any subsequent operations are normally treated as typical map value handling without anything extra needed from verification side. The two map operations for direct value access have been added to array map for now. In future other types could be supported as well depending on the use case. The main use case for this commit is to allow for BPF loader support for global variables that reside in .data/.rodata/.bss sections such that we can directly load the address of them with minimal additional infrastructure required. Loader support has been added in subsequent commits for libbpf library. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 +++ include/linux/bpf_verifier.h | 4 ++ include/uapi/linux/bpf.h | 13 +++++- kernel/bpf/arraymap.c | 32 +++++++++++++++ kernel/bpf/core.c | 3 +- kernel/bpf/disasm.c | 5 ++- kernel/bpf/syscall.c | 28 +++++++++---- kernel/bpf/verifier.c | 86 ++++++++++++++++++++++++++++++--------- tools/bpf/bpftool/xlated_dumper.c | 3 ++ 9 files changed, 149 insertions(+), 31 deletions(-) (limited to 'kernel') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a445194b5fb6..bd93a592dd29 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -57,6 +57,12 @@ struct bpf_map_ops { const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type); + + /* Direct value access helpers. */ + int (*map_direct_value_addr)(const struct bpf_map *map, + u64 *imm, u32 off); + int (*map_direct_value_meta)(const struct bpf_map *map, + u64 imm, u32 *off); }; struct bpf_map { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index fc8254d6b569..b3ab61fe1932 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -224,6 +224,10 @@ struct bpf_insn_aux_data { unsigned long map_state; /* pointer/poison value for maps */ s32 call_imm; /* saved imm field of call insn */ u32 alu_limit; /* limit for add/sub register with pointer */ + struct { + u32 map_index; /* index into used_maps[] */ + u32 map_off; /* offset from value base address */ + }; }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ int sanitize_stack_off; /* stack slot to be cleared */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 837024512baf..26cfb5b2c964 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -255,8 +255,19 @@ enum bpf_attach_type { */ #define BPF_F_ANY_ALIGNMENT (1U << 1) -/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ +/* When BPF ldimm64's insn[0].src_reg != 0 then this can have + * two extensions: + * + * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE + * insn[0].imm: map fd map fd + * insn[1].imm: 0 offset into value + * insn[0].off: 0 0 + * insn[1].off: 0 0 + * ldimm64 rewrite: address of map address of map[0]+offset + * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE + */ #define BPF_PSEUDO_MAP_FD 1 +#define BPF_PSEUDO_MAP_VALUE 2 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index c72e0d8e1e65..1a6e9861d554 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -160,6 +160,36 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) return array->value + array->elem_size * (index & array->index_mask); } +static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, + u32 off) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + + if (map->max_entries != 1) + return -ENOTSUPP; + if (off >= map->value_size) + return -EINVAL; + + *imm = (unsigned long)array->value; + return 0; +} + +static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm, + u32 *off) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u64 base = (unsigned long)array->value; + u64 range = array->elem_size; + + if (map->max_entries != 1) + return -ENOTSUPP; + if (imm < base || imm >= base + range) + return -ENOENT; + + *off = imm - base; + return 0; +} + /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { @@ -419,6 +449,8 @@ const struct bpf_map_ops array_map_ops = { .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, .map_gen_lookup = array_map_gen_lookup, + .map_direct_value_addr = array_map_direct_value_addr, + .map_direct_value_meta = array_map_direct_value_meta, .map_seq_show_elem = array_map_seq_show_elem, .map_check_btf = array_map_check_btf, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 2966cb368bf4..ace8c22c8b0e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -292,7 +292,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp) dst[i] = fp->insnsi[i]; if (!was_ld_map && dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) && - dst[i].src_reg == BPF_PSEUDO_MAP_FD) { + (dst[i].src_reg == BPF_PSEUDO_MAP_FD || + dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) { was_ld_map = true; dst[i].imm = 0; } else if (was_ld_map && diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index de73f55e42fd..d9ce383c0f9c 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -205,10 +205,11 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, * part of the ldimm64 insn is accessible. */ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; - bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD; + bool is_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD || + insn->src_reg == BPF_PSEUDO_MAP_VALUE; char tmp[64]; - if (map_ptr && !allow_ptr_leaks) + if (is_ptr && !allow_ptr_leaks) imm = 0; verbose(cbs->private_data, "(%02x) r%d = %s\n", diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1d65e56594db..828518bb947b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2072,13 +2072,26 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr) } static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, - unsigned long addr) + unsigned long addr, u32 *off, + u32 *type) { + const struct bpf_map *map; int i; - for (i = 0; i < prog->aux->used_map_cnt; i++) - if (prog->aux->used_maps[i] == (void *)addr) - return prog->aux->used_maps[i]; + for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { + map = prog->aux->used_maps[i]; + if (map == (void *)addr) { + *type = BPF_PSEUDO_MAP_FD; + return map; + } + if (!map->ops->map_direct_value_meta) + continue; + if (!map->ops->map_direct_value_meta(map, addr, off)) { + *type = BPF_PSEUDO_MAP_VALUE; + return map; + } + } + return NULL; } @@ -2086,6 +2099,7 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog) { const struct bpf_map *map; struct bpf_insn *insns; + u32 off, type; u64 imm; int i; @@ -2113,11 +2127,11 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog) continue; imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; - map = bpf_map_from_imm(prog, imm); + map = bpf_map_from_imm(prog, imm, &off, &type); if (map) { - insns[i].src_reg = BPF_PSEUDO_MAP_FD; + insns[i].src_reg = type; insns[i].imm = map->id; - insns[i + 1].imm = 0; + insns[i + 1].imm = off; continue; } } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 48718e1da16d..6ab7a23fc924 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5056,18 +5056,12 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, return 0; } -/* return the map pointer stored inside BPF_LD_IMM64 instruction */ -static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn) -{ - u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32; - - return (struct bpf_map *) (unsigned long) imm64; -} - /* verify BPF_LD_IMM64 instruction */ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) { + struct bpf_insn_aux_data *aux = cur_aux(env); struct bpf_reg_state *regs = cur_regs(env); + struct bpf_map *map; int err; if (BPF_SIZE(insn->code) != BPF_DW) { @@ -5091,11 +5085,22 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } - /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */ - BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD); + map = env->used_maps[aux->map_index]; + mark_reg_known_zero(env, regs, insn->dst_reg); + regs[insn->dst_reg].map_ptr = map; + + if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) { + regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; + regs[insn->dst_reg].off = aux->map_off; + if (map_value_has_spin_lock(map)) + regs[insn->dst_reg].id = ++env->id_gen; + } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) { + regs[insn->dst_reg].type = CONST_PTR_TO_MAP; + } else { + verbose(env, "bpf verifier is misconfigured\n"); + return -EINVAL; + } - regs[insn->dst_reg].type = CONST_PTR_TO_MAP; - regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn); return 0; } @@ -6803,8 +6808,10 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) } if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { + struct bpf_insn_aux_data *aux; struct bpf_map *map; struct fd f; + u64 addr; if (i == insn_cnt - 1 || insn[1].code != 0 || insn[1].dst_reg != 0 || insn[1].src_reg != 0 || @@ -6813,13 +6820,19 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) return -EINVAL; } - if (insn->src_reg == 0) + if (insn[0].src_reg == 0) /* valid generic load 64-bit imm */ goto next_insn; - if (insn[0].src_reg != BPF_PSEUDO_MAP_FD || - insn[1].imm != 0) { - verbose(env, "unrecognized bpf_ld_imm64 insn\n"); + /* In final convert_pseudo_ld_imm64() step, this is + * converted into regular 64-bit imm load insn. + */ + if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD && + insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) || + (insn[0].src_reg == BPF_PSEUDO_MAP_FD && + insn[1].imm != 0)) { + verbose(env, + "unrecognized bpf_ld_imm64 insn\n"); return -EINVAL; } @@ -6837,16 +6850,47 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) return err; } - /* store map pointer inside BPF_LD_IMM64 instruction */ - insn[0].imm = (u32) (unsigned long) map; - insn[1].imm = ((u64) (unsigned long) map) >> 32; + aux = &env->insn_aux_data[i]; + if (insn->src_reg == BPF_PSEUDO_MAP_FD) { + addr = (unsigned long)map; + } else { + u32 off = insn[1].imm; + + if (off >= BPF_MAX_VAR_OFF) { + verbose(env, "direct value offset of %u is not allowed\n", off); + fdput(f); + return -EINVAL; + } + + if (!map->ops->map_direct_value_addr) { + verbose(env, "no direct value access support for this map type\n"); + fdput(f); + return -EINVAL; + } + + err = map->ops->map_direct_value_addr(map, &addr, off); + if (err) { + verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", + map->value_size, off); + fdput(f); + return err; + } + + aux->map_off = off; + addr += off; + } + + insn[0].imm = (u32)addr; + insn[1].imm = addr >> 32; /* check whether we recorded this map already */ - for (j = 0; j < env->used_map_cnt; j++) + for (j = 0; j < env->used_map_cnt; j++) { if (env->used_maps[j] == map) { + aux->map_index = j; fdput(f); goto next_insn; } + } if (env->used_map_cnt >= MAX_USED_MAPS) { fdput(f); @@ -6863,6 +6907,8 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) fdput(f); return PTR_ERR(map); } + + aux->map_index = env->used_map_cnt; env->used_maps[env->used_map_cnt++] = map; if (bpf_map_is_cgroup_storage(map) && diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 7073dbe1ff27..0bb17bf88b18 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -195,6 +195,9 @@ static const char *print_imm(void *private_data, if (insn->src_reg == BPF_PSEUDO_MAP_FD) snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), "map[id:%u]", insn->imm); + else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "map[id:%u][0]+%u", insn->imm, (insn + 1)->imm); else snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), "0x%llx", (unsigned long long)full_imm); -- cgit From be70bcd53de66e86f2726e576307cbdaebd3b1a5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Apr 2019 23:20:04 +0200 Subject: bpf: do not retain flags that are not tied to map lifetime Both BPF_F_WRONLY / BPF_F_RDONLY flags are tied to the map file descriptor, but not to the map object itself! Meaning, at map creation time BPF_F_RDONLY can be set to make the map read-only from syscall side, but this holds only for the returned fd, so any other fd either retrieved via bpf file system or via map id for the very same underlying map object can have read-write access instead. Given that, keeping the two flags around in the map_flags attribute and exposing them to user space upon map dump is misleading and may lead to false conclusions. Since these two flags are not tied to the map object lets also not store them as map property. Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- kernel/bpf/syscall.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 828518bb947b..56b4b0e08b3b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -166,13 +166,25 @@ void bpf_map_area_free(void *area) kvfree(area); } +static u32 bpf_map_flags_retain_permanent(u32 flags) +{ + /* Some map creation flags are not tied to the map object but + * rather to the map fd instead, so they have no meaning upon + * map object inspection since multiple file descriptors with + * different (access) properties can exist here. Thus, given + * this has zero meaning for the map itself, lets clear these + * from here. + */ + return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY); +} + void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) { map->map_type = attr->map_type; map->key_size = attr->key_size; map->value_size = attr->value_size; map->max_entries = attr->max_entries; - map->map_flags = attr->map_flags; + map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); map->numa_node = bpf_map_attr_numa_node(attr); } -- cgit From 591fe9888d7809d9ee5c828020b6c6ae27c37229 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Apr 2019 23:20:05 +0200 Subject: bpf: add program side {rd, wr}only support for maps This work adds two new map creation flags BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG in order to allow for read-only or write-only BPF maps from a BPF program side. Today we have BPF_F_RDONLY and BPF_F_WRONLY, but this only applies to system call side, meaning the BPF program has full read/write access to the map as usual while bpf(2) calls with map fd can either only read or write into the map depending on the flags. BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG allows for the exact opposite such that verifier is going to reject program loads if write into a read-only map or a read into a write-only map is detected. For read-only map case also some helpers are forbidden for programs that would alter the map state such as map deletion, update, etc. As opposed to the two BPF_F_RDONLY / BPF_F_WRONLY flags, BPF_F_RDONLY_PROG as well as BPF_F_WRONLY_PROG really do correspond to the map lifetime. We've enabled this generic map extension to various non-special maps holding normal user data: array, hash, lru, lpm, local storage, queue and stack. Further generic map types could be followed up in future depending on use-case. Main use case here is to forbid writes into .rodata map values from verifier side. Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 29 +++++++++++++++++++++++++++ include/uapi/linux/bpf.h | 6 +++++- kernel/bpf/arraymap.c | 6 +++++- kernel/bpf/hashtab.c | 6 +++--- kernel/bpf/local_storage.c | 6 +++--- kernel/bpf/lpm_trie.c | 3 ++- kernel/bpf/queue_stack_maps.c | 6 +++--- kernel/bpf/syscall.c | 2 ++ kernel/bpf/verifier.c | 46 +++++++++++++++++++++++++++++++++++++++++-- 9 files changed, 96 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bd93a592dd29..be20804631b5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -430,6 +430,35 @@ struct bpf_array { #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ #define MAX_TAIL_CALL_CNT 32 +#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ + BPF_F_RDONLY_PROG | \ + BPF_F_WRONLY | \ + BPF_F_WRONLY_PROG) + +#define BPF_MAP_CAN_READ BIT(0) +#define BPF_MAP_CAN_WRITE BIT(1) + +static inline u32 bpf_map_flags_to_cap(struct bpf_map *map) +{ + u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); + + /* Combination of BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG is + * not possible. + */ + if (access_flags & BPF_F_RDONLY_PROG) + return BPF_MAP_CAN_READ; + else if (access_flags & BPF_F_WRONLY_PROG) + return BPF_MAP_CAN_WRITE; + else + return BPF_MAP_CAN_READ | BPF_MAP_CAN_WRITE; +} + +static inline bool bpf_map_flags_access_ok(u32 access_flags) +{ + return (access_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) != + (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); +} + struct bpf_event_entry { struct perf_event *event; struct file *perf_file; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 26cfb5b2c964..d275446d807c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -294,7 +294,7 @@ enum bpf_attach_type { #define BPF_OBJ_NAME_LEN 16U -/* Flags for accessing BPF object */ +/* Flags for accessing BPF object from syscall side. */ #define BPF_F_RDONLY (1U << 3) #define BPF_F_WRONLY (1U << 4) @@ -304,6 +304,10 @@ enum bpf_attach_type { /* Zero-initialize hash function seed. This should only be used for testing. */ #define BPF_F_ZERO_SEED (1U << 6) +/* Flags for accessing BPF object from program side. */ +#define BPF_F_RDONLY_PROG (1U << 7) +#define BPF_F_WRONLY_PROG (1U << 8) + /* flags for BPF_PROG_QUERY */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 1a6e9861d554..217b10bd9f48 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -22,7 +22,7 @@ #include "map_in_map.h" #define ARRAY_CREATE_FLAG_MASK \ - (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) + (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) static void bpf_array_free_percpu(struct bpf_array *array) { @@ -63,6 +63,7 @@ int array_map_alloc_check(union bpf_attr *attr) if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size == 0 || attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || + !bpf_map_flags_access_ok(attr->map_flags) || (percpu && numa_node != NUMA_NO_NODE)) return -EINVAL; @@ -472,6 +473,9 @@ static int fd_array_map_alloc_check(union bpf_attr *attr) /* only file descriptors can be stored in this type of map */ if (attr->value_size != sizeof(u32)) return -EINVAL; + /* Program read-only/write-only not supported for special maps yet. */ + if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) + return -EINVAL; return array_map_alloc_check(attr); } diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index fed15cf94dca..192d32e77db3 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -23,7 +23,7 @@ #define HTAB_CREATE_FLAG_MASK \ (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \ - BPF_F_RDONLY | BPF_F_WRONLY | BPF_F_ZERO_SEED) + BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED) struct bucket { struct hlist_nulls_head head; @@ -262,8 +262,8 @@ static int htab_map_alloc_check(union bpf_attr *attr) /* Guard against local DoS, and discourage production use. */ return -EPERM; - if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK) - /* reserved bits should not be used */ + if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK || + !bpf_map_flags_access_ok(attr->map_flags)) return -EINVAL; if (!lru && percpu_lru) diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 6b572e2de7fb..980e8f1f6cb5 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -14,7 +14,7 @@ DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STO #ifdef CONFIG_CGROUP_BPF #define LOCAL_STORAGE_CREATE_FLAG_MASK \ - (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) + (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) struct bpf_cgroup_storage_map { struct bpf_map map; @@ -282,8 +282,8 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) if (attr->value_size > PAGE_SIZE) return ERR_PTR(-E2BIG); - if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK) - /* reserved bits should not be used */ + if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK || + !bpf_map_flags_access_ok(attr->map_flags)) return ERR_PTR(-EINVAL); if (attr->max_entries) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 93a5cbbde421..e61630c2e50b 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -538,7 +538,7 @@ out: #define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN) #define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | \ - BPF_F_RDONLY | BPF_F_WRONLY) + BPF_F_ACCESS_MASK) static struct bpf_map *trie_alloc(union bpf_attr *attr) { @@ -553,6 +553,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) if (attr->max_entries == 0 || !(attr->map_flags & BPF_F_NO_PREALLOC) || attr->map_flags & ~LPM_CREATE_FLAG_MASK || + !bpf_map_flags_access_ok(attr->map_flags) || attr->key_size < LPM_KEY_SIZE_MIN || attr->key_size > LPM_KEY_SIZE_MAX || attr->value_size < LPM_VAL_SIZE_MIN || diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index b384ea9f3254..0b140d236889 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -11,8 +11,7 @@ #include "percpu_freelist.h" #define QUEUE_STACK_CREATE_FLAG_MASK \ - (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) - + (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) struct bpf_queue_stack { struct bpf_map map; @@ -52,7 +51,8 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr) /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 0 || attr->value_size == 0 || - attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK) + attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK || + !bpf_map_flags_access_ok(attr->map_flags)) return -EINVAL; if (attr->value_size > KMALLOC_MAX_SIZE) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 56b4b0e08b3b..0c9276b54c88 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -501,6 +501,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, map->spin_lock_off = btf_find_spin_lock(btf, value_type); if (map_value_has_spin_lock(map)) { + if (map->map_flags & BPF_F_RDONLY_PROG) + return -EACCES; if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_ARRAY && map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6ab7a23fc924..b747434df89c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1439,6 +1439,28 @@ static int check_stack_access(struct bpf_verifier_env *env, return 0; } +static int check_map_access_type(struct bpf_verifier_env *env, u32 regno, + int off, int size, enum bpf_access_type type) +{ + struct bpf_reg_state *regs = cur_regs(env); + struct bpf_map *map = regs[regno].map_ptr; + u32 cap = bpf_map_flags_to_cap(map); + + if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) { + verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n", + map->value_size, off, size); + return -EACCES; + } + + if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) { + verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n", + map->value_size, off, size); + return -EACCES; + } + + return 0; +} + /* check read/write into map element returned by bpf_map_lookup_elem() */ static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size, bool zero_size_allowed) @@ -2024,7 +2046,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn verbose(env, "R%d leaks addr into map\n", value_regno); return -EACCES; } - + err = check_map_access_type(env, regno, off, size, t); + if (err) + return err; err = check_map_access(env, regno, off, size, false); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); @@ -2327,6 +2351,10 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed); case PTR_TO_MAP_VALUE: + if (check_map_access_type(env, regno, reg->off, access_size, + meta && meta->raw_mode ? BPF_WRITE : + BPF_READ)) + return -EACCES; return check_map_access(env, regno, reg->off, access_size, zero_size_allowed); default: /* scalar_value|ptr_to_stack or invalid ptr */ @@ -3059,6 +3087,7 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, int func_id, int insn_idx) { struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; + struct bpf_map *map = meta->map_ptr; if (func_id != BPF_FUNC_tail_call && func_id != BPF_FUNC_map_lookup_elem && @@ -3069,11 +3098,24 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, func_id != BPF_FUNC_map_peek_elem) return 0; - if (meta->map_ptr == NULL) { + if (map == NULL) { verbose(env, "kernel subsystem misconfigured verifier\n"); return -EINVAL; } + /* In case of read-only, some additional restrictions + * need to be applied in order to prevent altering the + * state of the map from program side. + */ + if ((map->map_flags & BPF_F_RDONLY_PROG) && + (func_id == BPF_FUNC_map_delete_elem || + func_id == BPF_FUNC_map_update_elem || + func_id == BPF_FUNC_map_push_elem || + func_id == BPF_FUNC_map_pop_elem)) { + verbose(env, "write into map forbidden\n"); + return -EACCES; + } + if (!BPF_MAP_PTR(aux->map_state)) bpf_map_ptr_store(aux, meta->map_ptr, meta->map_ptr->unpriv_array); -- cgit From 87df15de441bd4add7876ef584da8cabdd9a042a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Apr 2019 23:20:06 +0200 Subject: bpf: add syscall side map freeze support This patch adds a new BPF_MAP_FREEZE command which allows to "freeze" the map globally as read-only / immutable from syscall side. Map permission handling has been refactored into map_get_sys_perms() and drops FMODE_CAN_WRITE in case of locked map. Main use case is to allow for setting up .rodata sections from the BPF ELF which are loaded into the kernel, meaning BPF loader first allocates map, sets up map value by copying .rodata section into it and once complete, it calls BPF_MAP_FREEZE on the map fd to prevent further modifications. Right now BPF_MAP_FREEZE only takes map fd as argument while remaining bpf_attr members are required to be zero. I didn't add write-only locking here as counterpart since I don't have a concrete use-case for it on my side, and I think it makes probably more sense to wait once there is actually one. In that case bpf_attr can be extended as usual with a flag field and/or others where flag 0 means that we lock the map read-only hence this doesn't prevent to add further extensions to BPF_MAP_FREEZE upon need. A map creation flag like BPF_F_WRONCE was not considered for couple of reasons: i) in case of a generic implementation, a map can consist of more than just one element, thus there could be multiple map updates needed to set the map into a state where it can then be made immutable, ii) WRONCE indicates exact one-time write before it is then set immutable. A generic implementation would set a bit atomically on map update entry (if unset), indicating that every subsequent update from then onwards will need to bail out there. However, map updates can fail, so upon failure that flag would need to be unset again and the update attempt would need to be repeated for it to be eventually made immutable. While this can be made race-free, this approach feels less clean and in combination with reason i), it's not generic enough. A dedicated BPF_MAP_FREEZE command directly sets the flag and caller has the guarantee that map is immutable from syscall side upon successful return for any future syscall invocations that would alter the map state, which is also more intuitive from an API point of view. A command name such as BPF_MAP_LOCK has been avoided as it's too close with BPF map spin locks (which already has BPF_F_LOCK flag). BPF_MAP_FREEZE is so far only enabled for privileged users. Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 ++- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 66 +++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 57 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index be20804631b5..65f7094c40b4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -87,7 +87,8 @@ struct bpf_map { struct btf *btf; u32 pages; bool unpriv_array; - /* 51 bytes hole */ + bool frozen; /* write-once */ + /* 48 bytes hole */ /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d275446d807c..af1cbd951f26 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -105,6 +105,7 @@ enum bpf_cmd { BPF_BTF_GET_FD_BY_ID, BPF_TASK_FD_QUERY, BPF_MAP_LOOKUP_AND_DELETE_ELEM, + BPF_MAP_FREEZE, }; enum bpf_map_type { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0c9276b54c88..b3ce516e5a20 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -355,6 +355,18 @@ static int bpf_map_release(struct inode *inode, struct file *filp) return 0; } +static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f) +{ + fmode_t mode = f.file->f_mode; + + /* Our file permissions may have been overridden by global + * map permissions facing syscall side. + */ + if (READ_ONCE(map->frozen)) + mode &= ~FMODE_CAN_WRITE; + return mode; +} + #ifdef CONFIG_PROC_FS static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) { @@ -376,14 +388,16 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) "max_entries:\t%u\n" "map_flags:\t%#x\n" "memlock:\t%llu\n" - "map_id:\t%u\n", + "map_id:\t%u\n" + "frozen:\t%u\n", map->map_type, map->key_size, map->value_size, map->max_entries, map->map_flags, map->pages * 1ULL << PAGE_SHIFT, - map->id); + map->id, + READ_ONCE(map->frozen)); if (owner_prog_type) { seq_printf(m, "owner_prog_type:\t%u\n", @@ -727,8 +741,7 @@ static int map_lookup_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - - if (!(f.file->f_mode & FMODE_CAN_READ)) { + if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { err = -EPERM; goto err_put; } @@ -857,8 +870,7 @@ static int map_update_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - - if (!(f.file->f_mode & FMODE_CAN_WRITE)) { + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; goto err_put; } @@ -969,8 +981,7 @@ static int map_delete_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - - if (!(f.file->f_mode & FMODE_CAN_WRITE)) { + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; goto err_put; } @@ -1021,8 +1032,7 @@ static int map_get_next_key(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - - if (!(f.file->f_mode & FMODE_CAN_READ)) { + if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { err = -EPERM; goto err_put; } @@ -1089,8 +1099,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - - if (!(f.file->f_mode & FMODE_CAN_WRITE)) { + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; goto err_put; } @@ -1132,6 +1141,36 @@ err_put: return err; } +#define BPF_MAP_FREEZE_LAST_FIELD map_fd + +static int map_freeze(const union bpf_attr *attr) +{ + int err = 0, ufd = attr->map_fd; + struct bpf_map *map; + struct fd f; + + if (CHECK_ATTR(BPF_MAP_FREEZE)) + return -EINVAL; + + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + if (READ_ONCE(map->frozen)) { + err = -EBUSY; + goto err_put; + } + if (!capable(CAP_SYS_ADMIN)) { + err = -EPERM; + goto err_put; + } + + WRITE_ONCE(map->frozen, true); +err_put: + fdput(f); + return err; +} + static const struct bpf_prog_ops * const bpf_prog_types[] = { #define BPF_PROG_TYPE(_id, _name) \ [_id] = & _name ## _prog_ops, @@ -2735,6 +2774,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_MAP_GET_NEXT_KEY: err = map_get_next_key(&attr); break; + case BPF_MAP_FREEZE: + err = map_freeze(&attr); + break; case BPF_PROG_LOAD: err = bpf_prog_load(&attr, uattr); break; -- cgit From 3e0ddc4f3ff1436970e96e76f3df3c3b5f5173b6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Apr 2019 23:20:07 +0200 Subject: bpf: allow . char as part of the object name Trivial addition to allow '.' aside from '_' as "special" characters in the object name. Used to allow for substrings in maps from loader side such as ".bss", ".data", ".rodata", but could also be useful for other purposes. Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- kernel/bpf/syscall.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b3ce516e5a20..198c9680bf0d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -474,10 +474,10 @@ static int bpf_obj_name_cpy(char *dst, const char *src) const char *end = src + BPF_OBJ_NAME_LEN; memset(dst, 0, BPF_OBJ_NAME_LEN); - - /* Copy all isalnum() and '_' char */ + /* Copy all isalnum(), '_' and '.' chars. */ while (src < end && *src) { - if (!isalnum(*src) && *src != '_') + if (!isalnum(*src) && + *src != '_' && *src != '.') return -EINVAL; *dst++ = *src++; } -- cgit From 1dc92851849cc2235a1efef8f8d5a9255efc5f13 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Apr 2019 23:20:09 +0200 Subject: bpf: kernel side support for BTF Var and DataSec This work adds kernel-side verification, logging and seq_show dumping of BTF Var and DataSec kinds which are emitted with latest LLVM. The following constraints apply: BTF Var must have: - Its kind_flag is 0 - Its vlen is 0 - Must point to a valid type - Type must not resolve to a forward type - Size of underlying type must be > 0 - Must have a valid name - Can only be a source type, not sink or intermediate one - Name may include dots (e.g. in case of static variables inside functions) - Cannot be a member of a struct/union - Linkage so far can either only be static or global/allocated BTF DataSec must have: - Its kind_flag is 0 - Its vlen cannot be 0 - Its size cannot be 0 - Must have a valid name - Can only be a source type, not sink or intermediate one - Name may include dots (e.g. to represent .bss, .data, .rodata etc) - Cannot be a member of a struct/union - Inner btf_var_secinfo array with {type,offset,size} triple must be sorted by offset in ascending order - Type must always point to BTF Var - BTF resolved size of Var must be <= size provided by triple - DataSec size must be >= sum of triple sizes (thus holes are allowed) btf_var_resolve(), btf_ptr_resolve() and btf_modifier_resolve() are on a high level quite similar but each come with slight, subtle differences. They could potentially be a bit refactored in future which hasn't been done here to ease review. Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 417 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 397 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index bd3921b1514b..0cecf6bab61b 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -185,6 +185,16 @@ i < btf_type_vlen(struct_type); \ i++, member++) +#define for_each_vsi(i, struct_type, member) \ + for (i = 0, member = btf_type_var_secinfo(struct_type); \ + i < btf_type_vlen(struct_type); \ + i++, member++) + +#define for_each_vsi_from(i, from, struct_type, member) \ + for (i = from, member = btf_type_var_secinfo(struct_type) + from; \ + i < btf_type_vlen(struct_type); \ + i++, member++) + static DEFINE_IDR(btf_idr); static DEFINE_SPINLOCK(btf_idr_lock); @@ -262,6 +272,8 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_RESTRICT] = "RESTRICT", [BTF_KIND_FUNC] = "FUNC", [BTF_KIND_FUNC_PROTO] = "FUNC_PROTO", + [BTF_KIND_VAR] = "VAR", + [BTF_KIND_DATASEC] = "DATASEC", }; struct btf_kind_operations { @@ -375,13 +387,36 @@ static bool btf_type_is_int(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_INT; } +static bool btf_type_is_var(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_VAR; +} + +static bool btf_type_is_datasec(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; +} + +/* Types that act only as a source, not sink or intermediate + * type when resolving. + */ +static bool btf_type_is_resolve_source_only(const struct btf_type *t) +{ + return btf_type_is_var(t) || + btf_type_is_datasec(t); +} + /* What types need to be resolved? * * btf_type_is_modifier() is an obvious one. * * btf_type_is_struct() because its member refers to * another type (through member->type). - + * + * btf_type_is_var() because the variable refers to + * another type. btf_type_is_datasec() holds multiple + * btf_type_is_var() types that need resolving. + * * btf_type_is_array() because its element (array->type) * refers to another type. Array can be thought of a * special case of struct while array just has the same @@ -390,9 +425,11 @@ static bool btf_type_is_int(const struct btf_type *t) static bool btf_type_needs_resolve(const struct btf_type *t) { return btf_type_is_modifier(t) || - btf_type_is_ptr(t) || - btf_type_is_struct(t) || - btf_type_is_array(t); + btf_type_is_ptr(t) || + btf_type_is_struct(t) || + btf_type_is_array(t) || + btf_type_is_var(t) || + btf_type_is_datasec(t); } /* t->size can be used */ @@ -403,6 +440,7 @@ static bool btf_type_has_size(const struct btf_type *t) case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: + case BTF_KIND_DATASEC: return true; } @@ -467,6 +505,16 @@ static const struct btf_enum *btf_type_enum(const struct btf_type *t) return (const struct btf_enum *)(t + 1); } +static const struct btf_var *btf_type_var(const struct btf_type *t) +{ + return (const struct btf_var *)(t + 1); +} + +static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t) +{ + return (const struct btf_var_secinfo *)(t + 1); +} + static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) { return kind_ops[BTF_INFO_KIND(t->info)]; @@ -478,23 +526,31 @@ static bool btf_name_offset_valid(const struct btf *btf, u32 offset) offset < btf->hdr.str_len; } -/* Only C-style identifier is permitted. This can be relaxed if - * necessary. - */ -static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) +static bool __btf_name_char_ok(char c, bool first, bool dot_ok) +{ + if ((first ? !isalpha(c) : + !isalnum(c)) && + c != '_' && + ((c == '.' && !dot_ok) || + c != '.')) + return false; + return true; +} + +static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok) { /* offset must be valid */ const char *src = &btf->strings[offset]; const char *src_limit; - if (!isalpha(*src) && *src != '_') + if (!__btf_name_char_ok(*src, true, dot_ok)) return false; /* set a limit on identifier length */ src_limit = src + KSYM_NAME_LEN; src++; while (*src && src < src_limit) { - if (!isalnum(*src) && *src != '_') + if (!__btf_name_char_ok(*src, false, dot_ok)) return false; src++; } @@ -502,6 +558,19 @@ static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) return !*src; } +/* Only C-style identifier is permitted. This can be relaxed if + * necessary. + */ +static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) +{ + return __btf_name_valid(btf, offset, false); +} + +static bool btf_name_valid_section(const struct btf *btf, u32 offset) +{ + return __btf_name_valid(btf, offset, true); +} + static const char *__btf_name_by_offset(const struct btf *btf, u32 offset) { if (!offset) @@ -697,6 +766,32 @@ static void btf_verifier_log_member(struct btf_verifier_env *env, __btf_verifier_log(log, "\n"); } +__printf(4, 5) +static void btf_verifier_log_vsi(struct btf_verifier_env *env, + const struct btf_type *datasec_type, + const struct btf_var_secinfo *vsi, + const char *fmt, ...) +{ + struct bpf_verifier_log *log = &env->log; + va_list args; + + if (!bpf_verifier_log_needed(log)) + return; + if (env->phase != CHECK_META) + btf_verifier_log_type(env, datasec_type, NULL); + + __btf_verifier_log(log, "\t type_id=%u offset=%u size=%u", + vsi->type, vsi->offset, vsi->size); + if (fmt && *fmt) { + __btf_verifier_log(log, " "); + va_start(args, fmt); + bpf_verifier_vlog(log, fmt, args); + va_end(args); + } + + __btf_verifier_log(log, "\n"); +} + static void btf_verifier_log_hdr(struct btf_verifier_env *env, u32 btf_data_size) { @@ -974,7 +1069,8 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, } else if (btf_type_is_ptr(size_type)) { size = sizeof(void *); } else { - if (WARN_ON_ONCE(!btf_type_is_modifier(size_type))) + if (WARN_ON_ONCE(!btf_type_is_modifier(size_type) && + !btf_type_is_var(size_type))) return NULL; size = btf->resolved_sizes[size_type_id]; @@ -1509,7 +1605,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env, u32 next_type_size = 0; next_type = btf_type_by_id(btf, next_type_id); - if (!next_type) { + if (!next_type || btf_type_is_resolve_source_only(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } @@ -1542,6 +1638,53 @@ static int btf_modifier_resolve(struct btf_verifier_env *env, return 0; } +static int btf_var_resolve(struct btf_verifier_env *env, + const struct resolve_vertex *v) +{ + const struct btf_type *next_type; + const struct btf_type *t = v->t; + u32 next_type_id = t->type; + struct btf *btf = env->btf; + u32 next_type_size; + + next_type = btf_type_by_id(btf, next_type_id); + if (!next_type || btf_type_is_resolve_source_only(next_type)) { + btf_verifier_log_type(env, v->t, "Invalid type_id"); + return -EINVAL; + } + + if (!env_type_is_resolve_sink(env, next_type) && + !env_type_is_resolved(env, next_type_id)) + return env_stack_push(env, next_type, next_type_id); + + if (btf_type_is_modifier(next_type)) { + const struct btf_type *resolved_type; + u32 resolved_type_id; + + resolved_type_id = next_type_id; + resolved_type = btf_type_id_resolve(btf, &resolved_type_id); + + if (btf_type_is_ptr(resolved_type) && + !env_type_is_resolve_sink(env, resolved_type) && + !env_type_is_resolved(env, resolved_type_id)) + return env_stack_push(env, resolved_type, + resolved_type_id); + } + + /* We must resolve to something concrete at this point, no + * forward types or similar that would resolve to size of + * zero is allowed. + */ + if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) { + btf_verifier_log_type(env, v->t, "Invalid type_id"); + return -EINVAL; + } + + env_stack_pop_resolved(env, next_type_id, next_type_size); + + return 0; +} + static int btf_ptr_resolve(struct btf_verifier_env *env, const struct resolve_vertex *v) { @@ -1551,7 +1694,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env, struct btf *btf = env->btf; next_type = btf_type_by_id(btf, next_type_id); - if (!next_type) { + if (!next_type || btf_type_is_resolve_source_only(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } @@ -1609,6 +1752,15 @@ static void btf_modifier_seq_show(const struct btf *btf, btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m); } +static void btf_var_seq_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct seq_file *m) +{ + t = btf_type_id_resolve(btf, &type_id); + + btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m); +} + static void btf_ptr_seq_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct seq_file *m) @@ -1776,7 +1928,8 @@ static int btf_array_resolve(struct btf_verifier_env *env, /* Check array->index_type */ index_type_id = array->index_type; index_type = btf_type_by_id(btf, index_type_id); - if (btf_type_nosize_or_null(index_type)) { + if (btf_type_is_resolve_source_only(index_type) || + btf_type_nosize_or_null(index_type)) { btf_verifier_log_type(env, v->t, "Invalid index"); return -EINVAL; } @@ -1795,7 +1948,8 @@ static int btf_array_resolve(struct btf_verifier_env *env, /* Check array->type */ elem_type_id = array->type; elem_type = btf_type_by_id(btf, elem_type_id); - if (btf_type_nosize_or_null(elem_type)) { + if (btf_type_is_resolve_source_only(elem_type) || + btf_type_nosize_or_null(elem_type)) { btf_verifier_log_type(env, v->t, "Invalid elem"); return -EINVAL; @@ -2016,7 +2170,8 @@ static int btf_struct_resolve(struct btf_verifier_env *env, const struct btf_type *member_type = btf_type_by_id(env->btf, member_type_id); - if (btf_type_nosize_or_null(member_type)) { + if (btf_type_is_resolve_source_only(member_type) || + btf_type_nosize_or_null(member_type)) { btf_verifier_log_member(env, v->t, member, "Invalid member"); return -EINVAL; @@ -2411,6 +2566,222 @@ static struct btf_kind_operations func_ops = { .seq_show = btf_df_seq_show, }; +static s32 btf_var_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + const struct btf_var *var; + u32 meta_needed = sizeof(*var); + + if (meta_left < meta_needed) { + btf_verifier_log_basic(env, t, + "meta_left:%u meta_needed:%u", + meta_left, meta_needed); + return -EINVAL; + } + + if (btf_type_vlen(t)) { + btf_verifier_log_type(env, t, "vlen != 0"); + return -EINVAL; + } + + if (btf_type_kflag(t)) { + btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); + return -EINVAL; + } + + if (!t->name_off || + !__btf_name_valid(env->btf, t->name_off, true)) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + + /* A var cannot be in type void */ + if (!t->type || !BTF_TYPE_ID_VALID(t->type)) { + btf_verifier_log_type(env, t, "Invalid type_id"); + return -EINVAL; + } + + var = btf_type_var(t); + if (var->linkage != BTF_VAR_STATIC && + var->linkage != BTF_VAR_GLOBAL_ALLOCATED) { + btf_verifier_log_type(env, t, "Linkage not supported"); + return -EINVAL; + } + + btf_verifier_log_type(env, t, NULL); + + return meta_needed; +} + +static void btf_var_log(struct btf_verifier_env *env, const struct btf_type *t) +{ + const struct btf_var *var = btf_type_var(t); + + btf_verifier_log(env, "type_id=%u linkage=%u", t->type, var->linkage); +} + +static const struct btf_kind_operations var_ops = { + .check_meta = btf_var_check_meta, + .resolve = btf_var_resolve, + .check_member = btf_df_check_member, + .check_kflag_member = btf_df_check_kflag_member, + .log_details = btf_var_log, + .seq_show = btf_var_seq_show, +}; + +static s32 btf_datasec_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + const struct btf_var_secinfo *vsi; + u64 last_vsi_end_off = 0, sum = 0; + u32 i, meta_needed; + + meta_needed = btf_type_vlen(t) * sizeof(*vsi); + if (meta_left < meta_needed) { + btf_verifier_log_basic(env, t, + "meta_left:%u meta_needed:%u", + meta_left, meta_needed); + return -EINVAL; + } + + if (!btf_type_vlen(t)) { + btf_verifier_log_type(env, t, "vlen == 0"); + return -EINVAL; + } + + if (!t->size) { + btf_verifier_log_type(env, t, "size == 0"); + return -EINVAL; + } + + if (btf_type_kflag(t)) { + btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); + return -EINVAL; + } + + if (!t->name_off || + !btf_name_valid_section(env->btf, t->name_off)) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + + btf_verifier_log_type(env, t, NULL); + + for_each_vsi(i, t, vsi) { + /* A var cannot be in type void */ + if (!vsi->type || !BTF_TYPE_ID_VALID(vsi->type)) { + btf_verifier_log_vsi(env, t, vsi, + "Invalid type_id"); + return -EINVAL; + } + + if (vsi->offset < last_vsi_end_off || vsi->offset >= t->size) { + btf_verifier_log_vsi(env, t, vsi, + "Invalid offset"); + return -EINVAL; + } + + if (!vsi->size || vsi->size > t->size) { + btf_verifier_log_vsi(env, t, vsi, + "Invalid size"); + return -EINVAL; + } + + last_vsi_end_off = vsi->offset + vsi->size; + if (last_vsi_end_off > t->size) { + btf_verifier_log_vsi(env, t, vsi, + "Invalid offset+size"); + return -EINVAL; + } + + btf_verifier_log_vsi(env, t, vsi, NULL); + sum += vsi->size; + } + + if (t->size < sum) { + btf_verifier_log_type(env, t, "Invalid btf_info size"); + return -EINVAL; + } + + return meta_needed; +} + +static int btf_datasec_resolve(struct btf_verifier_env *env, + const struct resolve_vertex *v) +{ + const struct btf_var_secinfo *vsi; + struct btf *btf = env->btf; + u16 i; + + for_each_vsi_from(i, v->next_member, v->t, vsi) { + u32 var_type_id = vsi->type, type_id, type_size = 0; + const struct btf_type *var_type = btf_type_by_id(env->btf, + var_type_id); + if (!var_type || !btf_type_is_var(var_type)) { + btf_verifier_log_vsi(env, v->t, vsi, + "Not a VAR kind member"); + return -EINVAL; + } + + if (!env_type_is_resolve_sink(env, var_type) && + !env_type_is_resolved(env, var_type_id)) { + env_stack_set_next_member(env, i + 1); + return env_stack_push(env, var_type, var_type_id); + } + + type_id = var_type->type; + if (!btf_type_id_size(btf, &type_id, &type_size)) { + btf_verifier_log_vsi(env, v->t, vsi, "Invalid type"); + return -EINVAL; + } + + if (vsi->size < type_size) { + btf_verifier_log_vsi(env, v->t, vsi, "Invalid size"); + return -EINVAL; + } + } + + env_stack_pop_resolved(env, 0, 0); + return 0; +} + +static void btf_datasec_log(struct btf_verifier_env *env, + const struct btf_type *t) +{ + btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); +} + +static void btf_datasec_seq_show(const struct btf *btf, + const struct btf_type *t, u32 type_id, + void *data, u8 bits_offset, + struct seq_file *m) +{ + const struct btf_var_secinfo *vsi; + const struct btf_type *var; + u32 i; + + seq_printf(m, "section (\"%s\") = {", __btf_name_by_offset(btf, t->name_off)); + for_each_vsi(i, t, vsi) { + var = btf_type_by_id(btf, vsi->type); + if (i) + seq_puts(m, ","); + btf_type_ops(var)->seq_show(btf, var, vsi->type, + data + vsi->offset, bits_offset, m); + } + seq_puts(m, "}"); +} + +static const struct btf_kind_operations datasec_ops = { + .check_meta = btf_datasec_check_meta, + .resolve = btf_datasec_resolve, + .check_member = btf_df_check_member, + .check_kflag_member = btf_df_check_kflag_member, + .log_details = btf_datasec_log, + .seq_show = btf_datasec_seq_show, +}; + static int btf_func_proto_check(struct btf_verifier_env *env, const struct btf_type *t) { @@ -2542,6 +2913,8 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { [BTF_KIND_RESTRICT] = &modifier_ops, [BTF_KIND_FUNC] = &func_ops, [BTF_KIND_FUNC_PROTO] = &func_proto_ops, + [BTF_KIND_VAR] = &var_ops, + [BTF_KIND_DATASEC] = &datasec_ops, }; static s32 btf_check_meta(struct btf_verifier_env *env, @@ -2622,13 +2995,17 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, if (!env_type_is_resolved(env, type_id)) return false; - if (btf_type_is_struct(t)) + if (btf_type_is_struct(t) || btf_type_is_datasec(t)) return !btf->resolved_ids[type_id] && - !btf->resolved_sizes[type_id]; + !btf->resolved_sizes[type_id]; - if (btf_type_is_modifier(t) || btf_type_is_ptr(t)) { + if (btf_type_is_modifier(t) || btf_type_is_ptr(t) || + btf_type_is_var(t)) { t = btf_type_id_resolve(btf, &type_id); - return t && !btf_type_is_modifier(t); + return t && + !btf_type_is_modifier(t) && + !btf_type_is_var(t) && + !btf_type_is_datasec(t); } if (btf_type_is_array(t)) { -- cgit From 2824ecb7010f6a20e9a4140512b798469ab066cc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Apr 2019 23:20:10 +0200 Subject: bpf: allow for key-less BTF in array map Given we'll be reusing BPF array maps for global data/bss/rodata sections, we need a way to associate BTF DataSec type as its map value type. In usual cases we have this ugly BPF_ANNOTATE_KV_PAIR() macro hack e.g. via 38d5d3b3d5db ("bpf: Introduce BPF_ANNOTATE_KV_PAIR") to get initial map to type association going. While more use cases for it are discouraged, this also won't work for global data since the use of array map is a BPF loader detail and therefore unknown at compilation time. For array maps with just a single entry we make an exception in terms of BTF in that key type is declared optional if value type is of DataSec type. The latter LLVM is guaranteed to emit and it also aligns with how we regard global data maps as just a plain buffer area reusing existing map facilities for allowing things like introspection with existing tools. Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 1 + kernel/bpf/arraymap.c | 15 ++++++++++++++- kernel/bpf/btf.c | 2 +- kernel/bpf/syscall.c | 15 +++++++++++---- 4 files changed, 27 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/include/linux/btf.h b/include/linux/btf.h index 455d31b55828..64cdf2a23d42 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -51,6 +51,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); +bool btf_type_is_void(const struct btf_type *t); #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 217b10bd9f48..584636c9e2eb 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -391,7 +391,8 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key, return; } - seq_printf(m, "%u: ", *(u32 *)key); + if (map->btf_key_type_id) + seq_printf(m, "%u: ", *(u32 *)key); btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); seq_puts(m, "\n"); @@ -428,6 +429,18 @@ static int array_map_check_btf(const struct bpf_map *map, { u32 int_data; + /* One exception for keyless BTF: .bss/.data/.rodata map */ + if (btf_type_is_void(key_type)) { + if (map->map_type != BPF_MAP_TYPE_ARRAY || + map->max_entries != 1) + return -EINVAL; + + if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC) + return -EINVAL; + + return 0; + } + if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) return -EINVAL; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 0cecf6bab61b..cad09858a5f2 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -326,7 +326,7 @@ static bool btf_type_is_modifier(const struct btf_type *t) return false; } -static bool btf_type_is_void(const struct btf_type *t) +bool btf_type_is_void(const struct btf_type *t) { return t == &btf_void; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 198c9680bf0d..438199e2eca4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -504,9 +504,16 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, u32 key_size, value_size; int ret = 0; - key_type = btf_type_id_size(btf, &btf_key_id, &key_size); - if (!key_type || key_size != map->key_size) - return -EINVAL; + /* Some maps allow key to be unspecified. */ + if (btf_key_id) { + key_type = btf_type_id_size(btf, &btf_key_id, &key_size); + if (!key_type || key_size != map->key_size) + return -EINVAL; + } else { + key_type = btf_type_by_id(btf, 0); + if (!map->ops->map_check_btf) + return -EINVAL; + } value_type = btf_type_id_size(btf, &btf_value_id, &value_size); if (!value_type || value_size != map->value_size) @@ -573,7 +580,7 @@ static int map_create(union bpf_attr *attr) if (attr->btf_key_type_id || attr->btf_value_type_id) { struct btf *btf; - if (!attr->btf_key_type_id || !attr->btf_value_type_id) { + if (!attr->btf_value_type_id) { err = -EINVAL; goto free_map_nouncharge; } -- cgit From ee6a6500fe1f5c5a3f18de33fe0178a3c627f6d0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 10 Apr 2019 10:45:38 -0400 Subject: ftrace: Remove ASSIGN_OPS_HASH() macro from ftrace.c The ASSIGN_OPS_HASH() macro was moved to fgraph.c where it was used, but for some reason it wasn't removed from ftrace.c, as it is no longer referenced there. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 26c8ca9bd06b..bf11e0553450 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -69,12 +69,8 @@ #define INIT_OPS_HASH(opsname) \ .func_hash = &opsname.local_hash, \ .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), -#define ASSIGN_OPS_HASH(opsname, val) \ - .func_hash = val, \ - .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), #else #define INIT_OPS_HASH(opsname) -#define ASSIGN_OPS_HASH(opsname, val) #endif enum { -- cgit From 83ca259489409a1fe8a83dad83a82f32174d4f31 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Fri, 5 Apr 2019 09:15:25 +0800 Subject: swiotlb: dump used and total slots when swiotlb buffer is full So far the kernel only prints the requested size if swiotlb buffer if full. It is not possible to know whether it is simply an out of buffer, or it is because swiotlb cannot allocate buffer with the requested size due to fragmentation. As 'io_tlb_used' is available since commit 71602fe6d4e9 ("swiotlb: add debugfs to track swiotlb buffer usage"), both 'io_tlb_used' and 'io_tlb_nslabs' are printed when swiotlb buffer is full. Signed-off-by: Dongli Zhang Signed-off-by: Konrad Rzeszutek Wilk --- kernel/dma/swiotlb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 2b0c8fd9658e..82c767374c70 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -533,7 +533,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, not_found: spin_unlock_irqrestore(&io_tlb_lock, flags); if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) - dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); + dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu, used %lu\n", + size, io_tlb_nslabs, io_tlb_used); return DMA_MAPPING_ERROR; found: io_tlb_used += nslots; -- cgit From b0b9395d865e3060d97658fbc9ba3f77fecc8da1 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 9 Apr 2019 11:49:09 -0700 Subject: bpf: support input __sk_buff context in BPF_PROG_TEST_RUN Add new set of arguments to bpf_attr for BPF_PROG_TEST_RUN: * ctx_in/ctx_size_in - input context * ctx_out/ctx_size_out - output context The intended use case is to pass some meta data to the test runs that operate on skb (this has being brought up on recent LPC). For programs that use bpf_prog_test_run_skb, support __sk_buff input and output. Initially, from input __sk_buff, copy _only_ cb and priority into skb, all other non-zero fields are prohibited (with EINVAL). If the user has set ctx_out/ctx_size_out, copy the potentially modified __sk_buff back to the userspace. We require all fields of input __sk_buff except the ones we explicitly support to be set to zero. The expectation is that in the future we might add support for more fields and we want to fail explicitly if the user runs the program on the kernel where we don't yet support them. The API is intentionally vague (i.e. we don't explicitly add __sk_buff to bpf_attr, but ctx_in) to potentially let other test_run types use this interface in the future (this can be xdp_md for xdp types for example). v4: * don't copy more than allowed in bpf_ctx_init [Martin] v3: * handle case where ctx_in is NULL, but ctx_out is not [Martin] * convert size==0 checks to ptr==NULL checks and add some extra ptr checks [Martin] v2: * Addressed comments from Martin Lau Signed-off-by: Stanislav Fomichev Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 7 +++ kernel/bpf/syscall.c | 10 +++- net/bpf/test_run.c | 143 ++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 151 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index af1cbd951f26..31a27dd337dc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -412,6 +412,13 @@ union bpf_attr { __aligned_u64 data_out; __u32 repeat; __u32 duration; + __u32 ctx_size_in; /* input: len of ctx_in */ + __u32 ctx_size_out; /* input/output: len of ctx_out + * returns ENOSPC if ctx_out + * is too small. + */ + __aligned_u64 ctx_in; + __aligned_u64 ctx_out; } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 438199e2eca4..d995eedfdd16 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2009,7 +2009,7 @@ static int bpf_prog_query(const union bpf_attr *attr, return cgroup_bpf_prog_query(attr, uattr); } -#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration +#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out static int bpf_prog_test_run(const union bpf_attr *attr, union bpf_attr __user *uattr) @@ -2022,6 +2022,14 @@ static int bpf_prog_test_run(const union bpf_attr *attr, if (CHECK_ATTR(BPF_PROG_TEST_RUN)) return -EINVAL; + if ((attr->test.ctx_size_in && !attr->test.ctx_in) || + (!attr->test.ctx_size_in && attr->test.ctx_in)) + return -EINVAL; + + if ((attr->test.ctx_size_out && !attr->test.ctx_out) || + (!attr->test.ctx_size_out && attr->test.ctx_out)) + return -EINVAL; + prog = bpf_prog_get(attr->test.prog_fd); if (IS_ERR(prog)) return PTR_ERR(prog); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index fab142b796ef..cbd4fb65aa4f 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -123,12 +123,126 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, return data; } +static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size) +{ + void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in); + void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out); + u32 size = kattr->test.ctx_size_in; + void *data; + int err; + + if (!data_in && !data_out) + return NULL; + + data = kzalloc(max_size, GFP_USER); + if (!data) + return ERR_PTR(-ENOMEM); + + if (data_in) { + err = bpf_check_uarg_tail_zero(data_in, max_size, size); + if (err) { + kfree(data); + return ERR_PTR(err); + } + + size = min_t(u32, max_size, size); + if (copy_from_user(data, data_in, size)) { + kfree(data); + return ERR_PTR(-EFAULT); + } + } + return data; +} + +static int bpf_ctx_finish(const union bpf_attr *kattr, + union bpf_attr __user *uattr, const void *data, + u32 size) +{ + void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out); + int err = -EFAULT; + u32 copy_size = size; + + if (!data || !data_out) + return 0; + + if (copy_size > kattr->test.ctx_size_out) { + copy_size = kattr->test.ctx_size_out; + err = -ENOSPC; + } + + if (copy_to_user(data_out, data, copy_size)) + goto out; + if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size))) + goto out; + if (err != -ENOSPC) + err = 0; +out: + return err; +} + +/** + * range_is_zero - test whether buffer is initialized + * @buf: buffer to check + * @from: check from this position + * @to: check up until (excluding) this position + * + * This function returns true if the there is a non-zero byte + * in the buf in the range [from,to). + */ +static inline bool range_is_zero(void *buf, size_t from, size_t to) +{ + return !memchr_inv((u8 *)buf + from, 0, to - from); +} + +static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) +{ + struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; + + if (!__skb) + return 0; + + /* make sure the fields we don't use are zeroed */ + if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, priority))) + return -EINVAL; + + /* priority is allowed */ + + if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) + + FIELD_SIZEOF(struct __sk_buff, priority), + offsetof(struct __sk_buff, cb))) + return -EINVAL; + + /* cb is allowed */ + + if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) + + FIELD_SIZEOF(struct __sk_buff, cb), + sizeof(struct __sk_buff))) + return -EINVAL; + + skb->priority = __skb->priority; + memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN); + + return 0; +} + +static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb) +{ + struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; + + if (!__skb) + return; + + __skb->priority = skb->priority; + memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN); +} + int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { bool is_l2 = false, is_direct_pkt_access = false; u32 size = kattr->test.data_size_in; u32 repeat = kattr->test.repeat; + struct __sk_buff *ctx = NULL; u32 retval, duration; int hh_len = ETH_HLEN; struct sk_buff *skb; @@ -141,6 +255,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, if (IS_ERR(data)) return PTR_ERR(data); + ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff)); + if (IS_ERR(ctx)) { + kfree(data); + return PTR_ERR(ctx); + } + switch (prog->type) { case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: @@ -158,6 +278,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, sk = kzalloc(sizeof(struct sock), GFP_USER); if (!sk) { kfree(data); + kfree(ctx); return -ENOMEM; } sock_net_set(sk, current->nsproxy->net_ns); @@ -166,6 +287,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, skb = build_skb(data, 0); if (!skb) { kfree(data); + kfree(ctx); kfree(sk); return -ENOMEM; } @@ -180,32 +302,37 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, __skb_push(skb, hh_len); if (is_direct_pkt_access) bpf_compute_data_pointers(skb); + ret = convert___skb_to_skb(skb, ctx); + if (ret) + goto out; ret = bpf_test_run(prog, skb, repeat, &retval, &duration); - if (ret) { - kfree_skb(skb); - kfree(sk); - return ret; - } + if (ret) + goto out; if (!is_l2) { if (skb_headroom(skb) < hh_len) { int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); if (pskb_expand_head(skb, nhead, 0, GFP_USER)) { - kfree_skb(skb); - kfree(sk); - return -ENOMEM; + ret = -ENOMEM; + goto out; } } memset(__skb_push(skb, hh_len), 0, hh_len); } + convert_skb_to___skb(skb, ctx); size = skb->len; /* bpf program can never convert linear skb to non-linear */ if (WARN_ON_ONCE(skb_is_nonlinear(skb))) size = skb_headlen(skb); ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration); + if (!ret) + ret = bpf_ctx_finish(kattr, uattr, ctx, + sizeof(struct __sk_buff)); +out: kfree_skb(skb); kfree(sk); + kfree(ctx); return ret; } -- cgit From 2fa717a0337e7acafda9283c938b635191b8036b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 11 Apr 2019 11:46:13 -0400 Subject: ftrace: Do not process STUB functions in ftrace_ops_list_func() The function_graph tracer has a stub function and its ops flag has the FTRACE_OPS_FL_STUB set. As the function graph does not use the ftrace_ops->func pointer but instead is called by a separate part of the ftrace trampoline. The function_graph tracer still requires to pass in a ftrace_ops that may also hold the hash of the functions to call. But there's no reason to test that hash in the function tracing portion. Instead of testing to see if we should call the stub function, just test if the ops has FTRACE_OPS_FL_STUB set, and just skip it. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bf11e0553450..433a64f49532 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6260,6 +6260,9 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, preempt_disable_notrace(); do_for_each_ftrace_op(op, ftrace_ops_list) { + /* Stub functions don't need to be called nor tested */ + if (op->flags & FTRACE_OPS_FL_STUB) + continue; /* * Check the following for each ops before calling their func: * if RCU flag is set, then rcu_is_watching() must be true -- cgit From b1cd609d9b517f01867c211bd520cc805db3068a Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Tue, 12 Mar 2019 09:27:09 -0700 Subject: bpf: Add base proto function for cgroup-bpf programs Currently kernel/bpf/cgroup.c contains only one program type and one proto function cgroup_dev_func_proto(). It'd be useful to have base proto function that can be reused for new cgroup-bpf program types coming soon. Introduce cgroup_base_func_proto(). Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- kernel/bpf/cgroup.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 4e807973aa80..f6cd38746df2 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -701,7 +701,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission); static const struct bpf_func_proto * -cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_map_lookup_elem: @@ -725,6 +725,12 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } } +static const struct bpf_func_proto * +cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + return cgroup_base_func_proto(func_id, prog); +} + static bool cgroup_dev_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, -- cgit From 7b146cebe30cb481b0f70d85779da938da818637 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 27 Feb 2019 12:59:24 -0800 Subject: bpf: Sysctl hook Containerized applications may run as root and it may create problems for whole host. Specifically such applications may change a sysctl and affect applications in other containers. Furthermore in existing infrastructure it may not be possible to just completely disable writing to sysctl, instead such a process should be gradual with ability to log what sysctl are being changed by a container, investigate, limit the set of writable sysctl to currently used ones (so that new ones can not be changed) and eventually reduce this set to zero. The patch introduces new program type BPF_PROG_TYPE_CGROUP_SYSCTL and attach type BPF_CGROUP_SYSCTL to solve these problems on cgroup basis. New program type has access to following minimal context: struct bpf_sysctl { __u32 write; }; Where @write indicates whether sysctl is being read (= 0) or written (= 1). Helpers to access sysctl name and value will be introduced separately. BPF_CGROUP_SYSCTL attach point is added to sysctl code right before passing control to ctl_table->proc_handler so that BPF program can either allow or deny access to sysctl. Suggested-by: Roman Gushchin Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- fs/proc/proc_sysctl.c | 5 +++ include/linux/bpf-cgroup.h | 18 +++++++++ include/linux/bpf_types.h | 1 + include/linux/filter.h | 8 ++++ include/uapi/linux/bpf.h | 9 +++++ kernel/bpf/cgroup.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 7 ++++ kernel/bpf/verifier.c | 1 + 8 files changed, 141 insertions(+) (limited to 'kernel') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d65390727541..e01b02150340 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -588,6 +589,10 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, if (!table->proc_handler) goto out; + error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write); + if (error) + goto out; + /* careful: calling conventions are nasty here */ res = count; error = table->proc_handler(table, write, buf, &res, ppos); diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a4c644c1c091..b1c45da20a26 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -17,6 +17,8 @@ struct bpf_map; struct bpf_prog; struct bpf_sock_ops_kern; struct bpf_cgroup_storage; +struct ctl_table; +struct ctl_table_header; #ifdef CONFIG_CGROUP_BPF @@ -109,6 +111,10 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum bpf_attach_type type); +int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, + struct ctl_table *table, int write, + enum bpf_attach_type type); + static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { @@ -253,6 +259,17 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, \ __ret; \ }) + + +#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ + BPF_CGROUP_SYSCTL); \ + __ret; \ +}) + int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, @@ -321,6 +338,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) ({ 0; }) #define for_each_cgroup_storage_type(stype) for (; false; ) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 08bf2f1fe553..d26991a16894 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -28,6 +28,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) #endif #ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl) #endif #ifdef CONFIG_BPF_LIRC_MODE2 BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) diff --git a/include/linux/filter.h b/include/linux/filter.h index 6074aa064b54..a17732057880 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -33,6 +33,8 @@ struct bpf_prog_aux; struct xdp_rxq_info; struct xdp_buff; struct sock_reuseport; +struct ctl_table; +struct ctl_table_header; /* ArgX, context and stack frame pointer register positions. Note, * Arg1, Arg2, Arg3, etc are used as argument mappings of function @@ -1177,4 +1179,10 @@ struct bpf_sock_ops_kern { */ }; +struct bpf_sysctl_kern { + struct ctl_table_header *head; + struct ctl_table *table; + int write; +}; + #endif /* __LINUX_FILTER_H__ */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2e96d0b4bf65..cc2a2466d5f3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -167,6 +167,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LIRC_MODE2, BPF_PROG_TYPE_SK_REUSEPORT, BPF_PROG_TYPE_FLOW_DISSECTOR, + BPF_PROG_TYPE_CGROUP_SYSCTL, }; enum bpf_attach_type { @@ -188,6 +189,7 @@ enum bpf_attach_type { BPF_CGROUP_UDP6_SENDMSG, BPF_LIRC_MODE2, BPF_FLOW_DISSECTOR, + BPF_CGROUP_SYSCTL, __MAX_BPF_ATTACH_TYPE }; @@ -3308,4 +3310,11 @@ struct bpf_line_info { struct bpf_spin_lock { __u32 val; }; + +struct bpf_sysctl { + __u32 write; /* Sysctl is being read (= 0) or written (= 1). + * Allows 1,2,4-byte read, but no write. + */ +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index f6cd38746df2..610491b5f0aa 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -11,7 +11,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -768,3 +770,93 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = { .get_func_proto = cgroup_dev_func_proto, .is_valid_access = cgroup_dev_is_valid_access, }; + +/** + * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl + * + * @head: sysctl table header + * @table: sysctl table + * @write: sysctl is being read (= 0) or written (= 1) + * @type: type of program to be executed + * + * Program is run when sysctl is being accessed, either read or written, and + * can allow or deny such access. + * + * This function will return %-EPERM if an attached program is found and + * returned value != 1 during execution. In all other cases 0 is returned. + */ +int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, + struct ctl_table *table, int write, + enum bpf_attach_type type) +{ + struct bpf_sysctl_kern ctx = { + .head = head, + .table = table, + .write = write, + }; + struct cgroup *cgrp; + int ret; + + rcu_read_lock(); + cgrp = task_dfl_cgroup(current); + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); + rcu_read_unlock(); + + return ret == 1 ? 0 : -EPERM; +} +EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl); + +static const struct bpf_func_proto * +sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + return cgroup_base_func_proto(func_id, prog); +} + +static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + const int size_default = sizeof(__u32); + + if (off < 0 || off + size > sizeof(struct bpf_sysctl) || + off % size || type != BPF_READ) + return false; + + switch (off) { + case offsetof(struct bpf_sysctl, write): + bpf_ctx_record_field_size(info, size_default); + return bpf_ctx_narrow_access_ok(off, size, size_default); + default: + return false; + } +} + +static u32 sysctl_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + + switch (si->off) { + case offsetof(struct bpf_sysctl, write): + *insn++ = BPF_LDX_MEM( + BPF_SIZE(si->code), si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sysctl_kern, write, + FIELD_SIZEOF(struct bpf_sysctl_kern, + write), + target_size)); + break; + } + + return insn - insn_buf; +} + +const struct bpf_verifier_ops cg_sysctl_verifier_ops = { + .get_func_proto = sysctl_func_proto, + .is_valid_access = sysctl_is_valid_access, + .convert_ctx_access = sysctl_convert_ctx_access, +}; + +const struct bpf_prog_ops cg_sysctl_prog_ops = { +}; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d995eedfdd16..92c9b8a32b50 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1888,6 +1888,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_FLOW_DISSECTOR: ptype = BPF_PROG_TYPE_FLOW_DISSECTOR; break; + case BPF_CGROUP_SYSCTL: + ptype = BPF_PROG_TYPE_CGROUP_SYSCTL; + break; default: return -EINVAL; } @@ -1966,6 +1969,9 @@ static int bpf_prog_detach(const union bpf_attr *attr) return lirc_prog_detach(attr); case BPF_FLOW_DISSECTOR: return skb_flow_dissector_bpf_prog_detach(attr); + case BPF_CGROUP_SYSCTL: + ptype = BPF_PROG_TYPE_CGROUP_SYSCTL; + break; default: return -EINVAL; } @@ -1999,6 +2005,7 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_UDP6_SENDMSG: case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_DEVICE: + case BPF_CGROUP_SYSCTL: break; case BPF_LIRC_MODE2: return lirc_prog_query(attr, uattr); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f25b7c9c20ba..20808e3c95a8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5267,6 +5267,7 @@ static int check_return_code(struct bpf_verifier_env *env) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_CGROUP_DEVICE: + case BPF_PROG_TYPE_CGROUP_SYSCTL: break; default: return 0; -- cgit From 808649fb787d918a48a360a668ee4ee9023f0c11 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 27 Feb 2019 13:28:48 -0800 Subject: bpf: Introduce bpf_sysctl_get_name helper Add bpf_sysctl_get_name() helper to copy sysctl name (/proc/sys/ entry) into provided by BPF_PROG_TYPE_CGROUP_SYSCTL program buffer. By default full name (w/o /proc/sys/) is copied, e.g. "net/ipv4/tcp_mem". If BPF_F_SYSCTL_BASE_NAME flag is set, only base name will be copied, e.g. "tcp_mem". Documentation for the new helper is provided in bpf.h UAPI. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 22 ++++++++++++++- kernel/bpf/cgroup.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index cc2a2466d5f3..9c8a2f3ccb9b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2506,6 +2506,22 @@ union bpf_attr { * Return * 0 if iph and th are a valid SYN cookie ACK, or a negative error * otherwise. + * + * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) + * Description + * Get name of sysctl in /proc/sys/ and copy it into provided by + * program buffer *buf* of size *buf_len*. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * + * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is + * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name + * only (e.g. "tcp_mem"). + * Return + * Number of character copied (not including the trailing NUL). + * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated name in this case). */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2608,7 +2624,8 @@ union bpf_attr { FN(skb_ecn_set_ce), \ FN(get_listener_sock), \ FN(skc_lookup_tcp), \ - FN(tcp_check_syncookie), + FN(tcp_check_syncookie), \ + FN(sysctl_get_name), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2681,6 +2698,9 @@ enum bpf_func_id { BPF_ADJ_ROOM_ENCAP_L2_MASK) \ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) +/* BPF_FUNC_sysctl_get_name flags. */ +#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 610491b5f0aa..a68387043244 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -806,10 +807,77 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl); +static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp, + size_t *lenp) +{ + ssize_t tmp_ret = 0, ret; + + if (dir->header.parent) { + tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp); + if (tmp_ret < 0) + return tmp_ret; + } + + ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp); + if (ret < 0) + return ret; + *bufp += ret; + *lenp -= ret; + ret += tmp_ret; + + /* Avoid leading slash. */ + if (!ret) + return ret; + + tmp_ret = strscpy(*bufp, "/", *lenp); + if (tmp_ret < 0) + return tmp_ret; + *bufp += tmp_ret; + *lenp -= tmp_ret; + + return ret + tmp_ret; +} + +BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf, + size_t, buf_len, u64, flags) +{ + ssize_t tmp_ret = 0, ret; + + if (!buf) + return -EINVAL; + + if (!(flags & BPF_F_SYSCTL_BASE_NAME)) { + if (!ctx->head) + return -EINVAL; + tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len); + if (tmp_ret < 0) + return tmp_ret; + } + + ret = strscpy(buf, ctx->table->procname, buf_len); + + return ret < 0 ? ret : tmp_ret + ret; +} + +static const struct bpf_func_proto bpf_sysctl_get_name_proto = { + .func = bpf_sysctl_get_name, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { - return cgroup_base_func_proto(func_id, prog); + switch (func_id) { + case BPF_FUNC_sysctl_get_name: + return &bpf_sysctl_get_name_proto; + default: + return cgroup_base_func_proto(func_id, prog); + } } static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type, -- cgit From 1d11b3016cec4ed9770b98e82a61708c8f4926e7 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Thu, 28 Feb 2019 19:22:15 -0800 Subject: bpf: Introduce bpf_sysctl_get_current_value helper Add bpf_sysctl_get_current_value() helper to copy current sysctl value into provided by BPF_PROG_TYPE_CGROUP_SYSCTL program buffer. It provides same string as user space can see by reading corresponding file in /proc/sys/, including new line, etc. Documentation for the new helper is provided in bpf.h UAPI. Since current value is kept in ctl_table->data in a parsed form, ctl_table->proc_handler() with write=0 is called to read that data and convert it to a string. Such a string can later be parsed by a program using helpers that will be introduced separately. Unfortunately it's not trivial to provide API to access parsed data due to variety of data representations (string, intvec, uintvec, ulongvec, custom structures, even NULL, etc). Instead it's assumed that user know how to handle specific sysctl they're interested in and appropriate helpers can be used. Since ctl_table->proc_handler() expects __user buffer, conversion to __user happens for kernel allocated one where the value is stored. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 2 ++ include/uapi/linux/bpf.h | 22 +++++++++++++++- kernel/bpf/cgroup.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/filter.h b/include/linux/filter.h index a17732057880..f254ff92819f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1182,6 +1182,8 @@ struct bpf_sock_ops_kern { struct bpf_sysctl_kern { struct ctl_table_header *head; struct ctl_table *table; + void *cur_val; + size_t cur_len; int write; }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9c8a2f3ccb9b..063543afa359 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2522,6 +2522,25 @@ union bpf_attr { * * **-E2BIG** if the buffer wasn't big enough (*buf* will contain * truncated name in this case). + * + * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * Description + * Get current value of sysctl as it is presented in /proc/sys + * (incl. newline, etc), and copy it as a string into provided + * by program buffer *buf* of size *buf_len*. + * + * The whole value is copied, no matter what file position user + * space issued e.g. sys_read at. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * Return + * Number of character copied (not including the trailing NUL). + * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated name in this case). + * + * **-EINVAL** if current value was unavailable, e.g. because + * sysctl is uninitialized and read returns -EIO for it. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2625,7 +2644,8 @@ union bpf_attr { FN(get_listener_sock), \ FN(skc_lookup_tcp), \ FN(tcp_check_syncookie), \ - FN(sysctl_get_name), + FN(sysctl_get_name), \ + FN(sysctl_get_current_value), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index a68387043244..c6b2cf29a54b 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -794,15 +794,37 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, .head = head, .table = table, .write = write, + .cur_val = NULL, + .cur_len = PAGE_SIZE, }; struct cgroup *cgrp; int ret; + ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL); + if (ctx.cur_val) { + mm_segment_t old_fs; + loff_t pos = 0; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + if (table->proc_handler(table, 0, (void __user *)ctx.cur_val, + &ctx.cur_len, &pos)) { + /* Let BPF program decide how to proceed. */ + ctx.cur_len = 0; + } + set_fs(old_fs); + } else { + /* Let BPF program decide how to proceed. */ + ctx.cur_len = 0; + } + rcu_read_lock(); cgrp = task_dfl_cgroup(current); ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); rcu_read_unlock(); + kfree(ctx.cur_val); + return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl); @@ -869,12 +891,55 @@ static const struct bpf_func_proto bpf_sysctl_get_name_proto = { .arg4_type = ARG_ANYTHING, }; +static int copy_sysctl_value(char *dst, size_t dst_len, char *src, + size_t src_len) +{ + if (!dst) + return -EINVAL; + + if (!dst_len) + return -E2BIG; + + if (!src || !src_len) { + memset(dst, 0, dst_len); + return -EINVAL; + } + + memcpy(dst, src, min(dst_len, src_len)); + + if (dst_len > src_len) { + memset(dst + src_len, '\0', dst_len - src_len); + return src_len; + } + + dst[dst_len - 1] = '\0'; + + return -E2BIG; +} + +BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx, + char *, buf, size_t, buf_len) +{ + return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len); +} + +static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = { + .func = bpf_sysctl_get_current_value, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + static const struct bpf_func_proto * sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_sysctl_get_name: return &bpf_sysctl_get_name_proto; + case BPF_FUNC_sysctl_get_current_value: + return &bpf_sysctl_get_current_value_proto; default: return cgroup_base_func_proto(func_id, prog); } -- cgit From 4e63acdff864654cee0ac5aaeda3913798ee78f6 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Thu, 7 Mar 2019 18:38:43 -0800 Subject: bpf: Introduce bpf_sysctl_{get,set}_new_value helpers Add helpers to work with new value being written to sysctl by user space. bpf_sysctl_get_new_value() copies value being written to sysctl into provided buffer. bpf_sysctl_set_new_value() overrides new value being written by user space with a one from provided buffer. Buffer should contain string representation of the value, similar to what can be seen in /proc/sys/. Both helpers can be used only on sysctl write. File position matters and can be managed by an interface that will be introduced separately. E.g. if user space calls sys_write to a file in /proc/sys/ at file position = X, where X > 0, then the value set by bpf_sysctl_set_new_value() will be written starting from X. If program wants to override whole value with specified buffer, file position has to be set to zero. Documentation for the new helpers is provided in bpf.h UAPI. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- fs/proc/proc_sysctl.c | 22 ++++++++++--- include/linux/bpf-cgroup.h | 8 +++-- include/linux/filter.h | 3 ++ include/uapi/linux/bpf.h | 38 +++++++++++++++++++++- kernel/bpf/cgroup.c | 81 +++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 142 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index e01b02150340..023101c6f0d7 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -570,8 +570,8 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, struct inode *inode = file_inode(filp); struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; + void *new_buf = NULL; ssize_t error; - size_t res; if (IS_ERR(head)) return PTR_ERR(head); @@ -589,15 +589,27 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, if (!table->proc_handler) goto out; - error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write); + error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count, + &new_buf); if (error) goto out; /* careful: calling conventions are nasty here */ - res = count; - error = table->proc_handler(table, write, buf, &res, ppos); + if (new_buf) { + mm_segment_t old_fs; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + error = table->proc_handler(table, write, (void __user *)new_buf, + &count, ppos); + set_fs(old_fs); + kfree(new_buf); + } else { + error = table->proc_handler(table, write, buf, &count, ppos); + } + if (!error) - error = res; + error = count; out: sysctl_head_finish(head); diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index b1c45da20a26..1e97271f9a10 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -113,7 +113,8 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, struct ctl_table *table, int write, - enum bpf_attach_type type); + void __user *buf, size_t *pcount, + void **new_buf, enum bpf_attach_type type); static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) @@ -261,11 +262,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, }) -#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) \ +#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, nbuf) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled) \ __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ + buf, count, nbuf, \ BPF_CGROUP_SYSCTL); \ __ret; \ }) @@ -338,7 +340,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,nbuf) ({ 0; }) #define for_each_cgroup_storage_type(stype) for (; false; ) diff --git a/include/linux/filter.h b/include/linux/filter.h index f254ff92819f..a23653f9460c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1184,6 +1184,9 @@ struct bpf_sysctl_kern { struct ctl_table *table; void *cur_val; size_t cur_len; + void *new_val; + size_t new_len; + int new_updated; int write; }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 063543afa359..547b8258d731 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2541,6 +2541,40 @@ union bpf_attr { * * **-EINVAL** if current value was unavailable, e.g. because * sysctl is uninitialized and read returns -EIO for it. + * + * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * Description + * Get new value being written by user space to sysctl (before + * the actual write happens) and copy it as a string into + * provided by program buffer *buf* of size *buf_len*. + * + * User space may write new value at file position > 0. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * Return + * Number of character copied (not including the trailing NUL). + * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated name in this case). + * + * **-EINVAL** if sysctl is being read. + * + * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) + * Description + * Override new value being written by user space to sysctl with + * value provided by program in buffer *buf* of size *buf_len*. + * + * *buf* should contain a string in same form as provided by user + * space on sysctl write. + * + * User space may write new value at file position > 0. To override + * the whole sysctl value file position should be set to zero. + * Return + * 0 on success. + * + * **-E2BIG** if the *buf_len* is too big. + * + * **-EINVAL** if sysctl is being read. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2645,7 +2679,9 @@ union bpf_attr { FN(skc_lookup_tcp), \ FN(tcp_check_syncookie), \ FN(sysctl_get_name), \ - FN(sysctl_get_current_value), + FN(sysctl_get_current_value), \ + FN(sysctl_get_new_value), \ + FN(sysctl_set_new_value), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index c6b2cf29a54b..ba4e21986760 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -778,6 +778,13 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = { * @head: sysctl table header * @table: sysctl table * @write: sysctl is being read (= 0) or written (= 1) + * @buf: pointer to buffer passed by user space + * @pcount: value-result argument: value is size of buffer pointed to by @buf, + * result is size of @new_buf if program set new value, initial value + * otherwise + * @new_buf: pointer to pointer to new buffer that will be allocated if program + * overrides new value provided by user space on sysctl write + * NOTE: it's caller responsibility to free *new_buf if it was set * @type: type of program to be executed * * Program is run when sysctl is being accessed, either read or written, and @@ -788,7 +795,8 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = { */ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, struct ctl_table *table, int write, - enum bpf_attach_type type) + void __user *buf, size_t *pcount, + void **new_buf, enum bpf_attach_type type) { struct bpf_sysctl_kern ctx = { .head = head, @@ -796,6 +804,9 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, .write = write, .cur_val = NULL, .cur_len = PAGE_SIZE, + .new_val = NULL, + .new_len = 0, + .new_updated = 0, }; struct cgroup *cgrp; int ret; @@ -818,6 +829,18 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, ctx.cur_len = 0; } + if (write && buf && *pcount) { + /* BPF program should be able to override new value with a + * buffer bigger than provided by user. + */ + ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL); + ctx.new_len = min(PAGE_SIZE, *pcount); + if (!ctx.new_val || + copy_from_user(ctx.new_val, buf, ctx.new_len)) + /* Let BPF program decide how to proceed. */ + ctx.new_len = 0; + } + rcu_read_lock(); cgrp = task_dfl_cgroup(current); ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); @@ -825,6 +848,13 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, kfree(ctx.cur_val); + if (ret == 1 && ctx.new_updated) { + *new_buf = ctx.new_val; + *pcount = ctx.new_len; + } else { + kfree(ctx.new_val); + } + return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl); @@ -932,6 +962,51 @@ static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = { .arg3_type = ARG_CONST_SIZE, }; +BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf, + size_t, buf_len) +{ + if (!ctx->write) { + if (buf && buf_len) + memset(buf, '\0', buf_len); + return -EINVAL; + } + return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len); +} + +static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = { + .func = bpf_sysctl_get_new_value, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + +BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx, + const char *, buf, size_t, buf_len) +{ + if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len) + return -EINVAL; + + if (buf_len > PAGE_SIZE - 1) + return -E2BIG; + + memcpy(ctx->new_val, buf, buf_len); + ctx->new_len = buf_len; + ctx->new_updated = 1; + + return 0; +} + +static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = { + .func = bpf_sysctl_set_new_value, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + static const struct bpf_func_proto * sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -940,6 +1015,10 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sysctl_get_name_proto; case BPF_FUNC_sysctl_get_current_value: return &bpf_sysctl_get_current_value_proto; + case BPF_FUNC_sysctl_get_new_value: + return &bpf_sysctl_get_new_value_proto; + case BPF_FUNC_sysctl_set_new_value: + return &bpf_sysctl_set_new_value_proto; default: return cgroup_base_func_proto(func_id, prog); } -- cgit From e1550bfe0de47e30484ba91de1e50a91ec1c31f5 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Thu, 7 Mar 2019 18:50:52 -0800 Subject: bpf: Add file_pos field to bpf_sysctl ctx Add file_pos field to bpf_sysctl context to read and write sysctl file position at which sysctl is being accessed (read or written). The field can be used to e.g. override whole sysctl value on write to sysctl even when sys_write is called by user space with file_pos > 0. Or BPF program may reject such accesses. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- fs/proc/proc_sysctl.c | 2 +- include/linux/bpf-cgroup.h | 9 ++++---- include/linux/filter.h | 3 +++ include/uapi/linux/bpf.h | 3 +++ kernel/bpf/cgroup.c | 54 +++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 63 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 023101c6f0d7..2d61e5e8c863 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -590,7 +590,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, goto out; error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count, - &new_buf); + ppos, &new_buf); if (error) goto out; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 1e97271f9a10..cb3c6b3b89c8 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -114,7 +114,8 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, struct ctl_table *table, int write, void __user *buf, size_t *pcount, - void **new_buf, enum bpf_attach_type type); + loff_t *ppos, void **new_buf, + enum bpf_attach_type type); static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) @@ -262,12 +263,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, }) -#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, nbuf) \ +#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos, nbuf) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled) \ __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ - buf, count, nbuf, \ + buf, count, pos, nbuf, \ BPF_CGROUP_SYSCTL); \ __ret; \ }) @@ -340,7 +341,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,nbuf) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; }) #define for_each_cgroup_storage_type(stype) for (; false; ) diff --git a/include/linux/filter.h b/include/linux/filter.h index a23653f9460c..fb0edad75971 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1188,6 +1188,9 @@ struct bpf_sysctl_kern { size_t new_len; int new_updated; int write; + loff_t *ppos; + /* Temporary "register" for indirect stores to ppos. */ + u64 tmp_reg; }; #endif /* __LINUX_FILTER_H__ */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 547b8258d731..89976de909af 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3391,6 +3391,9 @@ struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. */ + __u32 file_pos; /* Sysctl file position to read from, write to. + * Allows 1,2,4-byte read an 4-byte write. + */ }; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ba4e21986760..b2adf22139b3 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -782,6 +782,9 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = { * @pcount: value-result argument: value is size of buffer pointed to by @buf, * result is size of @new_buf if program set new value, initial value * otherwise + * @ppos: value-result argument: value is position at which read from or write + * to sysctl is happening, result is new position if program overrode it, + * initial value otherwise * @new_buf: pointer to pointer to new buffer that will be allocated if program * overrides new value provided by user space on sysctl write * NOTE: it's caller responsibility to free *new_buf if it was set @@ -796,12 +799,14 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = { int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, struct ctl_table *table, int write, void __user *buf, size_t *pcount, - void **new_buf, enum bpf_attach_type type) + loff_t *ppos, void **new_buf, + enum bpf_attach_type type) { struct bpf_sysctl_kern ctx = { .head = head, .table = table, .write = write, + .ppos = ppos, .cur_val = NULL, .cur_len = PAGE_SIZE, .new_val = NULL, @@ -1030,14 +1035,22 @@ static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type, { const int size_default = sizeof(__u32); - if (off < 0 || off + size > sizeof(struct bpf_sysctl) || - off % size || type != BPF_READ) + if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size) return false; switch (off) { case offsetof(struct bpf_sysctl, write): + if (type != BPF_READ) + return false; bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); + case offsetof(struct bpf_sysctl, file_pos): + if (type == BPF_READ) { + bpf_ctx_record_field_size(info, size_default); + return bpf_ctx_narrow_access_ok(off, size, size_default); + } else { + return size == size_default; + } default: return false; } @@ -1059,6 +1072,41 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type, write), target_size)); break; + case offsetof(struct bpf_sysctl, file_pos): + /* ppos is a pointer so it should be accessed via indirect + * loads and stores. Also for stores additional temporary + * register is used since neither src_reg nor dst_reg can be + * overridden. + */ + if (type == BPF_WRITE) { + int treg = BPF_REG_9; + + if (si->src_reg == treg || si->dst_reg == treg) + --treg; + if (si->src_reg == treg || si->dst_reg == treg) + --treg; + *insn++ = BPF_STX_MEM( + BPF_DW, si->dst_reg, treg, + offsetof(struct bpf_sysctl_kern, tmp_reg)); + *insn++ = BPF_LDX_MEM( + BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), + treg, si->dst_reg, + offsetof(struct bpf_sysctl_kern, ppos)); + *insn++ = BPF_STX_MEM( + BPF_SIZEOF(u32), treg, si->src_reg, 0); + *insn++ = BPF_LDX_MEM( + BPF_DW, treg, si->dst_reg, + offsetof(struct bpf_sysctl_kern, tmp_reg)); + } else { + *insn++ = BPF_LDX_MEM( + BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sysctl_kern, ppos)); + *insn++ = BPF_LDX_MEM( + BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 0); + } + *target_size = sizeof(u32); + break; } return insn - insn_buf; -- cgit From 57c3bb725a3dd97d960d7e1cd0845d88de53217f Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Mon, 18 Mar 2019 16:57:10 -0700 Subject: bpf: Introduce ARG_PTR_TO_{INT,LONG} arg types Currently the way to pass result from BPF helper to BPF program is to provide memory area defined by pointer and size: func(void *, size_t). It works great for generic use-case, but for simple types, such as int, it's overkill and consumes two arguments when it could use just one. Introduce new argument types ARG_PTR_TO_INT and ARG_PTR_TO_LONG to be able to pass result from helper to program via pointer to int and long correspondingly: func(int *) or func(long *). New argument types are similar to ARG_PTR_TO_MEM with the following differences: * they don't require corresponding ARG_CONST_SIZE argument, predefined access sizes are used instead (32bit for int, 64bit for long); * it's possible to use more than one such an argument in a helper; * provided pointers have to be aligned. It's easy to introduce similar ARG_PTR_TO_CHAR and ARG_PTR_TO_SHORT argument types. It's not done due to lack of use-case though. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ kernel/bpf/verifier.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'kernel') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e4d4c1771ab0..fd06ada941ad 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -202,6 +202,8 @@ enum bpf_arg_type { ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ + ARG_PTR_TO_INT, /* pointer to int */ + ARG_PTR_TO_LONG, /* pointer to long */ }; /* type of values returned from helper functions */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 20808e3c95a8..15ab6fa817ce 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2462,6 +2462,22 @@ static bool arg_type_is_mem_size(enum bpf_arg_type type) type == ARG_CONST_SIZE_OR_ZERO; } +static bool arg_type_is_int_ptr(enum bpf_arg_type type) +{ + return type == ARG_PTR_TO_INT || + type == ARG_PTR_TO_LONG; +} + +static int int_ptr_type_to_size(enum bpf_arg_type type) +{ + if (type == ARG_PTR_TO_INT) + return sizeof(u32); + else if (type == ARG_PTR_TO_LONG) + return sizeof(u64); + + return -EINVAL; +} + static int check_func_arg(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) @@ -2554,6 +2570,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, type != expected_type) goto err_type; meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM; + } else if (arg_type_is_int_ptr(arg_type)) { + expected_type = PTR_TO_STACK; + if (!type_is_pkt_pointer(type) && + type != PTR_TO_MAP_VALUE && + type != expected_type) + goto err_type; } else { verbose(env, "unsupported arg_type %d\n", arg_type); return -EFAULT; @@ -2635,6 +2657,13 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, err = check_helper_mem_access(env, regno - 1, reg->umax_value, zero_size_allowed, meta); + } else if (arg_type_is_int_ptr(arg_type)) { + int size = int_ptr_type_to_size(arg_type); + + err = check_helper_mem_access(env, regno, size, false, meta); + if (err) + return err; + err = check_ptr_alignment(env, reg, 0, size, true); } return err; -- cgit From d7a4cb9b6705a89937d12c8158a35a3145dc967a Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Mon, 18 Mar 2019 17:55:26 -0700 Subject: bpf: Introduce bpf_strtol and bpf_strtoul helpers Add bpf_strtol and bpf_strtoul to convert a string to long and unsigned long correspondingly. It's similar to user space strtol(3) and strtoul(3) with a few changes to the API: * instead of NUL-terminated C string the helpers expect buffer and buffer length; * resulting long or unsigned long is returned in a separate result-argument; * return value is used to indicate success or failure, on success number of consumed bytes is returned that can be used to identify position to read next if the buffer is expected to contain multiple integers; * instead of *base* argument, *flags* is used that provides base in 5 LSB, other bits are reserved for future use; * number of supported bases is limited. Documentation for the new helpers is provided in bpf.h UAPI. The helpers are made available to BPF_PROG_TYPE_CGROUP_SYSCTL programs to be able to convert string input to e.g. "ulongvec" output. E.g. "net/ipv4/tcp_mem" consists of three ulong integers. They can be parsed by calling to bpf_strtoul three times. Implementation notes: Implementation includes "../../lib/kstrtox.h" to reuse integer parsing functions. It's done exactly same way as fs/proc/base.c already does. Unfortunately existing kstrtoX function can't be used directly since they fail if any invalid character is present right after integer in the string. Existing simple_strtoX functions can't be used either since they're obsolete and don't handle overflow properly. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 + include/uapi/linux/bpf.h | 51 +++++++++++++++++- kernel/bpf/cgroup.c | 4 ++ kernel/bpf/helpers.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 187 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fd06ada941ad..f15432d90728 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -989,6 +989,8 @@ extern const struct bpf_func_proto bpf_sk_redirect_map_proto; extern const struct bpf_func_proto bpf_spin_lock_proto; extern const struct bpf_func_proto bpf_spin_unlock_proto; extern const struct bpf_func_proto bpf_get_local_storage_proto; +extern const struct bpf_func_proto bpf_strtol_proto; +extern const struct bpf_func_proto bpf_strtoul_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 89976de909af..c26be24fd5e2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2575,6 +2575,53 @@ union bpf_attr { * **-E2BIG** if the *buf_len* is too big. * * **-EINVAL** if sysctl is being read. + * + * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) + * Description + * Convert the initial part of the string from buffer *buf* of + * size *buf_len* to a long integer according to the given base + * and save the result in *res*. + * + * The string may begin with an arbitrary amount of white space + * (as determined by isspace(3)) followed by a single optional '-' + * sign. + * + * Five least significant bits of *flags* encode base, other bits + * are currently unused. + * + * Base must be either 8, 10, 16 or 0 to detect it automatically + * similar to user space strtol(3). + * Return + * Number of characters consumed on success. Must be positive but + * no more than buf_len. + * + * **-EINVAL** if no valid digits were found or unsupported base + * was provided. + * + * **-ERANGE** if resulting value was out of range. + * + * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) + * Description + * Convert the initial part of the string from buffer *buf* of + * size *buf_len* to an unsigned long integer according to the + * given base and save the result in *res*. + * + * The string may begin with an arbitrary amount of white space + * (as determined by isspace(3)). + * + * Five least significant bits of *flags* encode base, other bits + * are currently unused. + * + * Base must be either 8, 10, 16 or 0 to detect it automatically + * similar to user space strtoul(3). + * Return + * Number of characters consumed on success. Must be positive but + * no more than buf_len. + * + * **-EINVAL** if no valid digits were found or unsupported base + * was provided. + * + * **-ERANGE** if resulting value was out of range. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2681,7 +2728,9 @@ union bpf_attr { FN(sysctl_get_name), \ FN(sysctl_get_current_value), \ FN(sysctl_get_new_value), \ - FN(sysctl_set_new_value), + FN(sysctl_set_new_value), \ + FN(strtol), \ + FN(strtoul), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index b2adf22139b3..789d4ab2336e 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1016,6 +1016,10 @@ static const struct bpf_func_proto * sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { + case BPF_FUNC_strtol: + return &bpf_strtol_proto; + case BPF_FUNC_strtoul: + return &bpf_strtoul_proto; case BPF_FUNC_sysctl_get_name: return &bpf_sysctl_get_name_proto; case BPF_FUNC_sysctl_get_current_value: diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a411fc17d265..4266ffde07ca 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -18,6 +18,9 @@ #include #include #include +#include + +#include "../../lib/kstrtox.h" /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -363,4 +366,132 @@ const struct bpf_func_proto bpf_get_local_storage_proto = { .arg2_type = ARG_ANYTHING, }; #endif + +#define BPF_STRTOX_BASE_MASK 0x1F + +static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags, + unsigned long long *res, bool *is_negative) +{ + unsigned int base = flags & BPF_STRTOX_BASE_MASK; + const char *cur_buf = buf; + size_t cur_len = buf_len; + unsigned int consumed; + size_t val_len; + char str[64]; + + if (!buf || !buf_len || !res || !is_negative) + return -EINVAL; + + if (base != 0 && base != 8 && base != 10 && base != 16) + return -EINVAL; + + if (flags & ~BPF_STRTOX_BASE_MASK) + return -EINVAL; + + while (cur_buf < buf + buf_len && isspace(*cur_buf)) + ++cur_buf; + + *is_negative = (cur_buf < buf + buf_len && *cur_buf == '-'); + if (*is_negative) + ++cur_buf; + + consumed = cur_buf - buf; + cur_len -= consumed; + if (!cur_len) + return -EINVAL; + + cur_len = min(cur_len, sizeof(str) - 1); + memcpy(str, cur_buf, cur_len); + str[cur_len] = '\0'; + cur_buf = str; + + cur_buf = _parse_integer_fixup_radix(cur_buf, &base); + val_len = _parse_integer(cur_buf, base, res); + + if (val_len & KSTRTOX_OVERFLOW) + return -ERANGE; + + if (val_len == 0) + return -EINVAL; + + cur_buf += val_len; + consumed += cur_buf - str; + + return consumed; +} + +static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags, + long long *res) +{ + unsigned long long _res; + bool is_negative; + int err; + + err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); + if (err < 0) + return err; + if (is_negative) { + if ((long long)-_res > 0) + return -ERANGE; + *res = -_res; + } else { + if ((long long)_res < 0) + return -ERANGE; + *res = _res; + } + return err; +} + +BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, + long *, res) +{ + long long _res; + int err; + + err = __bpf_strtoll(buf, buf_len, flags, &_res); + if (err < 0) + return err; + if (_res != (long)_res) + return -ERANGE; + *res = _res; + return err; +} + +const struct bpf_func_proto bpf_strtol_proto = { + .func = bpf_strtol, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_LONG, +}; + +BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, + unsigned long *, res) +{ + unsigned long long _res; + bool is_negative; + int err; + + err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); + if (err < 0) + return err; + if (is_negative) + return -EINVAL; + if (_res != (unsigned long)_res) + return -ERANGE; + *res = _res; + return err; +} + +const struct bpf_func_proto bpf_strtoul_proto = { + .func = bpf_strtoul, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_LONG, +}; #endif -- cgit From 51356ac89b5a15e5207e8740d5f4f8b71cb7332f Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 12 Apr 2019 16:01:01 -0700 Subject: bpf: Fix distinct pointer types warning for ARCH=i386 Fix a new warning reported by kbuild for make ARCH=i386: In file included from kernel/bpf/cgroup.c:11:0: kernel/bpf/cgroup.c: In function '__cgroup_bpf_run_filter_sysctl': include/linux/kernel.h:827:29: warning: comparison of distinct pointer types lacks a cast (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) ^ include/linux/kernel.h:841:4: note: in expansion of macro '__typecheck' (__typecheck(x, y) && __no_side_effects(x, y)) ^~~~~~~~~~~ include/linux/kernel.h:851:24: note: in expansion of macro '__safe_cmp' __builtin_choose_expr(__safe_cmp(x, y), \ ^~~~~~~~~~ include/linux/kernel.h:860:19: note: in expansion of macro '__careful_cmp' #define min(x, y) __careful_cmp(x, y, <) ^~~~~~~~~~~~~ >> kernel/bpf/cgroup.c:837:17: note: in expansion of macro 'min' ctx.new_len = min(PAGE_SIZE, *pcount); ^~~ Fixes: 4e63acdff864 ("bpf: Introduce bpf_sysctl_{get,set}_new_value helpers") Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- kernel/bpf/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 789d4ab2336e..e58a6c247f56 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -839,7 +839,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, * buffer bigger than provided by user. */ ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL); - ctx.new_len = min(PAGE_SIZE, *pcount); + ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount); if (!ctx.new_val || copy_from_user(ctx.new_val, buf, ctx.new_len)) /* Let BPF program decide how to proceed. */ -- cgit From 53b29c336830db48ad3dc737f88b8c065b1f0851 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Fri, 12 Apr 2019 19:38:26 +0800 Subject: swiotlb: save io_tlb_used to local variable before leaving critical section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When swiotlb is full, the kernel would print io_tlb_used. However, the result might be inaccurate at that time because we have left the critical section protected by spinlock. Therefore, we backup the io_tlb_used into local variable before leaving critical section. Fixes: 83ca25948940 ("swiotlb: dump used and total slots when swiotlb buffer is full") Suggested-by: Håkon Bugge Signed-off-by: Dongli Zhang Signed-off-by: Konrad Rzeszutek Wilk --- kernel/dma/swiotlb.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 82c767374c70..38d57218809c 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -445,6 +445,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, unsigned long mask; unsigned long offset_slots; unsigned long max_slots; + unsigned long tmp_io_tlb_used; if (no_iotlb_memory) panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); @@ -531,10 +532,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, } while (index != wrap); not_found: + tmp_io_tlb_used = io_tlb_used; + spin_unlock_irqrestore(&io_tlb_lock, flags); if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) - dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu, used %lu\n", - size, io_tlb_nslabs, io_tlb_used); + dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", + size, io_tlb_nslabs, tmp_io_tlb_used); return DMA_MAPPING_ERROR; found: io_tlb_used += nslots; -- cgit From 1b04aee7e2182454a663950e68084fa5ada9625a Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 12 Apr 2019 22:59:34 +0100 Subject: bpf: refactor propagate_liveness to eliminate duplicated for loop Propagation for register and stack slot are finished in separate for loop, while they are perfect to be put into a single loop. This could also let them share some common variables in later patches. Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 15ab6fa817ce..da285df492fd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6254,10 +6254,8 @@ static int propagate_liveness(struct bpf_verifier_env *env, return err; } } - } - /* ... and stack slots */ - for (frame = 0; frame <= vstate->curframe; frame++) { + /* Propagate stack slots. */ state = vstate->frame[frame]; parent = vparent->frame[frame]; for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && -- cgit From 3f8cafa4131f67d47c8de326c7dcd561cc65fb38 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 12 Apr 2019 22:59:35 +0100 Subject: bpf: refactor propagate_liveness to eliminate code redundance Access to reg states were not factored out, the consequence is long code for dereferencing them which made the indentation not good for reading. This patch factor out these code so the core code in the loop could be easier to follow. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index da285df492fd..6fd36a8ba1a0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6232,8 +6232,9 @@ static int propagate_liveness(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, struct bpf_verifier_state *vparent) { - int i, frame, err = 0; + struct bpf_reg_state *state_reg, *parent_reg; struct bpf_func_state *state, *parent; + int i, frame, err = 0; if (vparent->curframe != vstate->curframe) { WARN(1, "propagate_live: parent frame %d current frame %d\n", @@ -6243,28 +6244,33 @@ static int propagate_liveness(struct bpf_verifier_env *env, /* Propagate read liveness of registers... */ BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); for (frame = 0; frame <= vstate->curframe; frame++) { + parent = vparent->frame[frame]; + state = vstate->frame[frame]; + parent_reg = parent->regs; + state_reg = state->regs; /* We don't need to worry about FP liveness, it's read-only */ for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { - if (vparent->frame[frame]->regs[i].live & REG_LIVE_READ) + if (parent_reg[i].live & REG_LIVE_READ) continue; - if (vstate->frame[frame]->regs[i].live & REG_LIVE_READ) { - err = mark_reg_read(env, &vstate->frame[frame]->regs[i], - &vparent->frame[frame]->regs[i]); - if (err) - return err; - } + if (!(state_reg[i].live & REG_LIVE_READ)) + continue; + err = mark_reg_read(env, &state_reg[i], &parent_reg[i]); + if (err) + return err; } /* Propagate stack slots. */ - state = vstate->frame[frame]; - parent = vparent->frame[frame]; for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && i < parent->allocated_stack / BPF_REG_SIZE; i++) { - if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ) + parent_reg = &parent->stack[i].spilled_ptr; + state_reg = &state->stack[i].spilled_ptr; + if (parent_reg->live & REG_LIVE_READ) continue; - if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) - mark_reg_read(env, &state->stack[i].spilled_ptr, - &parent->stack[i].spilled_ptr); + if (!(state_reg->live & REG_LIVE_READ)) + continue; + err = mark_reg_read(env, state_reg, parent_reg); + if (err) + return err; } } return err; -- cgit From 55e7f3b5ac94a8c9f2e35961a45e9aa526a9e41d Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 12 Apr 2019 22:59:36 +0100 Subject: bpf: factor out reg and stack slot propagation into "propagate_liveness_reg" After code refactor in previous patches, the propagation logic inside the for loop in "propagate_liveness" becomes clear that they are good enough to be factored out into a common function "propagate_liveness_reg". Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6fd36a8ba1a0..3fdb301c4f8c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6221,6 +6221,22 @@ static bool states_equal(struct bpf_verifier_env *env, return true; } +static int propagate_liveness_reg(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + struct bpf_reg_state *parent_reg) +{ + int err; + + if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ)) + return 0; + + err = mark_reg_read(env, reg, parent_reg); + if (err) + return err; + + return 0; +} + /* A write screens off any subsequent reads; but write marks come from the * straight-line code between a state and its parent. When we arrive at an * equivalent state (jump target or such) we didn't arrive by the straight-line @@ -6250,11 +6266,8 @@ static int propagate_liveness(struct bpf_verifier_env *env, state_reg = state->regs; /* We don't need to worry about FP liveness, it's read-only */ for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { - if (parent_reg[i].live & REG_LIVE_READ) - continue; - if (!(state_reg[i].live & REG_LIVE_READ)) - continue; - err = mark_reg_read(env, &state_reg[i], &parent_reg[i]); + err = propagate_liveness_reg(env, &state_reg[i], + &parent_reg[i]); if (err) return err; } @@ -6264,11 +6277,8 @@ static int propagate_liveness(struct bpf_verifier_env *env, i < parent->allocated_stack / BPF_REG_SIZE; i++) { parent_reg = &parent->stack[i].spilled_ptr; state_reg = &state->stack[i].spilled_ptr; - if (parent_reg->live & REG_LIVE_READ) - continue; - if (!(state_reg->live & REG_LIVE_READ)) - continue; - err = mark_reg_read(env, state_reg, parent_reg); + err = propagate_liveness_reg(env, state_reg, + parent_reg); if (err) return err; } -- cgit From c342dc109aa5a4f0bb36335cb441aaafc98b98ef Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 12 Apr 2019 22:59:37 +0100 Subject: bpf: refactor "check_reg_arg" to eliminate code redundancy There are a few "regs[regno]" here are there across "check_reg_arg", this patch factor it out into a simple "reg" pointer. The intention is to simplify code indentation and make the later patches in this set look cleaner. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3fdb301c4f8c..c7220153c5b1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1177,30 +1177,32 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, { struct bpf_verifier_state *vstate = env->cur_state; struct bpf_func_state *state = vstate->frame[vstate->curframe]; - struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *reg, *regs = state->regs; if (regno >= MAX_BPF_REG) { verbose(env, "R%d is invalid\n", regno); return -EINVAL; } + reg = ®s[regno]; if (t == SRC_OP) { /* check whether register used as source operand can be read */ - if (regs[regno].type == NOT_INIT) { + if (reg->type == NOT_INIT) { verbose(env, "R%d !read_ok\n", regno); return -EACCES; } /* We don't need to worry about FP liveness because it's read-only */ - if (regno != BPF_REG_FP) - return mark_reg_read(env, ®s[regno], - regs[regno].parent); + if (regno == BPF_REG_FP) + return 0; + + return mark_reg_read(env, reg, reg->parent); } else { /* check whether register used as dest operand can be written to */ if (regno == BPF_REG_FP) { verbose(env, "frame pointer is read only\n"); return -EACCES; } - regs[regno].live |= REG_LIVE_WRITTEN; + reg->live |= REG_LIVE_WRITTEN; if (t == DST_OP) mark_reg_unknown(env, regs, regno); } -- cgit From 2d87a0674bd60d855e4008e2d84f5b23d7cb9b7d Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Wed, 10 Apr 2019 11:14:19 +0200 Subject: timekeeping: Audit clock adjustments Emit an audit record whenever the system clock is changed (i.e. shifted by a non-zero offset) by a syscall from userspace. The syscalls than can (at the time of writing) trigger such record are: - settimeofday(2), stime(2), clock_settime(2) -- via do_settimeofday64() - adjtimex(2), clock_adjtime(2) -- via do_adjtimex() The new records have type AUDIT_TIME_INJOFFSET and contain the following fields: - sec -- the 'seconds' part of the offset - nsec -- the 'nanoseconds' part of the offset Example record (time was shifted backwards by ~15.875 seconds): type=TIME_INJOFFSET msg=audit(1530616049.652:13): sec=-16 nsec=124887145 The records of this type will be associated with the corresponding syscall records. Signed-off-by: Ondrej Mosnacek Reviewed-by: Richard Guy Briggs Reviewed-by: Thomas Gleixner [PM: fixed a line width problem in __audit_tk_injoffset()] Signed-off-by: Paul Moore --- include/linux/audit.h | 14 ++++++++++++++ include/uapi/linux/audit.h | 1 + kernel/auditsc.c | 7 +++++++ kernel/time/timekeeping.c | 6 ++++++ 4 files changed, 28 insertions(+) (limited to 'kernel') diff --git a/include/linux/audit.h b/include/linux/audit.h index 1e69d9fe16da..2c62c0468888 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -365,6 +365,7 @@ extern void __audit_log_capset(const struct cred *new, const struct cred *old); extern void __audit_mmap_fd(int fd, int flags); extern void __audit_log_kern_module(char *name); extern void __audit_fanotify(unsigned int response); +extern void __audit_tk_injoffset(struct timespec64 offset); static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -467,6 +468,16 @@ static inline void audit_fanotify(unsigned int response) __audit_fanotify(response); } +static inline void audit_tk_injoffset(struct timespec64 offset) +{ + /* ignore no-op events */ + if (offset.tv_sec == 0 && offset.tv_nsec == 0) + return; + + if (!audit_dummy_context()) + __audit_tk_injoffset(offset); +} + extern int audit_n_rules; extern int audit_signals; #else /* CONFIG_AUDITSYSCALL */ @@ -580,6 +591,9 @@ static inline void audit_log_kern_module(char *name) static inline void audit_fanotify(unsigned int response) { } +static inline void audit_tk_injoffset(struct timespec64 offset) +{ } + static inline void audit_ptrace(struct task_struct *t) { } #define audit_n_rules 0 diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 3901c51c0b93..ab58d67baf4d 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -114,6 +114,7 @@ #define AUDIT_REPLACE 1329 /* Replace auditd if this packet unanswerd */ #define AUDIT_KERN_MODULE 1330 /* Kernel Module events */ #define AUDIT_FANOTIFY 1331 /* Fanotify access decision */ +#define AUDIT_TIME_INJOFFSET 1332 /* Timekeeping offset injected */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 51a2ceb3a1ca..3843495d0083 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2512,6 +2512,13 @@ void __audit_fanotify(unsigned int response) AUDIT_FANOTIFY, "resp=%u", response); } +void __audit_tk_injoffset(struct timespec64 offset) +{ + audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_INJOFFSET, + "sec=%lli nsec=%li", + (long long)offset.tv_sec, offset.tv_nsec); +} + static void audit_log_task(struct audit_buffer *ab) { kuid_t auid, uid; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f986e1918d12..3d24be4cd607 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "tick-internal.h" #include "ntp_internal.h" @@ -1250,6 +1251,9 @@ out: /* signal hrtimers about time change */ clock_was_set(); + if (!ret) + audit_tk_injoffset(ts_delta); + return ret; } EXPORT_SYMBOL(do_settimeofday64); @@ -2322,6 +2326,8 @@ int do_adjtimex(struct __kernel_timex *txc) ret = timekeeping_inject_offset(&delta); if (ret) return ret; + + audit_tk_injoffset(delta); } ktime_get_real_ts64(&ts); -- cgit From 7e8eda734d30de81d06a949c9bf9853c445ede4e Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Wed, 10 Apr 2019 11:14:20 +0200 Subject: ntp: Audit NTP parameters adjustment Emit an audit record every time selected NTP parameters are modified from userspace (via adjtimex(2) or clock_adjtime(2)). These parameters may be used to indirectly change system clock, and thus their modifications should be audited. Such events will now generate records of type AUDIT_TIME_ADJNTPVAL containing the following fields: - op -- which value was adjusted: - offset -- corresponding to the time_offset variable - freq -- corresponding to the time_freq variable - status -- corresponding to the time_status variable - adjust -- corresponding to the time_adjust variable - tick -- corresponding to the tick_usec variable - tai -- corresponding to the timekeeping's TAI offset - old -- the old value - new -- the new value Example records: type=TIME_ADJNTPVAL msg=audit(1530616044.507:7): op=status old=64 new=8256 type=TIME_ADJNTPVAL msg=audit(1530616044.511:11): op=freq old=0 new=49180377088000 The records of this type will be associated with the corresponding syscall records. An overview of parameter changes that can be done via do_adjtimex() (based on information from Miroslav Lichvar) and whether they are audited: __timekeeping_set_tai_offset() -- sets the offset from the International Atomic Time (AUDITED) NTP variables: time_offset -- can adjust the clock by up to 0.5 seconds per call and also speed it up or slow down by up to about 0.05% (43 seconds per day) (AUDITED) time_freq -- can speed up or slow down by up to about 0.05% (AUDITED) time_status -- can insert/delete leap seconds and it also enables/ disables synchronization of the hardware real-time clock (AUDITED) time_maxerror, time_esterror -- change error estimates used to inform userspace applications (NOT AUDITED) time_constant -- controls the speed of the clock adjustments that are made when time_offset is set (NOT AUDITED) time_adjust -- can temporarily speed up or slow down the clock by up to 0.05% (AUDITED) tick_usec -- a more extreme version of time_freq; can speed up or slow down the clock by up to 10% (AUDITED) Signed-off-by: Ondrej Mosnacek Reviewed-by: Richard Guy Briggs Reviewed-by: Thomas Gleixner Signed-off-by: Paul Moore --- include/linux/audit.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/audit.h | 1 + kernel/auditsc.c | 22 +++++++++++++++++ kernel/time/ntp.c | 22 ++++++++++++++--- kernel/time/ntp_internal.h | 4 ++- kernel/time/timekeeping.c | 7 +++++- 6 files changed, 112 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/include/linux/audit.h b/include/linux/audit.h index 2c62c0468888..43a23e28ba23 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -86,6 +86,29 @@ struct audit_field { u32 op; }; +enum audit_ntp_type { + AUDIT_NTP_OFFSET, + AUDIT_NTP_FREQ, + AUDIT_NTP_STATUS, + AUDIT_NTP_TAI, + AUDIT_NTP_TICK, + AUDIT_NTP_ADJUST, + + AUDIT_NTP_NVALS /* count */ +}; + +#ifdef CONFIG_AUDITSYSCALL +struct audit_ntp_val { + long long oldval, newval; +}; + +struct audit_ntp_data { + struct audit_ntp_val vals[AUDIT_NTP_NVALS]; +}; +#else +struct audit_ntp_data {}; +#endif + extern int is_audit_feature_set(int which); extern int __init audit_register_class(int class, unsigned *list); @@ -366,6 +389,7 @@ extern void __audit_mmap_fd(int fd, int flags); extern void __audit_log_kern_module(char *name); extern void __audit_fanotify(unsigned int response); extern void __audit_tk_injoffset(struct timespec64 offset); +extern void __audit_ntp_log(const struct audit_ntp_data *ad); static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -478,6 +502,29 @@ static inline void audit_tk_injoffset(struct timespec64 offset) __audit_tk_injoffset(offset); } +static inline void audit_ntp_init(struct audit_ntp_data *ad) +{ + memset(ad, 0, sizeof(*ad)); +} + +static inline void audit_ntp_set_old(struct audit_ntp_data *ad, + enum audit_ntp_type type, long long val) +{ + ad->vals[type].oldval = val; +} + +static inline void audit_ntp_set_new(struct audit_ntp_data *ad, + enum audit_ntp_type type, long long val) +{ + ad->vals[type].newval = val; +} + +static inline void audit_ntp_log(const struct audit_ntp_data *ad) +{ + if (!audit_dummy_context()) + __audit_ntp_log(ad); +} + extern int audit_n_rules; extern int audit_signals; #else /* CONFIG_AUDITSYSCALL */ @@ -594,6 +641,20 @@ static inline void audit_fanotify(unsigned int response) static inline void audit_tk_injoffset(struct timespec64 offset) { } +static inline void audit_ntp_init(struct audit_ntp_data *ad) +{ } + +static inline void audit_ntp_set_old(struct audit_ntp_data *ad, + enum audit_ntp_type type, long long val) +{ } + +static inline void audit_ntp_set_new(struct audit_ntp_data *ad, + enum audit_ntp_type type, long long val) +{ } + +static inline void audit_ntp_log(const struct audit_ntp_data *ad) +{ } + static inline void audit_ptrace(struct task_struct *t) { } #define audit_n_rules 0 diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index ab58d67baf4d..a1280af20336 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -115,6 +115,7 @@ #define AUDIT_KERN_MODULE 1330 /* Kernel Module events */ #define AUDIT_FANOTIFY 1331 /* Fanotify access decision */ #define AUDIT_TIME_INJOFFSET 1332 /* Timekeeping offset injected */ +#define AUDIT_TIME_ADJNTPVAL 1333 /* NTP value adjustment */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 3843495d0083..5371b59bde36 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2519,6 +2519,28 @@ void __audit_tk_injoffset(struct timespec64 offset) (long long)offset.tv_sec, offset.tv_nsec); } +static void audit_log_ntp_val(const struct audit_ntp_data *ad, + const char *op, enum audit_ntp_type type) +{ + const struct audit_ntp_val *val = &ad->vals[type]; + + if (val->newval == val->oldval) + return; + + audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_ADJNTPVAL, + "op=%s old=%lli new=%lli", op, val->oldval, val->newval); +} + +void __audit_ntp_log(const struct audit_ntp_data *ad) +{ + audit_log_ntp_val(ad, "offset", AUDIT_NTP_OFFSET); + audit_log_ntp_val(ad, "freq", AUDIT_NTP_FREQ); + audit_log_ntp_val(ad, "status", AUDIT_NTP_STATUS); + audit_log_ntp_val(ad, "tai", AUDIT_NTP_TAI); + audit_log_ntp_val(ad, "tick", AUDIT_NTP_TICK); + audit_log_ntp_val(ad, "adjust", AUDIT_NTP_ADJUST); +} + static void audit_log_task(struct audit_buffer *ab) { kuid_t auid, uid; diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 92a90014a925..ac5555e25733 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "ntp_internal.h" #include "timekeeping_internal.h" @@ -709,7 +710,7 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc, * kernel time-keeping variables. used by xntpd. */ int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, - s32 *time_tai) + s32 *time_tai, struct audit_ntp_data *ad) { int result; @@ -720,14 +721,29 @@ int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, /* adjtime() is independent from ntp_adjtime() */ time_adjust = txc->offset; ntp_update_frequency(); + + audit_ntp_set_old(ad, AUDIT_NTP_ADJUST, save_adjust); + audit_ntp_set_new(ad, AUDIT_NTP_ADJUST, time_adjust); } txc->offset = save_adjust; } else { - /* If there are input parameters, then process them: */ - if (txc->modes) + if (txc->modes) { + audit_ntp_set_old(ad, AUDIT_NTP_OFFSET, time_offset); + audit_ntp_set_old(ad, AUDIT_NTP_FREQ, time_freq); + audit_ntp_set_old(ad, AUDIT_NTP_STATUS, time_status); + audit_ntp_set_old(ad, AUDIT_NTP_TAI, *time_tai); + audit_ntp_set_old(ad, AUDIT_NTP_TICK, tick_usec); + process_adjtimex_modes(txc, time_tai); + audit_ntp_set_new(ad, AUDIT_NTP_OFFSET, time_offset); + audit_ntp_set_new(ad, AUDIT_NTP_FREQ, time_freq); + audit_ntp_set_new(ad, AUDIT_NTP_STATUS, time_status); + audit_ntp_set_new(ad, AUDIT_NTP_TAI, *time_tai); + audit_ntp_set_new(ad, AUDIT_NTP_TICK, tick_usec); + } + txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, NTP_SCALE_SHIFT); if (!(time_status & STA_NANO)) diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index 40e6122e634e..908ecaa65fc3 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -8,6 +8,8 @@ extern void ntp_clear(void); extern u64 ntp_tick_length(void); extern ktime_t ntp_get_next_leap(void); extern int second_overflow(time64_t secs); -extern int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, s32 *time_tai); +extern int __do_adjtimex(struct __kernel_timex *txc, + const struct timespec64 *ts, + s32 *time_tai, struct audit_ntp_data *ad); extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts); #endif /* _LINUX_NTP_INTERNAL_H */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3d24be4cd607..f366f2fdf1b0 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2307,6 +2307,7 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc) int do_adjtimex(struct __kernel_timex *txc) { struct timekeeper *tk = &tk_core.timekeeper; + struct audit_ntp_data ad; unsigned long flags; struct timespec64 ts; s32 orig_tai, tai; @@ -2330,13 +2331,15 @@ int do_adjtimex(struct __kernel_timex *txc) audit_tk_injoffset(delta); } + audit_ntp_init(&ad); + ktime_get_real_ts64(&ts); raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); orig_tai = tai = tk->tai_offset; - ret = __do_adjtimex(txc, &ts, &tai); + ret = __do_adjtimex(txc, &ts, &tai, &ad); if (tai != orig_tai) { __timekeeping_set_tai_offset(tk, tai); @@ -2347,6 +2350,8 @@ int do_adjtimex(struct __kernel_timex *txc) write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); + audit_ntp_log(&ad); + /* Update the multiplier immediately if frequency was set directly */ if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK)) timekeeping_advance(TK_ADV_FREQ); -- cgit From 02a8c817a31606b6b37c2b755f6569903f44241e Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Sun, 14 Apr 2019 18:58:46 +0200 Subject: bpf: add map helper functions push, pop, peek in more BPF programs commit f1a2e44a3aec ("bpf: add queue and stack maps") introduced new BPF helper functions: - BPF_FUNC_map_push_elem - BPF_FUNC_map_pop_elem - BPF_FUNC_map_peek_elem but they were made available only for network BPF programs. This patch makes them available for tracepoint, cgroup and lirc programs. Signed-off-by: Alban Crequy Cc: Mauricio Vasquez B Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- drivers/media/rc/bpf-lirc.c | 6 ++++++ kernel/bpf/cgroup.c | 6 ++++++ kernel/trace/bpf_trace.c | 6 ++++++ 3 files changed, 18 insertions(+) (limited to 'kernel') diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index 390a722e6211..ee657003c1a1 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -97,6 +97,12 @@ lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_map_update_elem_proto; case BPF_FUNC_map_delete_elem: return &bpf_map_delete_elem_proto; + case BPF_FUNC_map_push_elem: + return &bpf_map_push_elem_proto; + case BPF_FUNC_map_pop_elem: + return &bpf_map_pop_elem_proto; + case BPF_FUNC_map_peek_elem: + return &bpf_map_peek_elem_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; case BPF_FUNC_tail_call: diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index e58a6c247f56..fcde0f7b2585 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -713,6 +713,12 @@ cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_map_update_elem_proto; case BPF_FUNC_map_delete_elem: return &bpf_map_delete_elem_proto; + case BPF_FUNC_map_push_elem: + return &bpf_map_push_elem_proto; + case BPF_FUNC_map_pop_elem: + return &bpf_map_pop_elem_proto; + case BPF_FUNC_map_peek_elem: + return &bpf_map_peek_elem_proto; case BPF_FUNC_get_current_uid_gid: return &bpf_get_current_uid_gid_proto; case BPF_FUNC_get_local_storage: diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d64c00afceb5..91800be0c8eb 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -569,6 +569,12 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_map_update_elem_proto; case BPF_FUNC_map_delete_elem: return &bpf_map_delete_elem_proto; + case BPF_FUNC_map_push_elem: + return &bpf_map_push_elem_proto; + case BPF_FUNC_map_pop_elem: + return &bpf_map_pop_elem_proto; + case BPF_FUNC_map_peek_elem: + return &bpf_map_peek_elem_proto; case BPF_FUNC_probe_read: return &bpf_probe_read_proto; case BPF_FUNC_ktime_get_ns: -- cgit From 0d306c31b2f77391dacdeaad4470c577f2aecc4f Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Tue, 16 Apr 2019 18:13:01 +0900 Subject: bpf: use BPF_CAST_CALL for casting bpf call verifier.c uses BPF_CAST_CALL for casting bpf call except at one place in jit_subprogs(). Let's use the macro for consistency. Signed-off-by: Prashant Bhole Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c7220153c5b1..db301e9b5295 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7647,9 +7647,8 @@ static int jit_subprogs(struct bpf_verifier_env *env) insn->src_reg != BPF_PSEUDO_CALL) continue; subprog = insn->off; - insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) - func[subprog]->bpf_func - - __bpf_call_base; + insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) - + __bpf_call_base; } /* we use the aux data to keep a list of the start addresses -- cgit From 77361825bb01ecadf3ac8622e2e4dbc28806e858 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 12 Apr 2019 17:07:32 +0200 Subject: bpf: cpumap use ptr_ring_consume_batched Move ptr_ring dequeue outside loop, that allocate SKBs and calls network stack, as these operations that can take some time. The ptr_ring is a communication channel between CPUs, where we want to reduce/limit any cacheline bouncing. Do a concentrated bulk dequeue via ptr_ring_consume_batched, to shorten the period and times the remote cacheline in ptr_ring is read Batch size 8 is both to (1) limit BH-disable period, and (2) consume one cacheline on 64-bit archs. After reducing the BH-disable section further then we can consider changing this, while still thinking about L1 cacheline size being active. Signed-off-by: Jesper Dangaard Brouer Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- kernel/bpf/cpumap.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 3c18260403dd..430103e182a0 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -240,6 +240,8 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) } } +#define CPUMAP_BATCH 8 + static int cpu_map_kthread_run(void *data) { struct bpf_cpu_map_entry *rcpu = data; @@ -252,8 +254,9 @@ static int cpu_map_kthread_run(void *data) * kthread_stop signal until queue is empty. */ while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) { - unsigned int processed = 0, drops = 0, sched = 0; - struct xdp_frame *xdpf; + unsigned int drops = 0, sched = 0; + void *frames[CPUMAP_BATCH]; + int i, n; /* Release CPU reschedule checks */ if (__ptr_ring_empty(rcpu->queue)) { @@ -269,14 +272,16 @@ static int cpu_map_kthread_run(void *data) sched = cond_resched(); } - /* Process packets in rcpu->queue */ - local_bh_disable(); /* * The bpf_cpu_map_entry is single consumer, with this * kthread CPU pinned. Lockless access to ptr_ring * consume side valid as no-resize allowed of queue. */ - while ((xdpf = __ptr_ring_consume(rcpu->queue))) { + n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH); + + local_bh_disable(); + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; struct sk_buff *skb; int ret; @@ -290,13 +295,9 @@ static int cpu_map_kthread_run(void *data) ret = netif_receive_skb_core(skb); if (ret == NET_RX_DROP) drops++; - - /* Limit BH-disable period */ - if (++processed == 8) - break; } /* Feedback loop via tracepoint */ - trace_xdp_cpumap_kthread(rcpu->map_id, processed, drops, sched); + trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched); local_bh_enable(); /* resched point, may call do_softirq() */ } -- cgit From 8f0504a97e1ba6b70e1c8b5a88255c280f263287 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 12 Apr 2019 17:07:43 +0200 Subject: bpf: cpumap do bulk allocation of SKBs As cpumap now batch consume xdp_frame's from the ptr_ring, it knows how many SKBs it need to allocate. Thus, lets bulk allocate these SKBs via kmem_cache_alloc_bulk() API, and use the previously introduced function build_skb_around(). Notice that the flag __GFP_ZERO asks the slab/slub allocator to clear the memory for us. This does clear a larger area than needed, but my micro benchmarks on Intel CPUs show that this is slightly faster due to being a cacheline aligned area is cleared for the SKBs. (For SLUB allocator, there is a future optimization potential, because SKBs will with high probability originate from same page. If we can find/identify continuous memory areas then the Intel CPU memset rep stos will have a real performance gain.) Signed-off-by: Jesper Dangaard Brouer Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- kernel/bpf/cpumap.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 430103e182a0..732d6ced3987 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -160,12 +160,12 @@ static void cpu_map_kthread_stop(struct work_struct *work) } static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, - struct xdp_frame *xdpf) + struct xdp_frame *xdpf, + struct sk_buff *skb) { unsigned int hard_start_headroom; unsigned int frame_size; void *pkt_data_start; - struct sk_buff *skb; /* Part of headroom was reserved to xdpf */ hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom; @@ -191,8 +191,8 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); pkt_data_start = xdpf->data - hard_start_headroom; - skb = build_skb(pkt_data_start, frame_size); - if (!skb) + skb = build_skb_around(skb, pkt_data_start, frame_size); + if (unlikely(!skb)) return NULL; skb_reserve(skb, hard_start_headroom); @@ -256,7 +256,9 @@ static int cpu_map_kthread_run(void *data) while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) { unsigned int drops = 0, sched = 0; void *frames[CPUMAP_BATCH]; - int i, n; + void *skbs[CPUMAP_BATCH]; + gfp_t gfp = __GFP_ZERO | GFP_ATOMIC; + int i, n, m; /* Release CPU reschedule checks */ if (__ptr_ring_empty(rcpu->queue)) { @@ -278,14 +280,20 @@ static int cpu_map_kthread_run(void *data) * consume side valid as no-resize allowed of queue. */ n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH); + m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs); + if (unlikely(m == 0)) { + for (i = 0; i < n; i++) + skbs[i] = NULL; /* effect: xdp_return_frame */ + drops = n; + } local_bh_disable(); for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; - struct sk_buff *skb; + struct sk_buff *skb = skbs[i]; int ret; - skb = cpu_map_build_skb(rcpu, xdpf); + skb = cpu_map_build_skb(rcpu, xdpf, skb); if (!skb) { xdp_return_frame(xdpf); continue; -- cgit From 86d231459d6dc9094e70c35c3517f4ef860b2f1e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 12 Apr 2019 17:07:48 +0200 Subject: bpf: cpumap memory prefetchw optimizations for struct page A lot of the performance gain comes from this patch. While analysing performance overhead it was found that the largest CPU stalls were caused when touching the struct page area. It is first read with a READ_ONCE from build_skb_around via page_is_pfmemalloc(), and when freed written by page_frag_free() call. Measurements show that the prefetchw (W) variant operation is needed to achieve the performance gain. We believe this optimization it two fold, first the W-variant saves one step in the cache-coherency protocol, and second it helps us to avoid the non-temporal prefetch HW optimizations and bring this into all cache-levels. It might be worth investigating if prefetch into L2 will have the same benefit. Signed-off-by: Jesper Dangaard Brouer Acked-by: Ilias Apalodimas Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- kernel/bpf/cpumap.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 732d6ced3987..cf727d77c6c6 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -280,6 +280,18 @@ static int cpu_map_kthread_run(void *data) * consume side valid as no-resize allowed of queue. */ n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH); + + for (i = 0; i < n; i++) { + void *f = frames[i]; + struct page *page = virt_to_page(f); + + /* Bring struct page memory area to curr CPU. Read by + * build_skb_around via page_is_pfmemalloc(), and when + * freed written by page_frag_free call. + */ + prefetchw(page); + } + m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs); if (unlikely(m == 0)) { for (i = 0; i < n; i++) -- cgit From 0bc199854405543b0debe67c735c0aae94f1d319 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Date: Wed, 17 Apr 2019 16:35:49 -0400 Subject: ipv6: Add rate limit mask for ICMPv6 messages To make ICMPv6 closer to ICMPv4, add ratemask parameter. Since the ICMP message types use larger numeric values, a simple bitmask doesn't fit. I use large bitmap. The input and output are the in form of list of ranges. Set the default to rate limit all error messages but Packet Too Big. For Packet Too Big, use ratemask instead of hard-coded. There are functions where icmpv6_xrlim_allow() and icmpv6_global_allow() aren't called. This patch only adds them to icmpv6_echo_reply(). Rate limiting error messages is mandated by RFC 4443 but RFC 4890 says that it is also acceptable to rate limit informational messages. Thus, I removed the current hard-coded behavior of icmpv6_mask_allow() that doesn't rate limit informational messages. v2: Add dummy function proc_do_large_bitmap() if CONFIG_PROC_SYSCTL isn't defined, expand the description in ip-sysctl.txt and remove unnecessary conditional before kfree(). v3: Inline the bitmap instead of dynamically allocated. Still is a pointer to it is needed because of the way proc_do_large_bitmap work. Signed-off-by: Stephen Suryaputra Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 17 ++++++++++++++++- include/net/netns/ipv6.h | 3 +++ include/uapi/linux/icmpv6.h | 4 ++++ kernel/sysctl.c | 6 ++++++ net/ipv6/af_inet6.c | 9 +++++++++ net/ipv6/icmp.c | 31 ++++++++++++++++++++++--------- 6 files changed, 60 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 5eedc6941ce5..8a5e59ba223f 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1913,11 +1913,26 @@ enhanced_dad - BOOLEAN icmp/*: ratelimit - INTEGER - Limit the maximal rates for sending ICMPv6 packets. + Limit the maximal rates for sending ICMPv6 messages. 0 to disable any limiting, otherwise the minimal space between responses in milliseconds. Default: 1000 +ratemask - list of comma separated ranges + For ICMPv6 message types matching the ranges in the ratemask, limit + the sending of the message according to ratelimit parameter. + + The format used for both input and output is a comma separated + list of ranges (e.g. "0-127,129" for ICMPv6 message type 0 to 127 and + 129). Writing to the file will clear all previous ranges of ICMPv6 + message types and update the current list with the input. + + Refer to: https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xhtml + for numerical values of ICMPv6 message types, e.g. echo request is 128 + and echo reply is 129. + + Default: 0-1,3-127 (rate limit ICMPv6 errors except Packet Too Big) + echo_ignore_all - BOOLEAN If set non-zero, then the kernel will ignore all ICMP ECHO requests sent to it over the IPv6 protocol. diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 64e29b58bb5e..5e61b5a8635d 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -8,6 +8,7 @@ #ifndef __NETNS_IPV6_H__ #define __NETNS_IPV6_H__ #include +#include struct ctl_table_header; @@ -35,6 +36,8 @@ struct netns_sysctl_ipv6 { int icmpv6_echo_ignore_all; int icmpv6_echo_ignore_multicast; int icmpv6_echo_ignore_anycast; + DECLARE_BITMAP(icmpv6_ratemask, ICMPV6_MSG_MAX + 1); + unsigned long *icmpv6_ratemask_ptr; int anycast_src_echo_reply; int ip_nonlocal_bind; int fwmark_reflect; diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index 325395f56bfa..2622b5a3e616 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -90,6 +90,8 @@ struct icmp6hdr { #define ICMPV6_TIME_EXCEED 3 #define ICMPV6_PARAMPROB 4 +#define ICMPV6_ERRMSG_MAX 127 + #define ICMPV6_INFOMSG_MASK 0x80 #define ICMPV6_ECHO_REQUEST 128 @@ -110,6 +112,8 @@ struct icmp6hdr { #define ICMPV6_MRDISC_ADV 151 +#define ICMPV6_MSG_MAX 255 + /* * Codes for Destination Unreachable */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c9ec050bcf46..599510a3355e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -3326,6 +3326,11 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, return -ENOSYS; } +int proc_do_large_bitmap(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} #endif /* CONFIG_PROC_SYSCTL */ @@ -3366,3 +3371,4 @@ EXPORT_SYMBOL(proc_dointvec_ms_jiffies); EXPORT_SYMBOL(proc_dostring); EXPORT_SYMBOL(proc_doulongvec_minmax); EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); +EXPORT_SYMBOL(proc_do_large_bitmap); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d8587ca4fbeb..3d1de28aaa9e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -850,6 +850,15 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.icmpv6_echo_ignore_all = 0; net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0; net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0; + + /* By default, rate limit error messages. + * Except for pmtu discovery, it would break it. + * proc_do_large_bitmap needs pointer to the bitmap. + */ + bitmap_set(net->ipv6.sysctl.icmpv6_ratemask, 0, ICMPV6_ERRMSG_MAX + 1); + bitmap_clear(net->ipv6.sysctl.icmpv6_ratemask, ICMPV6_PKT_TOOBIG, 1); + net->ipv6.sysctl.icmpv6_ratemask_ptr = net->ipv6.sysctl.icmpv6_ratemask; + net->ipv6.sysctl.flowlabel_consistency = 1; net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS; net->ipv6.sysctl.idgen_retries = 3; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index cc14b9998941..afb915807cd0 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -168,22 +168,21 @@ static bool is_ineligible(const struct sk_buff *skb) return false; } -static bool icmpv6_mask_allow(int type) +static bool icmpv6_mask_allow(struct net *net, int type) { - /* Informational messages are not limited. */ - if (type & ICMPV6_INFOMSG_MASK) + if (type > ICMPV6_MSG_MAX) return true; - /* Do not limit pmtu discovery, it would break it. */ - if (type == ICMPV6_PKT_TOOBIG) + /* Limit if icmp type is set in ratemask. */ + if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask)) return true; return false; } -static bool icmpv6_global_allow(int type) +static bool icmpv6_global_allow(struct net *net, int type) { - if (icmpv6_mask_allow(type)) + if (icmpv6_mask_allow(net, type)) return true; if (icmp_global_allow()) @@ -202,7 +201,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, struct dst_entry *dst; bool res = false; - if (icmpv6_mask_allow(type)) + if (icmpv6_mask_allow(net, type)) return true; /* @@ -511,7 +510,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, local_bh_disable(); /* Check global sysctl_icmp_msgs_per_sec ratelimit */ - if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type)) + if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type)) goto out_bh_enable; mip6_addr_swap(skb); @@ -731,6 +730,11 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (IS_ERR(dst)) goto out; + /* Check the ratelimit */ + if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) || + !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6)) + goto out_dst_release; + idev = __in6_dev_get(skb->dev); msg.skb = skb; @@ -751,6 +755,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); } +out_dst_release: dst_release(dst); out: icmpv6_xmit_unlock(sk); @@ -1137,6 +1142,13 @@ static struct ctl_table ipv6_icmp_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "ratemask", + .data = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr, + .maxlen = ICMPV6_MSG_MAX + 1, + .mode = 0644, + .proc_handler = proc_do_large_bitmap, + }, { }, }; @@ -1153,6 +1165,7 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all; table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast; table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast; + table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr; } return table; } -- cgit From 50943f3e136adfc421f9768d6ae09ba7b83aaefd Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 19 Apr 2019 10:03:01 -0700 Subject: cgroup: rename freezer.c into legacy_freezer.c Freezer.c will contain an implementation of cgroup v2 freezer, so let's rename the v1 freezer to avoid naming conflicts. Signed-off-by: Roman Gushchin Signed-off-by: Tejun Heo Cc: kernel-team@fb.com --- kernel/cgroup/Makefile | 2 +- kernel/cgroup/freezer.c | 481 ----------------------------------------- kernel/cgroup/legacy_freezer.c | 481 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 482 insertions(+), 482 deletions(-) delete mode 100644 kernel/cgroup/freezer.c create mode 100644 kernel/cgroup/legacy_freezer.c (limited to 'kernel') diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile index bfcdae896122..8d5689ca94b9 100644 --- a/kernel/cgroup/Makefile +++ b/kernel/cgroup/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o -obj-$(CONFIG_CGROUP_FREEZER) += freezer.o +obj-$(CONFIG_CGROUP_FREEZER) += legacy_freezer.o obj-$(CONFIG_CGROUP_PIDS) += pids.o obj-$(CONFIG_CGROUP_RDMA) += rdma.o obj-$(CONFIG_CPUSETS) += cpuset.o diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c deleted file mode 100644 index 08236798d173..000000000000 --- a/kernel/cgroup/freezer.c +++ /dev/null @@ -1,481 +0,0 @@ -/* - * cgroup_freezer.c - control group freezer subsystem - * - * Copyright IBM Corporation, 2007 - * - * Author : Cedric Le Goater - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is - * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared - * for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING - * for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of - * its ancestors has FREEZING_SELF set. - */ -enum freezer_state_flags { - CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */ - CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */ - CGROUP_FREEZING_PARENT = (1 << 2), /* the parent freezer is freezing */ - CGROUP_FROZEN = (1 << 3), /* this and its descendants frozen */ - - /* mask for all FREEZING flags */ - CGROUP_FREEZING = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT, -}; - -struct freezer { - struct cgroup_subsys_state css; - unsigned int state; -}; - -static DEFINE_MUTEX(freezer_mutex); - -static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) -{ - return css ? container_of(css, struct freezer, css) : NULL; -} - -static inline struct freezer *task_freezer(struct task_struct *task) -{ - return css_freezer(task_css(task, freezer_cgrp_id)); -} - -static struct freezer *parent_freezer(struct freezer *freezer) -{ - return css_freezer(freezer->css.parent); -} - -bool cgroup_freezing(struct task_struct *task) -{ - bool ret; - - rcu_read_lock(); - ret = task_freezer(task)->state & CGROUP_FREEZING; - rcu_read_unlock(); - - return ret; -} - -static const char *freezer_state_strs(unsigned int state) -{ - if (state & CGROUP_FROZEN) - return "FROZEN"; - if (state & CGROUP_FREEZING) - return "FREEZING"; - return "THAWED"; -}; - -static struct cgroup_subsys_state * -freezer_css_alloc(struct cgroup_subsys_state *parent_css) -{ - struct freezer *freezer; - - freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL); - if (!freezer) - return ERR_PTR(-ENOMEM); - - return &freezer->css; -} - -/** - * freezer_css_online - commit creation of a freezer css - * @css: css being created - * - * We're committing to creation of @css. Mark it online and inherit - * parent's freezing state while holding both parent's and our - * freezer->lock. - */ -static int freezer_css_online(struct cgroup_subsys_state *css) -{ - struct freezer *freezer = css_freezer(css); - struct freezer *parent = parent_freezer(freezer); - - mutex_lock(&freezer_mutex); - - freezer->state |= CGROUP_FREEZER_ONLINE; - - if (parent && (parent->state & CGROUP_FREEZING)) { - freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN; - atomic_inc(&system_freezing_cnt); - } - - mutex_unlock(&freezer_mutex); - return 0; -} - -/** - * freezer_css_offline - initiate destruction of a freezer css - * @css: css being destroyed - * - * @css is going away. Mark it dead and decrement system_freezing_count if - * it was holding one. - */ -static void freezer_css_offline(struct cgroup_subsys_state *css) -{ - struct freezer *freezer = css_freezer(css); - - mutex_lock(&freezer_mutex); - - if (freezer->state & CGROUP_FREEZING) - atomic_dec(&system_freezing_cnt); - - freezer->state = 0; - - mutex_unlock(&freezer_mutex); -} - -static void freezer_css_free(struct cgroup_subsys_state *css) -{ - kfree(css_freezer(css)); -} - -/* - * Tasks can be migrated into a different freezer anytime regardless of its - * current state. freezer_attach() is responsible for making new tasks - * conform to the current state. - * - * Freezer state changes and task migration are synchronized via - * @freezer->lock. freezer_attach() makes the new tasks conform to the - * current state and all following state changes can see the new tasks. - */ -static void freezer_attach(struct cgroup_taskset *tset) -{ - struct task_struct *task; - struct cgroup_subsys_state *new_css; - - mutex_lock(&freezer_mutex); - - /* - * Make the new tasks conform to the current state of @new_css. - * For simplicity, when migrating any task to a FROZEN cgroup, we - * revert it to FREEZING and let update_if_frozen() determine the - * correct state later. - * - * Tasks in @tset are on @new_css but may not conform to its - * current state before executing the following - !frozen tasks may - * be visible in a FROZEN cgroup and frozen tasks in a THAWED one. - */ - cgroup_taskset_for_each(task, new_css, tset) { - struct freezer *freezer = css_freezer(new_css); - - if (!(freezer->state & CGROUP_FREEZING)) { - __thaw_task(task); - } else { - freeze_task(task); - /* clear FROZEN and propagate upwards */ - while (freezer && (freezer->state & CGROUP_FROZEN)) { - freezer->state &= ~CGROUP_FROZEN; - freezer = parent_freezer(freezer); - } - } - } - - mutex_unlock(&freezer_mutex); -} - -/** - * freezer_fork - cgroup post fork callback - * @task: a task which has just been forked - * - * @task has just been created and should conform to the current state of - * the cgroup_freezer it belongs to. This function may race against - * freezer_attach(). Losing to freezer_attach() means that we don't have - * to do anything as freezer_attach() will put @task into the appropriate - * state. - */ -static void freezer_fork(struct task_struct *task) -{ - struct freezer *freezer; - - /* - * The root cgroup is non-freezable, so we can skip locking the - * freezer. This is safe regardless of race with task migration. - * If we didn't race or won, skipping is obviously the right thing - * to do. If we lost and root is the new cgroup, noop is still the - * right thing to do. - */ - if (task_css_is_root(task, freezer_cgrp_id)) - return; - - mutex_lock(&freezer_mutex); - rcu_read_lock(); - - freezer = task_freezer(task); - if (freezer->state & CGROUP_FREEZING) - freeze_task(task); - - rcu_read_unlock(); - mutex_unlock(&freezer_mutex); -} - -/** - * update_if_frozen - update whether a cgroup finished freezing - * @css: css of interest - * - * Once FREEZING is initiated, transition to FROZEN is lazily updated by - * calling this function. If the current state is FREEZING but not FROZEN, - * this function checks whether all tasks of this cgroup and the descendant - * cgroups finished freezing and, if so, sets FROZEN. - * - * The caller is responsible for grabbing RCU read lock and calling - * update_if_frozen() on all descendants prior to invoking this function. - * - * Task states and freezer state might disagree while tasks are being - * migrated into or out of @css, so we can't verify task states against - * @freezer state here. See freezer_attach() for details. - */ -static void update_if_frozen(struct cgroup_subsys_state *css) -{ - struct freezer *freezer = css_freezer(css); - struct cgroup_subsys_state *pos; - struct css_task_iter it; - struct task_struct *task; - - lockdep_assert_held(&freezer_mutex); - - if (!(freezer->state & CGROUP_FREEZING) || - (freezer->state & CGROUP_FROZEN)) - return; - - /* are all (live) children frozen? */ - rcu_read_lock(); - css_for_each_child(pos, css) { - struct freezer *child = css_freezer(pos); - - if ((child->state & CGROUP_FREEZER_ONLINE) && - !(child->state & CGROUP_FROZEN)) { - rcu_read_unlock(); - return; - } - } - rcu_read_unlock(); - - /* are all tasks frozen? */ - css_task_iter_start(css, 0, &it); - - while ((task = css_task_iter_next(&it))) { - if (freezing(task)) { - /* - * freezer_should_skip() indicates that the task - * should be skipped when determining freezing - * completion. Consider it frozen in addition to - * the usual frozen condition. - */ - if (!frozen(task) && !freezer_should_skip(task)) - goto out_iter_end; - } - } - - freezer->state |= CGROUP_FROZEN; -out_iter_end: - css_task_iter_end(&it); -} - -static int freezer_read(struct seq_file *m, void *v) -{ - struct cgroup_subsys_state *css = seq_css(m), *pos; - - mutex_lock(&freezer_mutex); - rcu_read_lock(); - - /* update states bottom-up */ - css_for_each_descendant_post(pos, css) { - if (!css_tryget_online(pos)) - continue; - rcu_read_unlock(); - - update_if_frozen(pos); - - rcu_read_lock(); - css_put(pos); - } - - rcu_read_unlock(); - mutex_unlock(&freezer_mutex); - - seq_puts(m, freezer_state_strs(css_freezer(css)->state)); - seq_putc(m, '\n'); - return 0; -} - -static void freeze_cgroup(struct freezer *freezer) -{ - struct css_task_iter it; - struct task_struct *task; - - css_task_iter_start(&freezer->css, 0, &it); - while ((task = css_task_iter_next(&it))) - freeze_task(task); - css_task_iter_end(&it); -} - -static void unfreeze_cgroup(struct freezer *freezer) -{ - struct css_task_iter it; - struct task_struct *task; - - css_task_iter_start(&freezer->css, 0, &it); - while ((task = css_task_iter_next(&it))) - __thaw_task(task); - css_task_iter_end(&it); -} - -/** - * freezer_apply_state - apply state change to a single cgroup_freezer - * @freezer: freezer to apply state change to - * @freeze: whether to freeze or unfreeze - * @state: CGROUP_FREEZING_* flag to set or clear - * - * Set or clear @state on @cgroup according to @freeze, and perform - * freezing or thawing as necessary. - */ -static void freezer_apply_state(struct freezer *freezer, bool freeze, - unsigned int state) -{ - /* also synchronizes against task migration, see freezer_attach() */ - lockdep_assert_held(&freezer_mutex); - - if (!(freezer->state & CGROUP_FREEZER_ONLINE)) - return; - - if (freeze) { - if (!(freezer->state & CGROUP_FREEZING)) - atomic_inc(&system_freezing_cnt); - freezer->state |= state; - freeze_cgroup(freezer); - } else { - bool was_freezing = freezer->state & CGROUP_FREEZING; - - freezer->state &= ~state; - - if (!(freezer->state & CGROUP_FREEZING)) { - if (was_freezing) - atomic_dec(&system_freezing_cnt); - freezer->state &= ~CGROUP_FROZEN; - unfreeze_cgroup(freezer); - } - } -} - -/** - * freezer_change_state - change the freezing state of a cgroup_freezer - * @freezer: freezer of interest - * @freeze: whether to freeze or thaw - * - * Freeze or thaw @freezer according to @freeze. The operations are - * recursive - all descendants of @freezer will be affected. - */ -static void freezer_change_state(struct freezer *freezer, bool freeze) -{ - struct cgroup_subsys_state *pos; - - /* - * Update all its descendants in pre-order traversal. Each - * descendant will try to inherit its parent's FREEZING state as - * CGROUP_FREEZING_PARENT. - */ - mutex_lock(&freezer_mutex); - rcu_read_lock(); - css_for_each_descendant_pre(pos, &freezer->css) { - struct freezer *pos_f = css_freezer(pos); - struct freezer *parent = parent_freezer(pos_f); - - if (!css_tryget_online(pos)) - continue; - rcu_read_unlock(); - - if (pos_f == freezer) - freezer_apply_state(pos_f, freeze, - CGROUP_FREEZING_SELF); - else - freezer_apply_state(pos_f, - parent->state & CGROUP_FREEZING, - CGROUP_FREEZING_PARENT); - - rcu_read_lock(); - css_put(pos); - } - rcu_read_unlock(); - mutex_unlock(&freezer_mutex); -} - -static ssize_t freezer_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - bool freeze; - - buf = strstrip(buf); - - if (strcmp(buf, freezer_state_strs(0)) == 0) - freeze = false; - else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0) - freeze = true; - else - return -EINVAL; - - freezer_change_state(css_freezer(of_css(of)), freeze); - return nbytes; -} - -static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css, - struct cftype *cft) -{ - struct freezer *freezer = css_freezer(css); - - return (bool)(freezer->state & CGROUP_FREEZING_SELF); -} - -static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css, - struct cftype *cft) -{ - struct freezer *freezer = css_freezer(css); - - return (bool)(freezer->state & CGROUP_FREEZING_PARENT); -} - -static struct cftype files[] = { - { - .name = "state", - .flags = CFTYPE_NOT_ON_ROOT, - .seq_show = freezer_read, - .write = freezer_write, - }, - { - .name = "self_freezing", - .flags = CFTYPE_NOT_ON_ROOT, - .read_u64 = freezer_self_freezing_read, - }, - { - .name = "parent_freezing", - .flags = CFTYPE_NOT_ON_ROOT, - .read_u64 = freezer_parent_freezing_read, - }, - { } /* terminate */ -}; - -struct cgroup_subsys freezer_cgrp_subsys = { - .css_alloc = freezer_css_alloc, - .css_online = freezer_css_online, - .css_offline = freezer_css_offline, - .css_free = freezer_css_free, - .attach = freezer_attach, - .fork = freezer_fork, - .legacy_cftypes = files, -}; diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c new file mode 100644 index 000000000000..08236798d173 --- /dev/null +++ b/kernel/cgroup/legacy_freezer.c @@ -0,0 +1,481 @@ +/* + * cgroup_freezer.c - control group freezer subsystem + * + * Copyright IBM Corporation, 2007 + * + * Author : Cedric Le Goater + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is + * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared + * for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING + * for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of + * its ancestors has FREEZING_SELF set. + */ +enum freezer_state_flags { + CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */ + CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */ + CGROUP_FREEZING_PARENT = (1 << 2), /* the parent freezer is freezing */ + CGROUP_FROZEN = (1 << 3), /* this and its descendants frozen */ + + /* mask for all FREEZING flags */ + CGROUP_FREEZING = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT, +}; + +struct freezer { + struct cgroup_subsys_state css; + unsigned int state; +}; + +static DEFINE_MUTEX(freezer_mutex); + +static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) +{ + return css ? container_of(css, struct freezer, css) : NULL; +} + +static inline struct freezer *task_freezer(struct task_struct *task) +{ + return css_freezer(task_css(task, freezer_cgrp_id)); +} + +static struct freezer *parent_freezer(struct freezer *freezer) +{ + return css_freezer(freezer->css.parent); +} + +bool cgroup_freezing(struct task_struct *task) +{ + bool ret; + + rcu_read_lock(); + ret = task_freezer(task)->state & CGROUP_FREEZING; + rcu_read_unlock(); + + return ret; +} + +static const char *freezer_state_strs(unsigned int state) +{ + if (state & CGROUP_FROZEN) + return "FROZEN"; + if (state & CGROUP_FREEZING) + return "FREEZING"; + return "THAWED"; +}; + +static struct cgroup_subsys_state * +freezer_css_alloc(struct cgroup_subsys_state *parent_css) +{ + struct freezer *freezer; + + freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL); + if (!freezer) + return ERR_PTR(-ENOMEM); + + return &freezer->css; +} + +/** + * freezer_css_online - commit creation of a freezer css + * @css: css being created + * + * We're committing to creation of @css. Mark it online and inherit + * parent's freezing state while holding both parent's and our + * freezer->lock. + */ +static int freezer_css_online(struct cgroup_subsys_state *css) +{ + struct freezer *freezer = css_freezer(css); + struct freezer *parent = parent_freezer(freezer); + + mutex_lock(&freezer_mutex); + + freezer->state |= CGROUP_FREEZER_ONLINE; + + if (parent && (parent->state & CGROUP_FREEZING)) { + freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN; + atomic_inc(&system_freezing_cnt); + } + + mutex_unlock(&freezer_mutex); + return 0; +} + +/** + * freezer_css_offline - initiate destruction of a freezer css + * @css: css being destroyed + * + * @css is going away. Mark it dead and decrement system_freezing_count if + * it was holding one. + */ +static void freezer_css_offline(struct cgroup_subsys_state *css) +{ + struct freezer *freezer = css_freezer(css); + + mutex_lock(&freezer_mutex); + + if (freezer->state & CGROUP_FREEZING) + atomic_dec(&system_freezing_cnt); + + freezer->state = 0; + + mutex_unlock(&freezer_mutex); +} + +static void freezer_css_free(struct cgroup_subsys_state *css) +{ + kfree(css_freezer(css)); +} + +/* + * Tasks can be migrated into a different freezer anytime regardless of its + * current state. freezer_attach() is responsible for making new tasks + * conform to the current state. + * + * Freezer state changes and task migration are synchronized via + * @freezer->lock. freezer_attach() makes the new tasks conform to the + * current state and all following state changes can see the new tasks. + */ +static void freezer_attach(struct cgroup_taskset *tset) +{ + struct task_struct *task; + struct cgroup_subsys_state *new_css; + + mutex_lock(&freezer_mutex); + + /* + * Make the new tasks conform to the current state of @new_css. + * For simplicity, when migrating any task to a FROZEN cgroup, we + * revert it to FREEZING and let update_if_frozen() determine the + * correct state later. + * + * Tasks in @tset are on @new_css but may not conform to its + * current state before executing the following - !frozen tasks may + * be visible in a FROZEN cgroup and frozen tasks in a THAWED one. + */ + cgroup_taskset_for_each(task, new_css, tset) { + struct freezer *freezer = css_freezer(new_css); + + if (!(freezer->state & CGROUP_FREEZING)) { + __thaw_task(task); + } else { + freeze_task(task); + /* clear FROZEN and propagate upwards */ + while (freezer && (freezer->state & CGROUP_FROZEN)) { + freezer->state &= ~CGROUP_FROZEN; + freezer = parent_freezer(freezer); + } + } + } + + mutex_unlock(&freezer_mutex); +} + +/** + * freezer_fork - cgroup post fork callback + * @task: a task which has just been forked + * + * @task has just been created and should conform to the current state of + * the cgroup_freezer it belongs to. This function may race against + * freezer_attach(). Losing to freezer_attach() means that we don't have + * to do anything as freezer_attach() will put @task into the appropriate + * state. + */ +static void freezer_fork(struct task_struct *task) +{ + struct freezer *freezer; + + /* + * The root cgroup is non-freezable, so we can skip locking the + * freezer. This is safe regardless of race with task migration. + * If we didn't race or won, skipping is obviously the right thing + * to do. If we lost and root is the new cgroup, noop is still the + * right thing to do. + */ + if (task_css_is_root(task, freezer_cgrp_id)) + return; + + mutex_lock(&freezer_mutex); + rcu_read_lock(); + + freezer = task_freezer(task); + if (freezer->state & CGROUP_FREEZING) + freeze_task(task); + + rcu_read_unlock(); + mutex_unlock(&freezer_mutex); +} + +/** + * update_if_frozen - update whether a cgroup finished freezing + * @css: css of interest + * + * Once FREEZING is initiated, transition to FROZEN is lazily updated by + * calling this function. If the current state is FREEZING but not FROZEN, + * this function checks whether all tasks of this cgroup and the descendant + * cgroups finished freezing and, if so, sets FROZEN. + * + * The caller is responsible for grabbing RCU read lock and calling + * update_if_frozen() on all descendants prior to invoking this function. + * + * Task states and freezer state might disagree while tasks are being + * migrated into or out of @css, so we can't verify task states against + * @freezer state here. See freezer_attach() for details. + */ +static void update_if_frozen(struct cgroup_subsys_state *css) +{ + struct freezer *freezer = css_freezer(css); + struct cgroup_subsys_state *pos; + struct css_task_iter it; + struct task_struct *task; + + lockdep_assert_held(&freezer_mutex); + + if (!(freezer->state & CGROUP_FREEZING) || + (freezer->state & CGROUP_FROZEN)) + return; + + /* are all (live) children frozen? */ + rcu_read_lock(); + css_for_each_child(pos, css) { + struct freezer *child = css_freezer(pos); + + if ((child->state & CGROUP_FREEZER_ONLINE) && + !(child->state & CGROUP_FROZEN)) { + rcu_read_unlock(); + return; + } + } + rcu_read_unlock(); + + /* are all tasks frozen? */ + css_task_iter_start(css, 0, &it); + + while ((task = css_task_iter_next(&it))) { + if (freezing(task)) { + /* + * freezer_should_skip() indicates that the task + * should be skipped when determining freezing + * completion. Consider it frozen in addition to + * the usual frozen condition. + */ + if (!frozen(task) && !freezer_should_skip(task)) + goto out_iter_end; + } + } + + freezer->state |= CGROUP_FROZEN; +out_iter_end: + css_task_iter_end(&it); +} + +static int freezer_read(struct seq_file *m, void *v) +{ + struct cgroup_subsys_state *css = seq_css(m), *pos; + + mutex_lock(&freezer_mutex); + rcu_read_lock(); + + /* update states bottom-up */ + css_for_each_descendant_post(pos, css) { + if (!css_tryget_online(pos)) + continue; + rcu_read_unlock(); + + update_if_frozen(pos); + + rcu_read_lock(); + css_put(pos); + } + + rcu_read_unlock(); + mutex_unlock(&freezer_mutex); + + seq_puts(m, freezer_state_strs(css_freezer(css)->state)); + seq_putc(m, '\n'); + return 0; +} + +static void freeze_cgroup(struct freezer *freezer) +{ + struct css_task_iter it; + struct task_struct *task; + + css_task_iter_start(&freezer->css, 0, &it); + while ((task = css_task_iter_next(&it))) + freeze_task(task); + css_task_iter_end(&it); +} + +static void unfreeze_cgroup(struct freezer *freezer) +{ + struct css_task_iter it; + struct task_struct *task; + + css_task_iter_start(&freezer->css, 0, &it); + while ((task = css_task_iter_next(&it))) + __thaw_task(task); + css_task_iter_end(&it); +} + +/** + * freezer_apply_state - apply state change to a single cgroup_freezer + * @freezer: freezer to apply state change to + * @freeze: whether to freeze or unfreeze + * @state: CGROUP_FREEZING_* flag to set or clear + * + * Set or clear @state on @cgroup according to @freeze, and perform + * freezing or thawing as necessary. + */ +static void freezer_apply_state(struct freezer *freezer, bool freeze, + unsigned int state) +{ + /* also synchronizes against task migration, see freezer_attach() */ + lockdep_assert_held(&freezer_mutex); + + if (!(freezer->state & CGROUP_FREEZER_ONLINE)) + return; + + if (freeze) { + if (!(freezer->state & CGROUP_FREEZING)) + atomic_inc(&system_freezing_cnt); + freezer->state |= state; + freeze_cgroup(freezer); + } else { + bool was_freezing = freezer->state & CGROUP_FREEZING; + + freezer->state &= ~state; + + if (!(freezer->state & CGROUP_FREEZING)) { + if (was_freezing) + atomic_dec(&system_freezing_cnt); + freezer->state &= ~CGROUP_FROZEN; + unfreeze_cgroup(freezer); + } + } +} + +/** + * freezer_change_state - change the freezing state of a cgroup_freezer + * @freezer: freezer of interest + * @freeze: whether to freeze or thaw + * + * Freeze or thaw @freezer according to @freeze. The operations are + * recursive - all descendants of @freezer will be affected. + */ +static void freezer_change_state(struct freezer *freezer, bool freeze) +{ + struct cgroup_subsys_state *pos; + + /* + * Update all its descendants in pre-order traversal. Each + * descendant will try to inherit its parent's FREEZING state as + * CGROUP_FREEZING_PARENT. + */ + mutex_lock(&freezer_mutex); + rcu_read_lock(); + css_for_each_descendant_pre(pos, &freezer->css) { + struct freezer *pos_f = css_freezer(pos); + struct freezer *parent = parent_freezer(pos_f); + + if (!css_tryget_online(pos)) + continue; + rcu_read_unlock(); + + if (pos_f == freezer) + freezer_apply_state(pos_f, freeze, + CGROUP_FREEZING_SELF); + else + freezer_apply_state(pos_f, + parent->state & CGROUP_FREEZING, + CGROUP_FREEZING_PARENT); + + rcu_read_lock(); + css_put(pos); + } + rcu_read_unlock(); + mutex_unlock(&freezer_mutex); +} + +static ssize_t freezer_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + bool freeze; + + buf = strstrip(buf); + + if (strcmp(buf, freezer_state_strs(0)) == 0) + freeze = false; + else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0) + freeze = true; + else + return -EINVAL; + + freezer_change_state(css_freezer(of_css(of)), freeze); + return nbytes; +} + +static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct freezer *freezer = css_freezer(css); + + return (bool)(freezer->state & CGROUP_FREEZING_SELF); +} + +static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct freezer *freezer = css_freezer(css); + + return (bool)(freezer->state & CGROUP_FREEZING_PARENT); +} + +static struct cftype files[] = { + { + .name = "state", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = freezer_read, + .write = freezer_write, + }, + { + .name = "self_freezing", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = freezer_self_freezing_read, + }, + { + .name = "parent_freezing", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = freezer_parent_freezing_read, + }, + { } /* terminate */ +}; + +struct cgroup_subsys freezer_cgrp_subsys = { + .css_alloc = freezer_css_alloc, + .css_online = freezer_css_online, + .css_offline = freezer_css_offline, + .css_free = freezer_css_free, + .attach = freezer_attach, + .fork = freezer_fork, + .legacy_cftypes = files, +}; -- cgit From aade7f9efba098859681f8e88d81a5b44ad09b12 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 19 Apr 2019 10:03:02 -0700 Subject: cgroup: implement __cgroup_task_count() helper The helper is identical to the existing cgroup_task_count() except it doesn't take the css_set_lock by itself, assuming that the caller does. Also, move cgroup_task_count() implementation into kernel/cgroup/cgroup.c, as there is nothing specific to cgroup v1. Signed-off-by: Roman Gushchin Signed-off-by: Tejun Heo Cc: kernel-team@fb.com --- kernel/cgroup/cgroup-internal.h | 1 + kernel/cgroup/cgroup-v1.c | 16 ---------------- kernel/cgroup/cgroup.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 30e39f3932ad..02c001ffe2e2 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -240,6 +240,7 @@ int cgroup_rmdir(struct kernfs_node *kn); int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, struct kernfs_root *kf_root); +int __cgroup_task_count(const struct cgroup *cgrp); int cgroup_task_count(const struct cgroup *cgrp); /* diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index c126b34fd4ff..68ca5de7ec27 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -342,22 +342,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp, return l; } -/** - * cgroup_task_count - count the number of tasks in a cgroup. - * @cgrp: the cgroup in question - */ -int cgroup_task_count(const struct cgroup *cgrp) -{ - int count = 0; - struct cgrp_cset_link *link; - - spin_lock_irq(&css_set_lock); - list_for_each_entry(link, &cgrp->cset_links, cset_link) - count += link->cset->nr_tasks; - spin_unlock_irq(&css_set_lock); - return count; -} - /* * Load a cgroup's pidarray with either procs' tgids or tasks' pids */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index f219c195a9a5..3008ea684aa0 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -593,6 +593,39 @@ static void cgroup_get_live(struct cgroup *cgrp) css_get(&cgrp->self); } +/** + * __cgroup_task_count - count the number of tasks in a cgroup. The caller + * is responsible for taking the css_set_lock. + * @cgrp: the cgroup in question + */ +int __cgroup_task_count(const struct cgroup *cgrp) +{ + int count = 0; + struct cgrp_cset_link *link; + + lockdep_assert_held(&css_set_lock); + + list_for_each_entry(link, &cgrp->cset_links, cset_link) + count += link->cset->nr_tasks; + + return count; +} + +/** + * cgroup_task_count - count the number of tasks in a cgroup. + * @cgrp: the cgroup in question + */ +int cgroup_task_count(const struct cgroup *cgrp) +{ + int count; + + spin_lock_irq(&css_set_lock); + count = __cgroup_task_count(cgrp); + spin_unlock_irq(&css_set_lock); + + return count; +} + struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) { struct cgroup *cgrp = of->kn->parent->priv; -- cgit From 4dcabece4c3a9f9522127be12cc12cc120399b2f Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 19 Apr 2019 10:03:03 -0700 Subject: cgroup: protect cgroup->nr_(dying_)descendants by css_set_lock The number of descendant cgroups and the number of dying descendant cgroups are currently synchronized using the cgroup_mutex. The number of descendant cgroups will be required by the cgroup v2 freezer, which will use it to determine if a cgroup is frozen (depending on total number of descendants and number of frozen descendants). It's not always acceptable to grab the cgroup_mutex, especially from quite hot paths (e.g. exit()). To avoid this, let's additionally synchronize these counters using the css_set_lock. So, it's safe to read these counters with either cgroup_mutex or css_set_lock locked, and for changing both locks should be acquired. Signed-off-by: Roman Gushchin Signed-off-by: Tejun Heo Cc: kernel-team@fb.com --- include/linux/cgroup-defs.h | 5 +++++ kernel/cgroup/cgroup.c | 6 ++++++ 2 files changed, 11 insertions(+) (limited to 'kernel') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1c70803e9f77..7d57890cec67 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -349,6 +349,11 @@ struct cgroup { * Dying cgroups are cgroups which were deleted by a user, * but are still existing because someone else is holding a reference. * max_descendants is a maximum allowed number of descent cgroups. + * + * nr_descendants and nr_dying_descendants are protected + * by cgroup_mutex and css_set_lock. It's fine to read them holding + * any of cgroup_mutex and css_set_lock; for writing both locks + * should be held. */ int nr_descendants; int nr_dying_descendants; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 3008ea684aa0..786ceef2f222 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4811,9 +4811,11 @@ static void css_release_work_fn(struct work_struct *work) if (cgroup_on_dfl(cgrp)) cgroup_rstat_flush(cgrp); + spin_lock_irq(&css_set_lock); for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) tcgrp->nr_dying_descendants--; + spin_unlock_irq(&css_set_lock); cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); cgrp->id = -1; @@ -5031,12 +5033,14 @@ static struct cgroup *cgroup_create(struct cgroup *parent) if (ret) goto out_psi_free; + spin_lock_irq(&css_set_lock); for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) { cgrp->ancestor_ids[tcgrp->level] = tcgrp->id; if (tcgrp != cgrp) tcgrp->nr_descendants++; } + spin_unlock_irq(&css_set_lock); if (notify_on_release(parent)) set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); @@ -5321,10 +5325,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) if (parent && cgroup_is_threaded(cgrp)) parent->nr_threaded_children--; + spin_lock_irq(&css_set_lock); for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) { tcgrp->nr_descendants--; tcgrp->nr_dying_descendants++; } + spin_unlock_irq(&css_set_lock); cgroup1_check_for_release(parent); -- cgit From 76f969e8948d82e78e1bc4beb6b9465908e74873 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 19 Apr 2019 10:03:04 -0700 Subject: cgroup: cgroup v2 freezer Cgroup v1 implements the freezer controller, which provides an ability to stop the workload in a cgroup and temporarily free up some resources (cpu, io, network bandwidth and, potentially, memory) for some other tasks. Cgroup v2 lacks this functionality. This patch implements freezer for cgroup v2. Cgroup v2 freezer tries to put tasks into a state similar to jobctl stop. This means that tasks can be killed, ptraced (using PTRACE_SEIZE*), and interrupted. It is possible to attach to a frozen task, get some information (e.g. read registers) and detach. It's also possible to migrate a frozen tasks to another cgroup. This differs cgroup v2 freezer from cgroup v1 freezer, which mostly tried to imitate the system-wide freezer. However uninterruptible sleep is fine when all tasks are going to be frozen (hibernation case), it's not the acceptable state for some subset of the system. Cgroup v2 freezer is not supporting freezing kthreads. If a non-root cgroup contains kthread, the cgroup still can be frozen, but the kthread will remain running, the cgroup will be shown as non-frozen, and the notification will not be delivered. * PTRACE_ATTACH is not working because non-fatal signal delivery is blocked in frozen state. There are some interface differences between cgroup v1 and cgroup v2 freezer too, which are required to conform the cgroup v2 interface design principles: 1) There is no separate controller, which has to be turned on: the functionality is always available and is represented by cgroup.freeze and cgroup.events cgroup control files. 2) The desired state is defined by the cgroup.freeze control file. Any hierarchical configuration is allowed. 3) The interface is asynchronous. The actual state is available using cgroup.events control file ("frozen" field). There are no dedicated transitional states. 4) It's allowed to make any changes with the cgroup hierarchy (create new cgroups, remove old cgroups, move tasks between cgroups) no matter if some cgroups are frozen. Signed-off-by: Roman Gushchin Signed-off-by: Tejun Heo No-objection-from-me-by: Oleg Nesterov Cc: kernel-team@fb.com --- include/linux/cgroup-defs.h | 28 ++++ include/linux/cgroup.h | 43 ++++++ include/linux/sched.h | 2 + include/linux/sched/jobctl.h | 2 + kernel/cgroup/Makefile | 2 +- kernel/cgroup/cgroup.c | 110 ++++++++++++++- kernel/cgroup/freezer.c | 317 +++++++++++++++++++++++++++++++++++++++++++ kernel/fork.c | 2 + kernel/signal.c | 70 +++++++++- 9 files changed, 566 insertions(+), 10 deletions(-) create mode 100644 kernel/cgroup/freezer.c (limited to 'kernel') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 7d57890cec67..77258d276f93 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -65,6 +65,12 @@ enum { * specified at mount time and thus is implemented here. */ CGRP_CPUSET_CLONE_CHILDREN, + + /* Control group has to be frozen. */ + CGRP_FREEZE, + + /* Cgroup is frozen. */ + CGRP_FROZEN, }; /* cgroup_root->flags */ @@ -317,6 +323,25 @@ struct cgroup_rstat_cpu { struct cgroup *updated_next; /* NULL iff not on the list */ }; +struct cgroup_freezer_state { + /* Should the cgroup and its descendants be frozen. */ + bool freeze; + + /* Should the cgroup actually be frozen? */ + int e_freeze; + + /* Fields below are protected by css_set_lock */ + + /* Number of frozen descendant cgroups */ + int nr_frozen_descendants; + + /* + * Number of tasks, which are counted as frozen: + * frozen, SIGSTOPped, and PTRACEd. + */ + int nr_frozen_tasks; +}; + struct cgroup { /* self css with NULL ->ss, points back to this cgroup */ struct cgroup_subsys_state self; @@ -453,6 +478,9 @@ struct cgroup { /* If there is block congestion on this cgroup. */ atomic_t congestion_count; + /* Used to store internal freezer state */ + struct cgroup_freezer_state freezer; + /* ids of the ancestors at each level including self */ int ancestor_ids[]; }; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 81f58b4a5418..3e2efd412dfa 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -881,4 +881,47 @@ static inline void put_cgroup_ns(struct cgroup_namespace *ns) free_cgroup_ns(ns); } +#ifdef CONFIG_CGROUPS + +void cgroup_enter_frozen(void); +void cgroup_leave_frozen(bool always_leave); +void cgroup_update_frozen(struct cgroup *cgrp); +void cgroup_freeze(struct cgroup *cgrp, bool freeze); +void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src, + struct cgroup *dst); +void cgroup_freezer_frozen_exit(struct task_struct *task); +static inline bool cgroup_task_freeze(struct task_struct *task) +{ + bool ret; + + if (task->flags & PF_KTHREAD) + return false; + + rcu_read_lock(); + ret = test_bit(CGRP_FREEZE, &task_dfl_cgroup(task)->flags); + rcu_read_unlock(); + + return ret; +} + +static inline bool cgroup_task_frozen(struct task_struct *task) +{ + return task->frozen; +} + +#else /* !CONFIG_CGROUPS */ + +static inline void cgroup_enter_frozen(void) { } +static inline void cgroup_leave_frozen(bool always_leave) { } +static inline bool cgroup_task_freeze(struct task_struct *task) +{ + return false; +} +static inline bool cgroup_task_frozen(struct task_struct *task) +{ + return false; +} + +#endif /* !CONFIG_CGROUPS */ + #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 1549584a1538..45b2199bd683 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -726,6 +726,8 @@ struct task_struct { #ifdef CONFIG_CGROUPS /* disallow userland-initiated cgroup migration */ unsigned no_cgroup_migration:1; + /* task is frozen/stopped (used by the cgroup freezer) */ + unsigned frozen:1; #endif #ifdef CONFIG_BLK_CGROUP /* to be used once the psi infrastructure lands upstream. */ diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h index 98228bd48aee..fa067de9f1a9 100644 --- a/include/linux/sched/jobctl.h +++ b/include/linux/sched/jobctl.h @@ -18,6 +18,7 @@ struct task_struct; #define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ +#define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) @@ -26,6 +27,7 @@ struct task_struct; #define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) +#define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile index 8d5689ca94b9..5d7a76bfbbb7 100644 --- a/kernel/cgroup/Makefile +++ b/kernel/cgroup/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o +obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o freezer.o obj-$(CONFIG_CGROUP_FREEZER) += legacy_freezer.o obj-$(CONFIG_CGROUP_PIDS) += pids.o diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 786ceef2f222..6895464b54c6 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2435,8 +2435,15 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx) get_css_set(to_cset); to_cset->nr_tasks++; css_set_move_task(task, from_cset, to_cset, true); - put_css_set_locked(from_cset); from_cset->nr_tasks--; + /* + * If the source or destination cgroup is frozen, + * the task might require to change its state. + */ + cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp, + to_cset->dfl_cgrp); + put_css_set_locked(from_cset); + } } spin_unlock_irq(&css_set_lock); @@ -3477,8 +3484,11 @@ static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of, static int cgroup_events_show(struct seq_file *seq, void *v) { - seq_printf(seq, "populated %d\n", - cgroup_is_populated(seq_css(seq)->cgroup)); + struct cgroup *cgrp = seq_css(seq)->cgroup; + + seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp)); + seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags)); + return 0; } @@ -3540,6 +3550,40 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) } #endif +static int cgroup_freeze_show(struct seq_file *seq, void *v) +{ + struct cgroup *cgrp = seq_css(seq)->cgroup; + + seq_printf(seq, "%d\n", cgrp->freezer.freeze); + + return 0; +} + +static ssize_t cgroup_freeze_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct cgroup *cgrp; + ssize_t ret; + int freeze; + + ret = kstrtoint(strstrip(buf), 0, &freeze); + if (ret) + return ret; + + if (freeze < 0 || freeze > 1) + return -ERANGE; + + cgrp = cgroup_kn_lock_live(of->kn, false); + if (!cgrp) + return -ENOENT; + + cgroup_freeze(cgrp, freeze); + + cgroup_kn_unlock(of->kn); + + return nbytes; +} + static int cgroup_file_open(struct kernfs_open_file *of) { struct cftype *cft = of->kn->priv; @@ -4683,6 +4727,12 @@ static struct cftype cgroup_base_files[] = { .name = "cgroup.stat", .seq_show = cgroup_stat_show, }, + { + .name = "cgroup.freeze", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = cgroup_freeze_show, + .write = cgroup_freeze_write, + }, { .name = "cpu.stat", .flags = CFTYPE_NOT_ON_ROOT, @@ -5033,12 +5083,29 @@ static struct cgroup *cgroup_create(struct cgroup *parent) if (ret) goto out_psi_free; + /* + * New cgroup inherits effective freeze counter, and + * if the parent has to be frozen, the child has too. + */ + cgrp->freezer.e_freeze = parent->freezer.e_freeze; + if (cgrp->freezer.e_freeze) + set_bit(CGRP_FROZEN, &cgrp->flags); + spin_lock_irq(&css_set_lock); for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) { cgrp->ancestor_ids[tcgrp->level] = tcgrp->id; - if (tcgrp != cgrp) + if (tcgrp != cgrp) { tcgrp->nr_descendants++; + + /* + * If the new cgroup is frozen, all ancestor cgroups + * get a new frozen descendant, but their state can't + * change because of this. + */ + if (cgrp->freezer.e_freeze) + tcgrp->freezer.nr_frozen_descendants++; + } } spin_unlock_irq(&css_set_lock); @@ -5329,6 +5396,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) { tcgrp->nr_descendants--; tcgrp->nr_dying_descendants++; + /* + * If the dying cgroup is frozen, decrease frozen descendants + * counters of ancestor cgroups. + */ + if (test_bit(CGRP_FROZEN, &cgrp->flags)) + tcgrp->freezer.nr_frozen_descendants--; } spin_unlock_irq(&css_set_lock); @@ -5782,6 +5855,29 @@ void cgroup_post_fork(struct task_struct *child) cset->nr_tasks++; css_set_move_task(child, NULL, cset, false); } + + /* + * If the cgroup has to be frozen, the new task has too. + * Let's set the JOBCTL_TRAP_FREEZE jobctl bit to get + * the task into the frozen state. + */ + if (unlikely(cgroup_task_freeze(child))) { + struct cgroup *cgrp; + + spin_lock(&child->sighand->siglock); + WARN_ON_ONCE(child->frozen); + cgrp = cset->dfl_cgrp; + child->jobctl |= JOBCTL_TRAP_FREEZE; + spin_unlock(&child->sighand->siglock); + + /* + * Calling cgroup_update_frozen() isn't required here, + * because it will be called anyway a bit later + * from do_freezer_trap(). So we avoid cgroup's + * transient switch from the frozen state and back. + */ + } + spin_unlock_irq(&css_set_lock); } @@ -5830,6 +5926,12 @@ void cgroup_exit(struct task_struct *tsk) spin_lock_irq(&css_set_lock); css_set_move_task(tsk, cset, NULL, false); cset->nr_tasks--; + + if (unlikely(cgroup_task_frozen(tsk))) + cgroup_freezer_frozen_exit(tsk); + else if (unlikely(cgroup_task_freeze(tsk))) + cgroup_update_frozen(task_dfl_cgroup(tsk)); + spin_unlock_irq(&css_set_lock); } else { get_css_set(cset); diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c new file mode 100644 index 000000000000..9d8cda478fc9 --- /dev/null +++ b/kernel/cgroup/freezer.c @@ -0,0 +1,317 @@ +//SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +#include "cgroup-internal.h" + +/* + * Propagate the cgroup frozen state upwards by the cgroup tree. + */ +static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen) +{ + int desc = 1; + + /* + * If the new state is frozen, some freezing ancestor cgroups may change + * their state too, depending on if all their descendants are frozen. + * + * Otherwise, all ancestor cgroups are forced into the non-frozen state. + */ + while ((cgrp = cgroup_parent(cgrp))) { + if (frozen) { + cgrp->freezer.nr_frozen_descendants += desc; + if (!test_bit(CGRP_FROZEN, &cgrp->flags) && + test_bit(CGRP_FREEZE, &cgrp->flags) && + cgrp->freezer.nr_frozen_descendants == + cgrp->nr_descendants) { + set_bit(CGRP_FROZEN, &cgrp->flags); + cgroup_file_notify(&cgrp->events_file); + desc++; + } + } else { + cgrp->freezer.nr_frozen_descendants -= desc; + if (test_bit(CGRP_FROZEN, &cgrp->flags)) { + clear_bit(CGRP_FROZEN, &cgrp->flags); + cgroup_file_notify(&cgrp->events_file); + desc++; + } + } + } +} + +/* + * Revisit the cgroup frozen state. + * Checks if the cgroup is really frozen and perform all state transitions. + */ +void cgroup_update_frozen(struct cgroup *cgrp) +{ + bool frozen; + + lockdep_assert_held(&css_set_lock); + + /* + * If the cgroup has to be frozen (CGRP_FREEZE bit set), + * and all tasks are frozen and/or stopped, let's consider + * the cgroup frozen. Otherwise it's not frozen. + */ + frozen = test_bit(CGRP_FREEZE, &cgrp->flags) && + cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp); + + if (frozen) { + /* Already there? */ + if (test_bit(CGRP_FROZEN, &cgrp->flags)) + return; + + set_bit(CGRP_FROZEN, &cgrp->flags); + } else { + /* Already there? */ + if (!test_bit(CGRP_FROZEN, &cgrp->flags)) + return; + + clear_bit(CGRP_FROZEN, &cgrp->flags); + } + cgroup_file_notify(&cgrp->events_file); + + /* Update the state of ancestor cgroups. */ + cgroup_propagate_frozen(cgrp, frozen); +} + +/* + * Increment cgroup's nr_frozen_tasks. + */ +static void cgroup_inc_frozen_cnt(struct cgroup *cgrp) +{ + cgrp->freezer.nr_frozen_tasks++; +} + +/* + * Decrement cgroup's nr_frozen_tasks. + */ +static void cgroup_dec_frozen_cnt(struct cgroup *cgrp) +{ + cgrp->freezer.nr_frozen_tasks--; + WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0); +} + +/* + * Enter frozen/stopped state, if not yet there. Update cgroup's counters, + * and revisit the state of the cgroup, if necessary. + */ +void cgroup_enter_frozen(void) +{ + struct cgroup *cgrp; + + if (current->frozen) + return; + + spin_lock_irq(&css_set_lock); + current->frozen = true; + cgrp = task_dfl_cgroup(current); + cgroup_inc_frozen_cnt(cgrp); + cgroup_update_frozen(cgrp); + spin_unlock_irq(&css_set_lock); +} + +/* + * Conditionally leave frozen/stopped state. Update cgroup's counters, + * and revisit the state of the cgroup, if necessary. + * + * If always_leave is not set, and the cgroup is freezing, + * we're racing with the cgroup freezing. In this case, we don't + * drop the frozen counter to avoid a transient switch to + * the unfrozen state. + */ +void cgroup_leave_frozen(bool always_leave) +{ + struct cgroup *cgrp; + + spin_lock_irq(&css_set_lock); + cgrp = task_dfl_cgroup(current); + if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) { + cgroup_dec_frozen_cnt(cgrp); + cgroup_update_frozen(cgrp); + WARN_ON_ONCE(!current->frozen); + current->frozen = false; + } + spin_unlock_irq(&css_set_lock); + + if (unlikely(current->frozen)) { + /* + * If the task remained in the frozen state, + * make sure it won't reach userspace without + * entering the signal handling loop. + */ + spin_lock_irq(¤t->sighand->siglock); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + } +} + +/* + * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE + * jobctl bit. + */ +static void cgroup_freeze_task(struct task_struct *task, bool freeze) +{ + unsigned long flags; + + /* If the task is about to die, don't bother with freezing it. */ + if (!lock_task_sighand(task, &flags)) + return; + + if (freeze) { + task->jobctl |= JOBCTL_TRAP_FREEZE; + signal_wake_up(task, false); + } else { + task->jobctl &= ~JOBCTL_TRAP_FREEZE; + wake_up_process(task); + } + + unlock_task_sighand(task, &flags); +} + +/* + * Freeze or unfreeze all tasks in the given cgroup. + */ +static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze) +{ + struct css_task_iter it; + struct task_struct *task; + + lockdep_assert_held(&cgroup_mutex); + + spin_lock_irq(&css_set_lock); + if (freeze) + set_bit(CGRP_FREEZE, &cgrp->flags); + else + clear_bit(CGRP_FREEZE, &cgrp->flags); + spin_unlock_irq(&css_set_lock); + + css_task_iter_start(&cgrp->self, 0, &it); + while ((task = css_task_iter_next(&it))) { + /* + * Ignore kernel threads here. Freezing cgroups containing + * kthreads isn't supported. + */ + if (task->flags & PF_KTHREAD) + continue; + cgroup_freeze_task(task, freeze); + } + css_task_iter_end(&it); + + /* + * Cgroup state should be revisited here to cover empty leaf cgroups + * and cgroups which descendants are already in the desired state. + */ + spin_lock_irq(&css_set_lock); + if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants) + cgroup_update_frozen(cgrp); + spin_unlock_irq(&css_set_lock); +} + +/* + * Adjust the task state (freeze or unfreeze) and revisit the state of + * source and destination cgroups. + */ +void cgroup_freezer_migrate_task(struct task_struct *task, + struct cgroup *src, struct cgroup *dst) +{ + lockdep_assert_held(&css_set_lock); + + /* + * Kernel threads are not supposed to be frozen at all. + */ + if (task->flags & PF_KTHREAD) + return; + + /* + * Adjust counters of freezing and frozen tasks. + * Note, that if the task is frozen, but the destination cgroup is not + * frozen, we bump both counters to keep them balanced. + */ + if (task->frozen) { + cgroup_inc_frozen_cnt(dst); + cgroup_dec_frozen_cnt(src); + } + cgroup_update_frozen(dst); + cgroup_update_frozen(src); + + /* + * Force the task to the desired state. + */ + cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags)); +} + +void cgroup_freezer_frozen_exit(struct task_struct *task) +{ + struct cgroup *cgrp = task_dfl_cgroup(task); + + lockdep_assert_held(&css_set_lock); + + cgroup_dec_frozen_cnt(cgrp); + cgroup_update_frozen(cgrp); +} + +void cgroup_freeze(struct cgroup *cgrp, bool freeze) +{ + struct cgroup_subsys_state *css; + struct cgroup *dsct; + bool applied = false; + + lockdep_assert_held(&cgroup_mutex); + + /* + * Nothing changed? Just exit. + */ + if (cgrp->freezer.freeze == freeze) + return; + + cgrp->freezer.freeze = freeze; + + /* + * Propagate changes downwards the cgroup tree. + */ + css_for_each_descendant_pre(css, &cgrp->self) { + dsct = css->cgroup; + + if (cgroup_is_dead(dsct)) + continue; + + if (freeze) { + dsct->freezer.e_freeze++; + /* + * Already frozen because of ancestor's settings? + */ + if (dsct->freezer.e_freeze > 1) + continue; + } else { + dsct->freezer.e_freeze--; + /* + * Still frozen because of ancestor's settings? + */ + if (dsct->freezer.e_freeze > 0) + continue; + + WARN_ON_ONCE(dsct->freezer.e_freeze < 0); + } + + /* + * Do change actual state: freeze or unfreeze. + */ + cgroup_do_freeze(dsct, freeze); + applied = true; + } + + /* + * Even if the actual state hasn't changed, let's notify a user. + * The state can be enforced by an ancestor cgroup: the cgroup + * can already be in the desired state or it can be locked in the + * opposite state, so that the transition will never happen. + * In both cases it's better to notify a user, that there is + * nothing to wait for. + */ + if (!applied) + cgroup_file_notify(&cgrp->events_file); +} diff --git a/kernel/fork.c b/kernel/fork.c index 9dcd18aa210b..8097a0cce4db 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1222,7 +1222,9 @@ static int wait_for_vfork_done(struct task_struct *child, int killed; freezer_do_not_count(); + cgroup_enter_frozen(); killed = wait_for_completion_killable(vfork); + cgroup_leave_frozen(false); freezer_count(); if (killed) { diff --git a/kernel/signal.c b/kernel/signal.c index f98448cf2def..095e0fc57b25 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -43,6 +43,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -146,9 +147,10 @@ static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked) static bool recalc_sigpending_tsk(struct task_struct *t) { - if ((t->jobctl & JOBCTL_PENDING_MASK) || + if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) || PENDING(&t->pending, &t->blocked) || - PENDING(&t->signal->shared_pending, &t->blocked)) { + PENDING(&t->signal->shared_pending, &t->blocked) || + cgroup_task_frozen(t)) { set_tsk_thread_flag(t, TIF_SIGPENDING); return true; } @@ -2108,6 +2110,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t preempt_disable(); read_unlock(&tasklist_lock); preempt_enable_no_resched(); + cgroup_enter_frozen(); freezable_schedule(); } else { /* @@ -2286,6 +2289,7 @@ static bool do_signal_stop(int signr) } /* Now we don't run again until woken by SIGCONT or SIGKILL */ + cgroup_enter_frozen(); freezable_schedule(); return true; } else { @@ -2332,6 +2336,43 @@ static void do_jobctl_trap(void) } } +/** + * do_freezer_trap - handle the freezer jobctl trap + * + * Puts the task into frozen state, if only the task is not about to quit. + * In this case it drops JOBCTL_TRAP_FREEZE. + * + * CONTEXT: + * Must be called with @current->sighand->siglock held, + * which is always released before returning. + */ +static void do_freezer_trap(void) + __releases(¤t->sighand->siglock) +{ + /* + * If there are other trap bits pending except JOBCTL_TRAP_FREEZE, + * let's make another loop to give it a chance to be handled. + * In any case, we'll return back. + */ + if ((current->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) != + JOBCTL_TRAP_FREEZE) { + spin_unlock_irq(¤t->sighand->siglock); + return; + } + + /* + * Now we're sure that there is no pending fatal signal and no + * pending traps. Clear TIF_SIGPENDING to not get out of schedule() + * immediately (if there is a non-fatal signal pending), and + * put the task into sleep. + */ + __set_current_state(TASK_INTERRUPTIBLE); + clear_thread_flag(TIF_SIGPENDING); + spin_unlock_irq(¤t->sighand->siglock); + cgroup_enter_frozen(); + freezable_schedule(); +} + static int ptrace_signal(int signr, kernel_siginfo_t *info) { /* @@ -2442,6 +2483,10 @@ relock: ksig->info.si_signo = signr = SIGKILL; sigdelset(¤t->pending.signal, SIGKILL); recalc_sigpending(); + current->jobctl &= ~JOBCTL_TRAP_FREEZE; + spin_unlock_irq(&sighand->siglock); + if (unlikely(cgroup_task_frozen(current))) + cgroup_leave_frozen(true); goto fatal; } @@ -2452,9 +2497,24 @@ relock: do_signal_stop(0)) goto relock; - if (unlikely(current->jobctl & JOBCTL_TRAP_MASK)) { - do_jobctl_trap(); + if (unlikely(current->jobctl & + (JOBCTL_TRAP_MASK | JOBCTL_TRAP_FREEZE))) { + if (current->jobctl & JOBCTL_TRAP_MASK) { + do_jobctl_trap(); + spin_unlock_irq(&sighand->siglock); + } else if (current->jobctl & JOBCTL_TRAP_FREEZE) + do_freezer_trap(); + + goto relock; + } + + /* + * If the task is leaving the frozen state, let's update + * cgroup counters and reset the frozen bit. + */ + if (unlikely(cgroup_task_frozen(current))) { spin_unlock_irq(&sighand->siglock); + cgroup_leave_frozen(true); goto relock; } @@ -2548,8 +2608,8 @@ relock: continue; } - fatal: spin_unlock_irq(&sighand->siglock); + fatal: /* * Anything else is fatal, maybe with a core dump. -- cgit From 712e35178754bbb785d00d5fcf5abaf32699bf11 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 19 Apr 2019 10:03:07 -0700 Subject: cgroup: make TRACE_CGROUP_PATH irq-safe To use the TRACE_CGROUP_PATH() macro with css_set_lock locked, let's make the macro irq-safe. It's necessary in order to trace cgroup freezer state transitions (frozen/not frozen), which are happening with css_set_lock locked. Signed-off-by: Roman Gushchin Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-internal.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 02c001ffe2e2..809e34a3c017 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -28,12 +28,15 @@ extern void __init enable_debug_cgroup(void); #define TRACE_CGROUP_PATH(type, cgrp, ...) \ do { \ if (trace_cgroup_##type##_enabled()) { \ - spin_lock(&trace_cgroup_path_lock); \ + unsigned long flags; \ + spin_lock_irqsave(&trace_cgroup_path_lock, \ + flags); \ cgroup_path(cgrp, trace_cgroup_path, \ TRACE_CGROUP_PATH_LEN); \ trace_cgroup_##type(cgrp, trace_cgroup_path, \ ##__VA_ARGS__); \ - spin_unlock(&trace_cgroup_path_lock); \ + spin_unlock_irqrestore(&trace_cgroup_path_lock, \ + flags); \ } \ } while (0) -- cgit From 4c476d8cff48853645abc822154aaad208faebcc Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 19 Apr 2019 10:03:08 -0700 Subject: cgroup: add tracing points for cgroup v2 freezer Add cgroup:cgroup_freeze and cgroup:cgroup_unfreeze events, which are using the existing cgroup tracing infrastructure. Add the cgroup_event event class, which is similar to the cgroup class, but contains an additional integer field to store a new value (the level field is dropped). Also add two tracing events: cgroup_notify_populated and cgroup_notify_frozen, which are raised in a generic way using the TRACE_CGROUP_PATH() macro. This allows to trace cgroup state transitions and is generally helpful for debugging the cgroup freezer code. Signed-off-by: Roman Gushchin Signed-off-by: Tejun Heo --- include/trace/events/cgroup.h | 55 +++++++++++++++++++++++++++++++++++++++++++ kernel/cgroup/cgroup.c | 2 ++ kernel/cgroup/freezer.c | 15 +++++++++++- 3 files changed, 71 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h index a401ff5e7847..a566cc521476 100644 --- a/include/trace/events/cgroup.h +++ b/include/trace/events/cgroup.h @@ -103,6 +103,20 @@ DEFINE_EVENT(cgroup, cgroup_rename, TP_ARGS(cgrp, path) ); +DEFINE_EVENT(cgroup, cgroup_freeze, + + TP_PROTO(struct cgroup *cgrp, const char *path), + + TP_ARGS(cgrp, path) +); + +DEFINE_EVENT(cgroup, cgroup_unfreeze, + + TP_PROTO(struct cgroup *cgrp, const char *path), + + TP_ARGS(cgrp, path) +); + DECLARE_EVENT_CLASS(cgroup_migrate, TP_PROTO(struct cgroup *dst_cgrp, const char *path, @@ -149,6 +163,47 @@ DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks, TP_ARGS(dst_cgrp, path, task, threadgroup) ); +DECLARE_EVENT_CLASS(cgroup_event, + + TP_PROTO(struct cgroup *cgrp, const char *path, int val), + + TP_ARGS(cgrp, path, val), + + TP_STRUCT__entry( + __field( int, root ) + __field( int, id ) + __field( int, level ) + __string( path, path ) + __field( int, val ) + ), + + TP_fast_assign( + __entry->root = cgrp->root->hierarchy_id; + __entry->id = cgrp->id; + __entry->level = cgrp->level; + __assign_str(path, path); + __entry->val = val; + ), + + TP_printk("root=%d id=%d level=%d path=%s val=%d", + __entry->root, __entry->id, __entry->level, __get_str(path), + __entry->val) +); + +DEFINE_EVENT(cgroup_event, cgroup_notify_populated, + + TP_PROTO(struct cgroup *cgrp, const char *path, int val), + + TP_ARGS(cgrp, path, val) +); + +DEFINE_EVENT(cgroup_event, cgroup_notify_frozen, + + TP_PROTO(struct cgroup *cgrp, const char *path, int val), + + TP_ARGS(cgrp, path, val) +); + #endif /* _TRACE_CGROUP_H */ /* This part must be outside protection */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 6895464b54c6..57edcf398d71 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -816,6 +816,8 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated) break; cgroup1_check_for_release(cgrp); + TRACE_CGROUP_PATH(notify_populated, cgrp, + cgroup_is_populated(cgrp)); cgroup_file_notify(&cgrp->events_file); child = cgrp; diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c index 9d8cda478fc9..3bfbb3c8baf3 100644 --- a/kernel/cgroup/freezer.c +++ b/kernel/cgroup/freezer.c @@ -6,6 +6,8 @@ #include "cgroup-internal.h" +#include + /* * Propagate the cgroup frozen state upwards by the cgroup tree. */ @@ -28,6 +30,7 @@ static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen) cgrp->nr_descendants) { set_bit(CGRP_FROZEN, &cgrp->flags); cgroup_file_notify(&cgrp->events_file); + TRACE_CGROUP_PATH(notify_frozen, cgrp, 1); desc++; } } else { @@ -35,6 +38,7 @@ static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen) if (test_bit(CGRP_FROZEN, &cgrp->flags)) { clear_bit(CGRP_FROZEN, &cgrp->flags); cgroup_file_notify(&cgrp->events_file); + TRACE_CGROUP_PATH(notify_frozen, cgrp, 0); desc++; } } @@ -73,6 +77,7 @@ void cgroup_update_frozen(struct cgroup *cgrp) clear_bit(CGRP_FROZEN, &cgrp->flags); } cgroup_file_notify(&cgrp->events_file); + TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen); /* Update the state of ancestor cgroups. */ cgroup_propagate_frozen(cgrp, frozen); @@ -189,6 +194,11 @@ static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze) clear_bit(CGRP_FREEZE, &cgrp->flags); spin_unlock_irq(&css_set_lock); + if (freeze) + TRACE_CGROUP_PATH(freeze, cgrp); + else + TRACE_CGROUP_PATH(unfreeze, cgrp); + css_task_iter_start(&cgrp->self, 0, &it); while ((task = css_task_iter_next(&it))) { /* @@ -312,6 +322,9 @@ void cgroup_freeze(struct cgroup *cgrp, bool freeze) * In both cases it's better to notify a user, that there is * nothing to wait for. */ - if (!applied) + if (!applied) { + TRACE_CGROUP_PATH(notify_frozen, cgrp, + test_bit(CGRP_FROZEN, &cgrp->flags)); cgroup_file_notify(&cgrp->events_file); + } } -- cgit From 52fde6e70cccc2fcf3f39fed0d0392960e2c2b03 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 21 Apr 2019 19:40:44 -0400 Subject: function_graph: Have selftest also emulate tr->reset() as it did with tr->init() The function_graph boot up self test emulates the tr->init() function in order to add a wrapper around the function graph tracer entry code to test for lock ups and such. But it does not emulate the tr->reset(), and just calls the function_graph tracer tr->reset() function which will use its own fgraph_ops to unregister function tracing with. As the fgraph_ops is becoming more meaningful with the register_ftrace_graph() and unregister_ftrace_graph() functions, the two need to be the same. The emulated tr->init() uses its own fgraph_ops descriptor, which means the unregister_ftrace_graph() must use the same ftrace_ops, which the selftest currently does not do. By emulating the tr->reset() as the selftest does with the tr->init() it will be able to pass the same fgraph_ops descriptor to the unregister_ftrace_graph() as it did with the register_ftrace_graph(). Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_selftest.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 9d402e7fc949..69ee8ef12cee 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -792,7 +792,10 @@ trace_selftest_startup_function_graph(struct tracer *trace, /* check the trace buffer */ ret = trace_test_buffer(&tr->trace_buffer, &count); - trace->reset(tr); + /* Need to also simulate the tr->reset to remove this fgraph_ops */ + tracing_stop_cmdline_record(); + unregister_ftrace_graph(&fgraph_ops); + tracing_start(); if (!ret && !count) { -- cgit From 70c4cf17e445264453bc5323db3e50aa0ac9e81f Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 19 Apr 2019 20:49:29 -0500 Subject: audit: fix a memory leak bug In audit_rule_change(), audit_data_to_entry() is firstly invoked to translate the payload data to the kernel's rule representation. In audit_data_to_entry(), depending on the audit field type, an audit tree may be created in audit_make_tree(), which eventually invokes kmalloc() to allocate the tree. Since this tree is a temporary tree, it will be then freed in the following execution, e.g., audit_add_rule() if the message type is AUDIT_ADD_RULE or audit_del_rule() if the message type is AUDIT_DEL_RULE. However, if the message type is neither AUDIT_ADD_RULE nor AUDIT_DEL_RULE, i.e., the default case of the switch statement, this temporary tree is not freed. To fix this issue, only allocate the tree when the type is AUDIT_ADD_RULE or AUDIT_DEL_RULE. Signed-off-by: Wenwen Wang Reviewed-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/auditfilter.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 2c3c2f349b23..1bc6410413e6 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1114,22 +1114,24 @@ int audit_rule_change(int type, int seq, void *data, size_t datasz) int err = 0; struct audit_entry *entry; - entry = audit_data_to_entry(data, datasz); - if (IS_ERR(entry)) - return PTR_ERR(entry); - switch (type) { case AUDIT_ADD_RULE: + entry = audit_data_to_entry(data, datasz); + if (IS_ERR(entry)) + return PTR_ERR(entry); err = audit_add_rule(entry); audit_log_rule_change("add_rule", &entry->rule, !err); break; case AUDIT_DEL_RULE: + entry = audit_data_to_entry(data, datasz); + if (IS_ERR(entry)) + return PTR_ERR(entry); err = audit_del_rule(entry); audit_log_rule_change("remove_rule", &entry->rule, !err); break; default: - err = -EINVAL; WARN_ON(1); + return -EINVAL; } if (err || type == AUDIT_DEL_RULE) { -- cgit From 7df737e991069d75eec1ded1c8b37e81b8c54df9 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 19 Apr 2019 07:44:54 -0700 Subject: bpf: remove global variables Move three global variables protected by bpf_verifier_lock into 'struct bpf_verifier_env' to allow parallel verification. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 5 +++++ kernel/bpf/verifier.c | 25 +++++++++++++------------ 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b3ab61fe1932..1305ccbd8fe6 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -295,6 +295,11 @@ struct bpf_verifier_env { const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; + struct { + int *insn_state; + int *insn_stack; + int cur_stack; + } cfg; u32 subprog_cnt; /* number of instructions analyzed by the verifier */ u32 insn_processed; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index db301e9b5295..5f0eb5bd5589 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5369,10 +5369,6 @@ enum { #define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L) -static int *insn_stack; /* stack of insns to process */ -static int cur_stack; /* current stack index */ -static int *insn_state; - /* t, w, e - match pseudo-code above: * t - index of current instruction * w - next instruction @@ -5380,6 +5376,9 @@ static int *insn_state; */ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) { + int *insn_stack = env->cfg.insn_stack; + int *insn_state = env->cfg.insn_state; + if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) return 0; @@ -5400,9 +5399,9 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) /* tree-edge */ insn_state[t] = DISCOVERED | e; insn_state[w] = DISCOVERED; - if (cur_stack >= env->prog->len) + if (env->cfg.cur_stack >= env->prog->len) return -E2BIG; - insn_stack[cur_stack++] = w; + insn_stack[env->cfg.cur_stack++] = w; return 1; } else if ((insn_state[w] & 0xF0) == DISCOVERED) { verbose_linfo(env, t, "%d: ", t); @@ -5426,14 +5425,15 @@ static int check_cfg(struct bpf_verifier_env *env) { struct bpf_insn *insns = env->prog->insnsi; int insn_cnt = env->prog->len; + int *insn_stack, *insn_state; int ret = 0; int i, t; - insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); + insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); if (!insn_state) return -ENOMEM; - insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); + insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); if (!insn_stack) { kvfree(insn_state); return -ENOMEM; @@ -5441,12 +5441,12 @@ static int check_cfg(struct bpf_verifier_env *env) insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */ insn_stack[0] = 0; /* 0 is the first instruction */ - cur_stack = 1; + env->cfg.cur_stack = 1; peek_stack: - if (cur_stack == 0) + if (env->cfg.cur_stack == 0) goto check_state; - t = insn_stack[cur_stack - 1]; + t = insn_stack[env->cfg.cur_stack - 1]; if (BPF_CLASS(insns[t].code) == BPF_JMP || BPF_CLASS(insns[t].code) == BPF_JMP32) { @@ -5515,7 +5515,7 @@ peek_stack: mark_explored: insn_state[t] = EXPLORED; - if (cur_stack-- <= 0) { + if (env->cfg.cur_stack-- <= 0) { verbose(env, "pop stack internal bug\n"); ret = -EFAULT; goto err_free; @@ -5535,6 +5535,7 @@ check_state: err_free: kvfree(insn_state); kvfree(insn_stack); + env->cfg.insn_state = env->cfg.insn_stack = NULL; return ret; } -- cgit From 45a73c17bfb92c3ceebedc80a750ef2c2931c26b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 19 Apr 2019 07:44:55 -0700 Subject: bpf: drop bpf_verifier_lock Drop bpf_verifier_lock for root to avoid being DoS-ed by unprivileged. The BPF verifier is now fully parallel. All unpriv users are still serialized by bpf_verifier_lock to avoid exhausting kernel memory by running N parallel verifications. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5f0eb5bd5589..423f242a5efb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8132,9 +8132,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, env->insn_aux_data[i].orig_idx = i; env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; + is_priv = capable(CAP_SYS_ADMIN); /* grab the mutex to protect few globals used by verifier */ - mutex_lock(&bpf_verifier_lock); + if (!is_priv) + mutex_lock(&bpf_verifier_lock); if (attr->log_level || attr->log_buf || attr->log_size) { /* user requested verbose verifier output @@ -8157,7 +8159,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) env->strict_alignment = false; - is_priv = capable(CAP_SYS_ADMIN); env->allow_ptr_leaks = is_priv; ret = replace_map_fd_with_map_ptr(env); @@ -8270,7 +8271,8 @@ err_release_maps: release_maps(env); *prog = env->prog; err_unlock: - mutex_unlock(&bpf_verifier_lock); + if (!is_priv) + mutex_unlock(&bpf_verifier_lock); vfree(env->insn_aux_data); err_free_env: kfree(env); -- cgit From 148a97d5a02a62f81b5d6176f871c94a65e1f3af Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Apr 2019 17:24:37 +0300 Subject: dma-mapping: remove an unnecessary NULL check We already dereferenced "dev" when we called get_dma_ops() so this NULL check is too late. We're not supposed to pass NULL "dev" pointers to dma_alloc_attrs(). Signed-off-by: Dan Carpenter Signed-off-by: Christoph Hellwig --- kernel/dma/mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 685a53f2a793..f7afdadb6770 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -244,7 +244,7 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, const struct dma_map_ops *ops = get_dma_ops(dev); void *cpu_addr; - WARN_ON_ONCE(dev && !dev->coherent_dma_mask); + WARN_ON_ONCE(!dev->coherent_dma_mask); if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) return cpu_addr; -- cgit From 52ba92f5882adf1ee785c4c5ef23491948917fcd Mon Sep 17 00:00:00 2001 From: Kimberly Brown Date: Mon, 1 Apr 2019 22:51:41 -0400 Subject: irqdesc: Replace irq_kobj_type's default_attrs field with groups The kobj_type default_attrs field is being replaced by the default_groups field. Replace irq_kobj_type's default_attrs field with default_groups and use the ATTRIBUTE_GROUPS macro to create irq_groups. This patch was tested by verifying that the sysfs files for the attributes in the default groups were created. Signed-off-by: Kimberly Brown Reviewed-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/irq/irqdesc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 13539e12cd80..bbec57bda666 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -275,11 +275,12 @@ static struct attribute *irq_attrs[] = { &actions_attr.attr, NULL }; +ATTRIBUTE_GROUPS(irq); static struct kobj_type irq_kobj_type = { .release = irq_kobj_release, .sysfs_ops = &kobj_sysfs_ops, - .default_attrs = irq_attrs, + .default_groups = irq_groups, }; static void irq_sysfs_add(int irq, struct irq_desc *desc) -- cgit From 2064fbc779d43c5ac38e20a4b4979e365f87349f Mon Sep 17 00:00:00 2001 From: Kimberly Brown Date: Mon, 1 Apr 2019 22:51:47 -0400 Subject: padata: Replace padata_attr_type default_attrs field with groups The kobj_type default_attrs field is being replaced by the default_groups field. Replace padata_attr_type's default_attrs field with default_groups and use the ATTRIBUTE_GROUPS macro to create padata_default_groups. This patch was tested by loading the pcrypt module and verifying that the sysfs files for the attributes in the default groups were created. Signed-off-by: Kimberly Brown Signed-off-by: Greg Kroah-Hartman --- kernel/padata.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/padata.c b/kernel/padata.c index 3e2633ae3bca..2d2fddbb7a4c 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -957,6 +957,7 @@ static struct attribute *padata_default_attrs[] = { ¶llel_cpumask_attr.attr, NULL, }; +ATTRIBUTE_GROUPS(padata_default); static ssize_t padata_sysfs_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ -995,7 +996,7 @@ static const struct sysfs_ops padata_sysfs_ops = { static struct kobj_type padata_attr_type = { .sysfs_ops = &padata_sysfs_ops, - .default_attrs = padata_default_attrs, + .default_groups = padata_default_groups, .release = padata_sysfs_release, }; -- cgit From 9782adeb3d9d6e33fc52392031b8e00270515442 Mon Sep 17 00:00:00 2001 From: Kimberly Brown Date: Mon, 1 Apr 2019 22:51:53 -0400 Subject: cpufreq: schedutil: Replace default_attrs field with groups The kobj_type default_attrs field is being replaced by the default_groups field. Replace sugov_tunables_ktype's default_attrs field with default groups. Change "sugov_attributes" to "sugov_attrs" and use the ATTRIBUTE_GROUPS macro to create sugov_groups. This patch was tested by setting the scaling governor to schedutil and verifying that the sysfs files for the attributes in the default groups were created. Signed-off-by: Kimberly Brown Acked-by: Peter Zijlstra (Intel) Acked-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/sched/cpufreq_schedutil.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 5c41ea367422..148b60c8993d 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -598,13 +598,14 @@ rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us); -static struct attribute *sugov_attributes[] = { +static struct attribute *sugov_attrs[] = { &rate_limit_us.attr, NULL }; +ATTRIBUTE_GROUPS(sugov); static struct kobj_type sugov_tunables_ktype = { - .default_attrs = sugov_attributes, + .default_groups = sugov_groups, .sysfs_ops = &governor_sysfs_ops, }; -- cgit From 70283454c918f1d65de0ec50c45ef592d781bcae Mon Sep 17 00:00:00 2001 From: Kimberly Brown Date: Mon, 1 Apr 2019 22:51:58 -0400 Subject: livepatch: Replace klp_ktype_patch's default_attrs with groups The kobj_type default_attrs field is being replaced by the default_groups field. Replace klp_ktype_patch's default_attrs field with default_groups and use the ATTRIBUTE_GROUPS macro to create klp_patch_groups. This patch was tested by loading the livepatch-sample module and verifying that the sysfs files for the attributes in the default groups were created. Signed-off-by: Kimberly Brown Acked-by: Jiri Kosina Acked-by: Miroslav Benes Acked-by: Petr Mladek Signed-off-by: Greg Kroah-Hartman --- kernel/livepatch/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index eb0ee10a1981..34a8338657d2 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -419,6 +419,7 @@ static struct attribute *klp_patch_attrs[] = { &force_kobj_attr.attr, NULL }; +ATTRIBUTE_GROUPS(klp_patch); static void klp_free_object_dynamic(struct klp_object *obj) { @@ -546,7 +547,7 @@ static void klp_kobj_release_patch(struct kobject *kobj) static struct kobj_type klp_ktype_patch = { .release = klp_kobj_release_patch, .sysfs_ops = &kobj_sysfs_ops, - .default_attrs = klp_patch_attrs, + .default_groups = klp_patch_groups, }; static void klp_kobj_release_object(struct kobject *kobj) -- cgit From 118c8e9ae629d35fa9b3d27a7b9d59298b1b4183 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 25 Apr 2019 14:37:23 -0700 Subject: bpf: support BPF_PROG_QUERY for BPF_FLOW_DISSECTOR attach_type target_fd is target namespace. If there is a flow dissector BPF program attached to that namespace, its (single) id is returned. v5: * drop net ref right after rcu unlock (Daniel Borkmann) v4: * add missing put_net (Jann Horn) v3: * add missing inline to skb_flow_dissector_prog_query static def (kbuild test robot ) v2: * don't sleep in rcu critical section (Jakub Kicinski) * check input prog_cnt (exit early) Cc: Jann Horn Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- include/linux/skbuff.h | 8 ++++++++ kernel/bpf/syscall.c | 2 ++ net/core/flow_dissector.c | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) (limited to 'kernel') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 998256c2820b..6d58fa8a65fd 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1258,11 +1258,19 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, unsigned int key_count); #ifdef CONFIG_NET +int skb_flow_dissector_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr); int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr); #else +static inline int skb_flow_dissector_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + return -EOPNOTSUPP; +} + static inline int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 92c9b8a32b50..b0de49598341 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2009,6 +2009,8 @@ static int bpf_prog_query(const union bpf_attr *attr, break; case BPF_LIRC_MODE2: return lirc_prog_query(attr, uattr); + case BPF_FLOW_DISSECTOR: + return skb_flow_dissector_prog_query(attr, uattr); default: return -EINVAL; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index fac712cee9d5..9ca784c592ac 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -65,6 +65,45 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, } EXPORT_SYMBOL(skb_flow_dissector_init); +int skb_flow_dissector_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); + u32 prog_id, prog_cnt = 0, flags = 0; + struct bpf_prog *attached; + struct net *net; + + if (attr->query.query_flags) + return -EINVAL; + + net = get_net_ns_by_fd(attr->query.target_fd); + if (IS_ERR(net)) + return PTR_ERR(net); + + rcu_read_lock(); + attached = rcu_dereference(net->flow_dissector_prog); + if (attached) { + prog_cnt = 1; + prog_id = attached->aux->id; + } + rcu_read_unlock(); + + put_net(net); + + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) + return -EFAULT; + if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) + return -EFAULT; + + if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) + return 0; + + if (copy_to_user(prog_ids, &prog_id, sizeof(u32))) + return -EFAULT; + + return 0; +} + int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { -- cgit From e43e9c339a78a0978f4ce473f645cedc05e6a57c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 26 Apr 2019 13:51:03 -0400 Subject: fsnotify: switch send_to_group() and ->handle_event to const struct qstr * note that conditions surrounding accesses to dname in audit_watch_handle_event() and audit_mark_handle_event() guarantee that dname won't have been NULL. Signed-off-by: Al Viro --- fs/notify/dnotify/dnotify.c | 2 +- fs/notify/fanotify/fanotify.c | 2 +- fs/notify/fsnotify.c | 4 ++-- fs/notify/inotify/inotify.h | 2 +- fs/notify/inotify/inotify_fsnotify.c | 6 +++--- include/linux/fsnotify_backend.h | 2 +- kernel/audit_fsnotify.c | 4 ++-- kernel/audit_tree.c | 2 +- kernel/audit_watch.c | 6 +++--- 9 files changed, 15 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 58d77dc696eb..250369d6901d 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -81,7 +81,7 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) static int dnotify_handle_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie, + const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) { struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 6b9c27548997..a34d7e003d7d 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -361,7 +361,7 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) static int fanotify_handle_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie, + const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) { int ret = 0; diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 9cbb5ae11d2f..5433e37fb0c5 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -195,7 +195,7 @@ EXPORT_SYMBOL_GPL(__fsnotify_parent); static int send_to_group(struct inode *to_tell, __u32 mask, const void *data, int data_is, u32 cookie, - const unsigned char *file_name, + const struct qstr *file_name, struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; @@ -379,7 +379,7 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, */ while (fsnotify_iter_select_report_types(&iter_info)) { ret = send_to_group(to_tell, mask, data, data_is, cookie, - file_name->name, &iter_info); + file_name, &iter_info); if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 74ae60305189..3f246f7b8a92 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -27,7 +27,7 @@ extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, extern int inotify_handle_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie, + const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info); extern const struct fsnotify_ops inotify_fsnotify_ops; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index ff30abd6a49b..e87f012cbff7 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -67,7 +67,7 @@ static int inotify_merge(struct list_head *list, int inotify_handle_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie, + const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) { struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); @@ -89,7 +89,7 @@ int inotify_handle_event(struct fsnotify_group *group, return 0; } if (file_name) { - len = strlen(file_name); + len = strlen(file_name->name); alloc_len += len + 1; } @@ -129,7 +129,7 @@ int inotify_handle_event(struct fsnotify_group *group, event->sync_cookie = cookie; event->name_len = len; if (len) - strcpy(event->name, file_name); + strcpy(event->name, file_name->name); ret = fsnotify_add_event(group, fsn_event, inotify_merge); if (ret) { diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7eb7821766d5..c28f6ed1f59b 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -117,7 +117,7 @@ struct fsnotify_ops { int (*handle_event)(struct fsnotify_group *group, struct inode *inode, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie, + const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index 37ae95cfb7f4..fb241805569c 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -164,7 +164,7 @@ static void audit_autoremove_mark_rule(struct audit_fsnotify_mark *audit_mark) static int audit_mark_handle_event(struct fsnotify_group *group, struct inode *to_tell, u32 mask, const void *data, int data_type, - const unsigned char *dname, u32 cookie, + const struct qstr *dname, u32 cookie, struct fsnotify_iter_info *iter_info) { struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); @@ -188,7 +188,7 @@ static int audit_mark_handle_event(struct fsnotify_group *group, } if (mask & (FS_CREATE|FS_MOVED_TO|FS_DELETE|FS_MOVED_FROM)) { - if (audit_compare_dname_path(dname, audit_mark->path, AUDIT_NAME_FULL)) + if (audit_compare_dname_path(dname->name, audit_mark->path, AUDIT_NAME_FULL)) return 0; audit_update_mark(audit_mark, inode); } else if (mask & (FS_DELETE_SELF|FS_UNMOUNT|FS_MOVE_SELF)) diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index abfb112f26aa..e49c912f862d 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -1040,7 +1040,7 @@ static void evict_chunk(struct audit_chunk *chunk) static int audit_tree_handle_event(struct fsnotify_group *group, struct inode *to_tell, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie, + const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) { return 0; diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index e8d1adeb2223..3c12fd5b680e 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -482,7 +482,7 @@ void audit_remove_watch_rule(struct audit_krule *krule) static int audit_watch_handle_event(struct fsnotify_group *group, struct inode *to_tell, u32 mask, const void *data, int data_type, - const unsigned char *dname, u32 cookie, + const struct qstr *dname, u32 cookie, struct fsnotify_iter_info *iter_info) { struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); @@ -507,9 +507,9 @@ static int audit_watch_handle_event(struct fsnotify_group *group, } if (mask & (FS_CREATE|FS_MOVED_TO) && inode) - audit_update_watch(parent, dname, inode->i_sb->s_dev, inode->i_ino, 0); + audit_update_watch(parent, dname->name, inode->i_sb->s_dev, inode->i_ino, 0); else if (mask & (FS_DELETE|FS_MOVED_FROM)) - audit_update_watch(parent, dname, AUDIT_DEV_UNSET, AUDIT_INO_UNSET, 1); + audit_update_watch(parent, dname->name, AUDIT_DEV_UNSET, AUDIT_INO_UNSET, 1); else if (mask & (FS_DELETE_SELF|FS_UNMOUNT|FS_MOVE_SELF)) audit_remove_parent_watches(parent); -- cgit From 6921d4ebe418e7cce9f65c1f38c93ea82a1f546c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 26 Apr 2019 14:09:49 -0400 Subject: audit_update_watch(): switch to const struct qstr * Signed-off-by: Al Viro --- kernel/audit_watch.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 3c12fd5b680e..d832ce9df065 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -255,18 +255,19 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc /* Update inode info in audit rules based on filesystem event. */ static void audit_update_watch(struct audit_parent *parent, - const char *dname, dev_t dev, + const struct qstr *dname, dev_t dev, unsigned long ino, unsigned invalidating) { struct audit_watch *owatch, *nwatch, *nextw; struct audit_krule *r, *nextr; struct audit_entry *oentry, *nentry; + const unsigned char *name = dname->name; mutex_lock(&audit_filter_mutex); /* Run all of the watches on this parent looking for the one that * matches the given dname */ list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) { - if (audit_compare_dname_path(dname, owatch->path, + if (audit_compare_dname_path(name, owatch->path, AUDIT_NAME_FULL)) continue; @@ -507,9 +508,9 @@ static int audit_watch_handle_event(struct fsnotify_group *group, } if (mask & (FS_CREATE|FS_MOVED_TO) && inode) - audit_update_watch(parent, dname->name, inode->i_sb->s_dev, inode->i_ino, 0); + audit_update_watch(parent, dname, inode->i_sb->s_dev, inode->i_ino, 0); else if (mask & (FS_DELETE|FS_MOVED_FROM)) - audit_update_watch(parent, dname->name, AUDIT_DEV_UNSET, AUDIT_INO_UNSET, 1); + audit_update_watch(parent, dname, AUDIT_DEV_UNSET, AUDIT_INO_UNSET, 1); else if (mask & (FS_DELETE_SELF|FS_UNMOUNT|FS_MOVE_SELF)) audit_remove_parent_watches(parent); -- cgit From 9df1c28bb75217b244257152ab7d788bb2a386d0 Mon Sep 17 00:00:00 2001 From: Matt Mullins Date: Fri, 26 Apr 2019 11:49:47 -0700 Subject: bpf: add writable context for raw tracepoints This is an opt-in interface that allows a tracepoint to provide a safe buffer that can be written from a BPF_PROG_TYPE_RAW_TRACEPOINT program. The size of the buffer must be a compile-time constant, and is checked before allowing a BPF program to attach to a tracepoint that uses this feature. The pointer to this buffer will be the first argument of tracepoints that opt in; the pointer is valid and can be bpf_probe_read() by both BPF_PROG_TYPE_RAW_TRACEPOINT and BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE programs that attach to such a tracepoint, but the buffer to which it points may only be written by the latter. Signed-off-by: Matt Mullins Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ include/linux/bpf_types.h | 1 + include/linux/tracepoint-defs.h | 1 + include/trace/bpf_probe.h | 27 +++++++++++++++++++++++++-- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 8 ++++++-- kernel/bpf/verifier.c | 31 +++++++++++++++++++++++++++++++ kernel/trace/bpf_trace.c | 24 ++++++++++++++++++++++++ 8 files changed, 91 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f15432d90728..cd6341eabd74 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -272,6 +272,7 @@ enum bpf_reg_type { PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ + PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ }; /* The information passed from prog-specific *_is_valid_access @@ -361,6 +362,7 @@ struct bpf_prog_aux { u32 used_map_cnt; u32 max_ctx_offset; u32 max_pkt_offset; + u32 max_tp_access; u32 stack_depth; u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index d26991a16894..a10d37bce364 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable) #endif #ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 49ba9cde7e4b..b29950a19205 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -45,6 +45,7 @@ struct bpf_raw_event_map { struct tracepoint *tp; void *bpf_func; u32 num_args; + u32 writable_size; } __aligned(32); #endif diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index 505dae0bed80..d6e556c0a085 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto) \ * to make sure that if the tracepoint handling changes, the * bpf probe will fail to compile unless it too is updated. */ -#undef DEFINE_EVENT -#define DEFINE_EVENT(template, call, proto, args) \ +#define __DEFINE_EVENT(template, call, proto, args, size) \ static inline void bpf_test_probe_##call(void) \ { \ check_trace_callback_type_##call(__bpf_trace_##template); \ @@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = { \ .tp = &__tracepoint_##call, \ .bpf_func = (void *)__bpf_trace_##template, \ .num_args = COUNT_ARGS(args), \ + .writable_size = size, \ }; +#define FIRST(x, ...) x + +#undef DEFINE_EVENT_WRITABLE +#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \ +static inline void bpf_test_buffer_##call(void) \ +{ \ + /* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \ + * BUILD_BUG_ON_ZERO() uses a different mechanism that is not \ + * dead-code-eliminated. \ + */ \ + FIRST(proto); \ + (void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \ +} \ +__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ + __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0) #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef DEFINE_EVENT_WRITABLE +#undef __DEFINE_EVENT +#undef FIRST + #endif /* CONFIG_BPF_EVENTS */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index eaf2d3284248..f7fa7a34a62d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -168,6 +168,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_REUSEPORT, BPF_PROG_TYPE_FLOW_DISSECTOR, BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, }; enum bpf_attach_type { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b0de49598341..ae141e745f92 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1789,12 +1789,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) } raw_tp->btp = btp; - prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd, - BPF_PROG_TYPE_RAW_TRACEPOINT); + prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); if (IS_ERR(prog)) { err = PTR_ERR(prog); goto out_free_tp; } + if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT && + prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) { + err = -EINVAL; + goto out_put_prog; + } err = bpf_probe_register(raw_tp->btp, prog); if (err) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 423f242a5efb..2ef442c62c0e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -405,6 +405,7 @@ static const char * const reg_type_str[] = { [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", [PTR_TO_TCP_SOCK] = "tcp_sock", [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", + [PTR_TO_TP_BUFFER] = "tp_buffer", }; static char slot_type_char[] = { @@ -1993,6 +1994,32 @@ static int check_ctx_reg(struct bpf_verifier_env *env, return 0; } +static int check_tp_buffer_access(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + int regno, int off, int size) +{ + if (off < 0) { + verbose(env, + "R%d invalid tracepoint buffer access: off=%d, size=%d", + regno, off, size); + return -EACCES; + } + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, + "R%d invalid variable buffer offset: off=%d, var_off=%s", + regno, off, tn_buf); + return -EACCES; + } + if (off + size > env->prog->aux->max_tp_access) + env->prog->aux->max_tp_access = off + size; + + return 0; +} + + /* truncate register to smaller size (in bytes) * must be called with size < BPF_REG_SIZE */ @@ -2137,6 +2164,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_sock_access(env, insn_idx, regno, off, size, t); if (!err && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_TP_BUFFER) { + err = check_tp_buffer_access(env, reg, regno, off, size); + if (!err && t == BPF_READ && value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 91800be0c8eb..8607aba1d882 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -915,6 +915,27 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = { const struct bpf_prog_ops raw_tracepoint_prog_ops = { }; +static bool raw_tp_writable_prog_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (off == 0) { + if (size != sizeof(u64) || type != BPF_READ) + return false; + info->reg_type = PTR_TO_TP_BUFFER; + } + return raw_tp_prog_is_valid_access(off, size, type, prog, info); +} + +const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = { + .get_func_proto = raw_tp_prog_func_proto, + .is_valid_access = raw_tp_writable_prog_is_valid_access, +}; + +const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = { +}; + static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) @@ -1204,6 +1225,9 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog * if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64)) return -EINVAL; + if (prog->aux->max_tp_access > btp->writable_size) + return -EINVAL; + return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog); } -- cgit From 6ac99e8f23d4b10258406ca0dd7bffca5f31da9d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 26 Apr 2019 16:39:39 -0700 Subject: bpf: Introduce bpf sk local storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After allowing a bpf prog to - directly read the skb->sk ptr - get the fullsock bpf_sock by "bpf_sk_fullsock()" - get the bpf_tcp_sock by "bpf_tcp_sock()" - get the listener sock by "bpf_get_listener_sock()" - avoid duplicating the fields of "(bpf_)sock" and "(bpf_)tcp_sock" into different bpf running context. this patch is another effort to make bpf's network programming more intuitive to do (together with memory and performance benefit). When bpf prog needs to store data for a sk, the current practice is to define a map with the usual 4-tuples (src/dst ip/port) as the key. If multiple bpf progs require to store different sk data, multiple maps have to be defined. Hence, wasting memory to store the duplicated keys (i.e. 4 tuples here) in each of the bpf map. [ The smallest key could be the sk pointer itself which requires some enhancement in the verifier and it is a separate topic. ] Also, the bpf prog needs to clean up the elem when sk is freed. Otherwise, the bpf map will become full and un-usable quickly. The sk-free tracking currently could be done during sk state transition (e.g. BPF_SOCK_OPS_STATE_CB). The size of the map needs to be predefined which then usually ended-up with an over-provisioned map in production. Even the map was re-sizable, while the sk naturally come and go away already, this potential re-size operation is arguably redundant if the data can be directly connected to the sk itself instead of proxy-ing through a bpf map. This patch introduces sk->sk_bpf_storage to provide local storage space at sk for bpf prog to use. The space will be allocated when the first bpf prog has created data for this particular sk. The design optimizes the bpf prog's lookup (and then optionally followed by an inline update). bpf_spin_lock should be used if the inline update needs to be protected. BPF_MAP_TYPE_SK_STORAGE: ----------------------- To define a bpf "sk-local-storage", a BPF_MAP_TYPE_SK_STORAGE map (new in this patch) needs to be created. Multiple BPF_MAP_TYPE_SK_STORAGE maps can be created to fit different bpf progs' needs. The map enforces BTF to allow printing the sk-local-storage during a system-wise sk dump (e.g. "ss -ta") in the future. The purpose of a BPF_MAP_TYPE_SK_STORAGE map is not for lookup/update/delete a "sk-local-storage" data from a particular sk. Think of the map as a meta-data (or "type") of a "sk-local-storage". This particular "type" of "sk-local-storage" data can then be stored in any sk. The main purposes of this map are mostly: 1. Define the size of a "sk-local-storage" type. 2. Provide a similar syscall userspace API as the map (e.g. lookup/update, map-id, map-btf...etc.) 3. Keep track of all sk's storages of this "type" and clean them up when the map is freed. sk->sk_bpf_storage: ------------------ The main lookup/update/delete is done on sk->sk_bpf_storage (which is a "struct bpf_sk_storage"). When doing a lookup, the "map" pointer is now used as the "key" to search on the sk_storage->list. The "map" pointer is actually serving as the "type" of the "sk-local-storage" that is being requested. To allow very fast lookup, it should be as fast as looking up an array at a stable-offset. At the same time, it is not ideal to set a hard limit on the number of sk-local-storage "type" that the system can have. Hence, this patch takes a cache approach. The last search result from sk_storage->list is cached in sk_storage->cache[] which is a stable sized array. Each "sk-local-storage" type has a stable offset to the cache[] array. In the future, a map's flag could be introduced to do cache opt-out/enforcement if it became necessary. The cache size is 16 (i.e. 16 types of "sk-local-storage"). Programs can share map. On the program side, having a few bpf_progs running in the networking hotpath is already a lot. The bpf_prog should have already consolidated the existing sock-key-ed map usage to minimize the map lookup penalty. 16 has enough runway to grow. All sk-local-storage data will be removed from sk->sk_bpf_storage during sk destruction. bpf_sk_storage_get() and bpf_sk_storage_delete(): ------------------------------------------------ Instead of using bpf_map_(lookup|update|delete)_elem(), the bpf prog needs to use the new helper bpf_sk_storage_get() and bpf_sk_storage_delete(). The verifier can then enforce the ARG_PTR_TO_SOCKET argument. The bpf_sk_storage_get() also allows to "create" new elem if one does not exist in the sk. It is done by the new BPF_SK_STORAGE_GET_F_CREATE flag. An optional value can also be provided as the initial value during BPF_SK_STORAGE_GET_F_CREATE. The BPF_MAP_TYPE_SK_STORAGE also supports bpf_spin_lock. Together, it has eliminated the potential use cases for an equivalent bpf_map_update_elem() API (for bpf_prog) in this patch. Misc notes: ---------- 1. map_get_next_key is not supported. From the userspace syscall perspective, the map has the socket fd as the key while the map can be shared by pinned-file or map-id. Since btf is enforced, the existing "ss" could be enhanced to pretty print the local-storage. Supporting a kernel defined btf with 4 tuples as the return key could be explored later also. 2. The sk->sk_lock cannot be acquired. Atomic operations is used instead. e.g. cmpxchg is done on the sk->sk_bpf_storage ptr. Please refer to the source code comments for the details in synchronization cases and considerations. 3. The mem is charged to the sk->sk_omem_alloc as the sk filter does. Benchmark: --------- Here is the benchmark data collected by turning on the "kernel.bpf_stats_enabled" sysctl. Two bpf progs are tested: One bpf prog with the usual bpf hashmap (max_entries = 8192) with the sk ptr as the key. (verifier is modified to support sk ptr as the key That should have shortened the key lookup time.) Another bpf prog is with the new BPF_MAP_TYPE_SK_STORAGE. Both are storing a "u32 cnt", do a lookup on "egress_skb/cgroup" for each egress skb and then bump the cnt. netperf is used to drive data with 4096 connected UDP sockets. BPF_MAP_TYPE_HASH with a modifier verifier (152ns per bpf run) 27: cgroup_skb name egress_sk_map tag 74f56e832918070b run_time_ns 58280107540 run_cnt 381347633 loaded_at 2019-04-15T13:46:39-0700 uid 0 xlated 344B jited 258B memlock 4096B map_ids 16 btf_id 5 BPF_MAP_TYPE_SK_STORAGE in this patch (66ns per bpf run) 30: cgroup_skb name egress_sk_stora tag d4aa70984cc7bbf6 run_time_ns 25617093319 run_cnt 390989739 loaded_at 2019-04-15T13:47:54-0700 uid 0 xlated 168B jited 156B memlock 4096B map_ids 17 btf_id 6 Here is a high-level picture on how are the objects organized: sk ┌──────┐ │ │ │ │ │ │ │*sk_bpf_storage─────▶ bpf_sk_storage └──────┘ ┌───────┐ ┌───────────┤ list │ │ │ │ │ │ │ │ │ │ │ └───────┘ │ │ elem │ ┌────────┐ ├─▶│ snode │ │ ├────────┤ │ │ data │ bpf_map │ ├────────┤ ┌─────────┐ │ │map_node│◀─┬─────┤ list │ │ └────────┘ │ │ │ │ │ │ │ │ elem │ │ │ │ ┌────────┐ │ └─────────┘ └─▶│ snode │ │ ├────────┤ │ bpf_map │ data │ │ ┌─────────┐ ├────────┤ │ │ list ├───────▶│map_node│ │ │ │ └────────┘ │ │ │ │ │ │ elem │ └─────────┘ ┌────────┐ │ ┌─▶│ snode │ │ │ ├────────┤ │ │ │ data │ │ │ ├────────┤ │ │ │map_node│◀─┘ │ └────────┘ │ │ │ ┌───────┐ sk └──────────│ list │ ┌──────┐ │ │ │ │ │ │ │ │ │ │ │ │ └───────┘ │*sk_bpf_storage───────▶bpf_sk_storage └──────┘ Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 + include/linux/bpf_types.h | 1 + include/net/bpf_sk_storage.h | 13 + include/net/sock.h | 5 + include/uapi/linux/bpf.h | 44 ++- kernel/bpf/syscall.c | 3 +- kernel/bpf/verifier.c | 27 +- net/bpf/test_run.c | 2 + net/core/Makefile | 1 + net/core/bpf_sk_storage.c | 804 +++++++++++++++++++++++++++++++++++++++++++ net/core/filter.c | 12 + net/core/sock.c | 5 + 12 files changed, 914 insertions(+), 5 deletions(-) create mode 100644 include/net/bpf_sk_storage.h create mode 100644 net/core/bpf_sk_storage.c (limited to 'kernel') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cd6341eabd74..9a21848fdb07 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -184,6 +184,7 @@ enum bpf_arg_type { ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ + ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */ /* the following constraints used to prototype bpf_memcmp() and other * functions that access data on eBPF program stack @@ -204,6 +205,7 @@ enum bpf_arg_type { ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ ARG_PTR_TO_INT, /* pointer to int */ ARG_PTR_TO_LONG, /* pointer to long */ + ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ }; /* type of values returned from helper functions */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index a10d37bce364..5a9975678d6f 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -61,6 +61,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) #ifdef CONFIG_NET BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) #if defined(CONFIG_BPF_STREAM_PARSER) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h new file mode 100644 index 000000000000..b9dcb02e756b --- /dev/null +++ b/include/net/bpf_sk_storage.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Facebook */ +#ifndef _BPF_SK_STORAGE_H +#define _BPF_SK_STORAGE_H + +struct sock; + +void bpf_sk_storage_free(struct sock *sk); + +extern const struct bpf_func_proto bpf_sk_storage_get_proto; +extern const struct bpf_func_proto bpf_sk_storage_delete_proto; + +#endif /* _BPF_SK_STORAGE_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 784cd19d5ff7..4d208c0f9c14 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -236,6 +236,8 @@ struct sock_common { /* public: */ }; +struct bpf_sk_storage; + /** * struct sock - network layer representation of sockets * @__sk_common: shared layout with inet_timewait_sock @@ -510,6 +512,9 @@ struct sock { #endif void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; +#ifdef CONFIG_BPF_SYSCALL + struct bpf_sk_storage __rcu *sk_bpf_storage; +#endif struct rcu_head sk_rcu; }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f7fa7a34a62d..72336bac7573 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -133,6 +133,7 @@ enum bpf_map_type { BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, + BPF_MAP_TYPE_SK_STORAGE, }; /* Note that tracing related programs such as @@ -2630,6 +2631,42 @@ union bpf_attr { * was provided. * * **-ERANGE** if resulting value was out of range. + * + * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) + * Description + * Get a bpf-local-storage from a sk. + * + * Logically, it could be thought of getting the value from + * a *map* with *sk* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem(map, &sk)** except this + * helper enforces the key must be a **bpf_fullsock()** + * and the map must be a BPF_MAP_TYPE_SK_STORAGE also. + * + * Underneath, the value is stored locally at *sk* instead of + * the map. The *map* is used as the bpf-local-storage **type**. + * The bpf-local-storage **type** (i.e. the *map*) is searched + * against all bpf-local-storages residing at sk. + * + * An optional *flags* (BPF_SK_STORAGE_GET_F_CREATE) can be + * used such that a new bpf-local-storage will be + * created if one does not exist. *value* can be used + * together with BPF_SK_STORAGE_GET_F_CREATE to specify + * the initial value of a bpf-local-storage. If *value* is + * NULL, the new bpf-local-storage will be zero initialized. + * Return + * A bpf-local-storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf-local-storage. + * + * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * Description + * Delete a bpf-local-storage from a sk. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf-local-storage cannot be found. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2738,7 +2775,9 @@ union bpf_attr { FN(sysctl_get_new_value), \ FN(sysctl_set_new_value), \ FN(strtol), \ - FN(strtoul), + FN(strtoul), \ + FN(sk_storage_get), \ + FN(sk_storage_delete), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2814,6 +2853,9 @@ enum bpf_func_id { /* BPF_FUNC_sysctl_get_name flags. */ #define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) +/* BPF_FUNC_sk_storage_get flags */ +#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ae141e745f92..ad3ccf82f31d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -526,7 +526,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, return -EACCES; if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_ARRAY && - map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE) + map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && + map->map_type != BPF_MAP_TYPE_SK_STORAGE) return -ENOTSUPP; if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > map->value_size) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2ef442c62c0e..271717246af3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2543,10 +2543,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, if (arg_type == ARG_PTR_TO_MAP_KEY || arg_type == ARG_PTR_TO_MAP_VALUE || - arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { + arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || + arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { expected_type = PTR_TO_STACK; - if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE && - type != expected_type) + if (register_is_null(reg) && + arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) + /* final test in check_stack_boundary() */; + else if (!type_is_pkt_pointer(type) && + type != PTR_TO_MAP_VALUE && + type != expected_type) goto err_type; } else if (arg_type == ARG_CONST_SIZE || arg_type == ARG_CONST_SIZE_OR_ZERO) { @@ -2578,6 +2583,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, } meta->ref_obj_id = reg->ref_obj_id; } + } else if (arg_type == ARG_PTR_TO_SOCKET) { + expected_type = PTR_TO_SOCKET; + if (type != expected_type) + goto err_type; } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { if (meta->func_id == BPF_FUNC_spin_lock) { if (process_spin_lock(env, regno, true)) @@ -2635,6 +2644,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, meta->map_ptr->key_size, false, NULL); } else if (arg_type == ARG_PTR_TO_MAP_VALUE || + (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL && + !register_is_null(reg)) || arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { /* bpf_map_xxx(..., map_ptr, ..., value) call: * check [value, value + map->value_size) validity @@ -2784,6 +2795,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_map_push_elem) goto error; break; + case BPF_MAP_TYPE_SK_STORAGE: + if (func_id != BPF_FUNC_sk_storage_get && + func_id != BPF_FUNC_sk_storage_delete) + goto error; + break; default: break; } @@ -2847,6 +2863,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, map->map_type != BPF_MAP_TYPE_STACK) goto error; break; + case BPF_FUNC_sk_storage_get: + case BPF_FUNC_sk_storage_delete: + if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) + goto error; + break; default: break; } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 6c4694ae4241..33e0dc168c16 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -335,6 +336,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, sizeof(struct __sk_buff)); out: kfree_skb(skb); + bpf_sk_storage_free(sk); kfree(sk); kfree(ctx); return ret; diff --git a/net/core/Makefile b/net/core/Makefile index f97d6254e564..a104dc8faafc 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -34,3 +34,4 @@ obj-$(CONFIG_HWBM) += hwbm.o obj-$(CONFIG_NET_DEVLINK) += devlink.o obj-$(CONFIG_GRO_CELLS) += gro_cells.o obj-$(CONFIG_FAILOVER) += failover.o +obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c new file mode 100644 index 000000000000..a8e9ac71b22d --- /dev/null +++ b/net/core/bpf_sk_storage.c @@ -0,0 +1,804 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static atomic_t cache_idx; + +struct bucket { + struct hlist_head list; + raw_spinlock_t lock; +}; + +/* Thp map is not the primary owner of a bpf_sk_storage_elem. + * Instead, the sk->sk_bpf_storage is. + * + * The map (bpf_sk_storage_map) is for two purposes + * 1. Define the size of the "sk local storage". It is + * the map's value_size. + * + * 2. Maintain a list to keep track of all elems such + * that they can be cleaned up during the map destruction. + * + * When a bpf local storage is being looked up for a + * particular sk, the "bpf_map" pointer is actually used + * as the "key" to search in the list of elem in + * sk->sk_bpf_storage. + * + * Hence, consider sk->sk_bpf_storage is the mini-map + * with the "bpf_map" pointer as the searching key. + */ +struct bpf_sk_storage_map { + struct bpf_map map; + /* Lookup elem does not require accessing the map. + * + * Updating/Deleting requires a bucket lock to + * link/unlink the elem from the map. Having + * multiple buckets to improve contention. + */ + struct bucket *buckets; + u32 bucket_log; + u16 elem_size; + u16 cache_idx; +}; + +struct bpf_sk_storage_data { + /* smap is used as the searching key when looking up + * from sk->sk_bpf_storage. + * + * Put it in the same cacheline as the data to minimize + * the number of cachelines access during the cache hit case. + */ + struct bpf_sk_storage_map __rcu *smap; + u8 data[0] __aligned(8); +}; + +/* Linked to bpf_sk_storage and bpf_sk_storage_map */ +struct bpf_sk_storage_elem { + struct hlist_node map_node; /* Linked to bpf_sk_storage_map */ + struct hlist_node snode; /* Linked to bpf_sk_storage */ + struct bpf_sk_storage __rcu *sk_storage; + struct rcu_head rcu; + /* 8 bytes hole */ + /* The data is stored in aother cacheline to minimize + * the number of cachelines access during a cache hit. + */ + struct bpf_sk_storage_data sdata ____cacheline_aligned; +}; + +#define SELEM(_SDATA) container_of((_SDATA), struct bpf_sk_storage_elem, sdata) +#define SDATA(_SELEM) (&(_SELEM)->sdata) +#define BPF_SK_STORAGE_CACHE_SIZE 16 + +struct bpf_sk_storage { + struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE]; + struct hlist_head list; /* List of bpf_sk_storage_elem */ + struct sock *sk; /* The sk that owns the the above "list" of + * bpf_sk_storage_elem. + */ + struct rcu_head rcu; + raw_spinlock_t lock; /* Protect adding/removing from the "list" */ +}; + +static struct bucket *select_bucket(struct bpf_sk_storage_map *smap, + struct bpf_sk_storage_elem *selem) +{ + return &smap->buckets[hash_ptr(selem, smap->bucket_log)]; +} + +static int omem_charge(struct sock *sk, unsigned int size) +{ + /* same check as in sock_kmalloc() */ + if (size <= sysctl_optmem_max && + atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { + atomic_add(size, &sk->sk_omem_alloc); + return 0; + } + + return -ENOMEM; +} + +static bool selem_linked_to_sk(const struct bpf_sk_storage_elem *selem) +{ + return !hlist_unhashed(&selem->snode); +} + +static bool selem_linked_to_map(const struct bpf_sk_storage_elem *selem) +{ + return !hlist_unhashed(&selem->map_node); +} + +static struct bpf_sk_storage_elem *selem_alloc(struct bpf_sk_storage_map *smap, + struct sock *sk, void *value, + bool charge_omem) +{ + struct bpf_sk_storage_elem *selem; + + if (charge_omem && omem_charge(sk, smap->elem_size)) + return NULL; + + selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN); + if (selem) { + if (value) + memcpy(SDATA(selem)->data, value, smap->map.value_size); + return selem; + } + + if (charge_omem) + atomic_sub(smap->elem_size, &sk->sk_omem_alloc); + + return NULL; +} + +/* sk_storage->lock must be held and selem->sk_storage == sk_storage. + * The caller must ensure selem->smap is still valid to be + * dereferenced for its smap->elem_size and smap->cache_idx. + */ +static bool __selem_unlink_sk(struct bpf_sk_storage *sk_storage, + struct bpf_sk_storage_elem *selem, + bool uncharge_omem) +{ + struct bpf_sk_storage_map *smap; + bool free_sk_storage; + struct sock *sk; + + smap = rcu_dereference(SDATA(selem)->smap); + sk = sk_storage->sk; + + /* All uncharging on sk->sk_omem_alloc must be done first. + * sk may be freed once the last selem is unlinked from sk_storage. + */ + if (uncharge_omem) + atomic_sub(smap->elem_size, &sk->sk_omem_alloc); + + free_sk_storage = hlist_is_singular_node(&selem->snode, + &sk_storage->list); + if (free_sk_storage) { + atomic_sub(sizeof(struct bpf_sk_storage), &sk->sk_omem_alloc); + sk_storage->sk = NULL; + /* After this RCU_INIT, sk may be freed and cannot be used */ + RCU_INIT_POINTER(sk->sk_bpf_storage, NULL); + + /* sk_storage is not freed now. sk_storage->lock is + * still held and raw_spin_unlock_bh(&sk_storage->lock) + * will be done by the caller. + * + * Although the unlock will be done under + * rcu_read_lock(), it is more intutivie to + * read if kfree_rcu(sk_storage, rcu) is done + * after the raw_spin_unlock_bh(&sk_storage->lock). + * + * Hence, a "bool free_sk_storage" is returned + * to the caller which then calls the kfree_rcu() + * after unlock. + */ + } + hlist_del_init_rcu(&selem->snode); + if (rcu_access_pointer(sk_storage->cache[smap->cache_idx]) == + SDATA(selem)) + RCU_INIT_POINTER(sk_storage->cache[smap->cache_idx], NULL); + + kfree_rcu(selem, rcu); + + return free_sk_storage; +} + +static void selem_unlink_sk(struct bpf_sk_storage_elem *selem) +{ + struct bpf_sk_storage *sk_storage; + bool free_sk_storage = false; + + if (unlikely(!selem_linked_to_sk(selem))) + /* selem has already been unlinked from sk */ + return; + + sk_storage = rcu_dereference(selem->sk_storage); + raw_spin_lock_bh(&sk_storage->lock); + if (likely(selem_linked_to_sk(selem))) + free_sk_storage = __selem_unlink_sk(sk_storage, selem, true); + raw_spin_unlock_bh(&sk_storage->lock); + + if (free_sk_storage) + kfree_rcu(sk_storage, rcu); +} + +/* sk_storage->lock must be held and sk_storage->list cannot be empty */ +static void __selem_link_sk(struct bpf_sk_storage *sk_storage, + struct bpf_sk_storage_elem *selem) +{ + RCU_INIT_POINTER(selem->sk_storage, sk_storage); + hlist_add_head(&selem->snode, &sk_storage->list); +} + +static void selem_unlink_map(struct bpf_sk_storage_elem *selem) +{ + struct bpf_sk_storage_map *smap; + struct bucket *b; + + if (unlikely(!selem_linked_to_map(selem))) + /* selem has already be unlinked from smap */ + return; + + smap = rcu_dereference(SDATA(selem)->smap); + b = select_bucket(smap, selem); + raw_spin_lock_bh(&b->lock); + if (likely(selem_linked_to_map(selem))) + hlist_del_init_rcu(&selem->map_node); + raw_spin_unlock_bh(&b->lock); +} + +static void selem_link_map(struct bpf_sk_storage_map *smap, + struct bpf_sk_storage_elem *selem) +{ + struct bucket *b = select_bucket(smap, selem); + + raw_spin_lock_bh(&b->lock); + RCU_INIT_POINTER(SDATA(selem)->smap, smap); + hlist_add_head_rcu(&selem->map_node, &b->list); + raw_spin_unlock_bh(&b->lock); +} + +static void selem_unlink(struct bpf_sk_storage_elem *selem) +{ + /* Always unlink from map before unlinking from sk_storage + * because selem will be freed after successfully unlinked from + * the sk_storage. + */ + selem_unlink_map(selem); + selem_unlink_sk(selem); +} + +static struct bpf_sk_storage_data * +__sk_storage_lookup(struct bpf_sk_storage *sk_storage, + struct bpf_sk_storage_map *smap, + bool cacheit_lockit) +{ + struct bpf_sk_storage_data *sdata; + struct bpf_sk_storage_elem *selem; + + /* Fast path (cache hit) */ + sdata = rcu_dereference(sk_storage->cache[smap->cache_idx]); + if (sdata && rcu_access_pointer(sdata->smap) == smap) + return sdata; + + /* Slow path (cache miss) */ + hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) + if (rcu_access_pointer(SDATA(selem)->smap) == smap) + break; + + if (!selem) + return NULL; + + sdata = SDATA(selem); + if (cacheit_lockit) { + /* spinlock is needed to avoid racing with the + * parallel delete. Otherwise, publishing an already + * deleted sdata to the cache will become a use-after-free + * problem in the next __sk_storage_lookup(). + */ + raw_spin_lock_bh(&sk_storage->lock); + if (selem_linked_to_sk(selem)) + rcu_assign_pointer(sk_storage->cache[smap->cache_idx], + sdata); + raw_spin_unlock_bh(&sk_storage->lock); + } + + return sdata; +} + +static struct bpf_sk_storage_data * +sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit) +{ + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_map *smap; + + sk_storage = rcu_dereference(sk->sk_bpf_storage); + if (!sk_storage) + return NULL; + + smap = (struct bpf_sk_storage_map *)map; + return __sk_storage_lookup(sk_storage, smap, cacheit_lockit); +} + +static int check_flags(const struct bpf_sk_storage_data *old_sdata, + u64 map_flags) +{ + if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) + /* elem already exists */ + return -EEXIST; + + if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST) + /* elem doesn't exist, cannot update it */ + return -ENOENT; + + return 0; +} + +static int sk_storage_alloc(struct sock *sk, + struct bpf_sk_storage_map *smap, + struct bpf_sk_storage_elem *first_selem) +{ + struct bpf_sk_storage *prev_sk_storage, *sk_storage; + int err; + + err = omem_charge(sk, sizeof(*sk_storage)); + if (err) + return err; + + sk_storage = kzalloc(sizeof(*sk_storage), GFP_ATOMIC | __GFP_NOWARN); + if (!sk_storage) { + err = -ENOMEM; + goto uncharge; + } + INIT_HLIST_HEAD(&sk_storage->list); + raw_spin_lock_init(&sk_storage->lock); + sk_storage->sk = sk; + + __selem_link_sk(sk_storage, first_selem); + selem_link_map(smap, first_selem); + /* Publish sk_storage to sk. sk->sk_lock cannot be acquired. + * Hence, atomic ops is used to set sk->sk_bpf_storage + * from NULL to the newly allocated sk_storage ptr. + * + * From now on, the sk->sk_bpf_storage pointer is protected + * by the sk_storage->lock. Hence, when freeing + * the sk->sk_bpf_storage, the sk_storage->lock must + * be held before setting sk->sk_bpf_storage to NULL. + */ + prev_sk_storage = cmpxchg((struct bpf_sk_storage **)&sk->sk_bpf_storage, + NULL, sk_storage); + if (unlikely(prev_sk_storage)) { + selem_unlink_map(first_selem); + err = -EAGAIN; + goto uncharge; + + /* Note that even first_selem was linked to smap's + * bucket->list, first_selem can be freed immediately + * (instead of kfree_rcu) because + * bpf_sk_storage_map_free() does a + * synchronize_rcu() before walking the bucket->list. + * Hence, no one is accessing selem from the + * bucket->list under rcu_read_lock(). + */ + } + + return 0; + +uncharge: + kfree(sk_storage); + atomic_sub(sizeof(*sk_storage), &sk->sk_omem_alloc); + return err; +} + +/* sk cannot be going away because it is linking new elem + * to sk->sk_bpf_storage. (i.e. sk->sk_refcnt cannot be 0). + * Otherwise, it will become a leak (and other memory issues + * during map destruction). + */ +static struct bpf_sk_storage_data *sk_storage_update(struct sock *sk, + struct bpf_map *map, + void *value, + u64 map_flags) +{ + struct bpf_sk_storage_data *old_sdata = NULL; + struct bpf_sk_storage_elem *selem; + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_map *smap; + int err; + + /* BPF_EXIST and BPF_NOEXIST cannot be both set */ + if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) || + /* BPF_F_LOCK can only be used in a value with spin_lock */ + unlikely((map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map))) + return ERR_PTR(-EINVAL); + + smap = (struct bpf_sk_storage_map *)map; + sk_storage = rcu_dereference(sk->sk_bpf_storage); + if (!sk_storage || hlist_empty(&sk_storage->list)) { + /* Very first elem for this sk */ + err = check_flags(NULL, map_flags); + if (err) + return ERR_PTR(err); + + selem = selem_alloc(smap, sk, value, true); + if (!selem) + return ERR_PTR(-ENOMEM); + + err = sk_storage_alloc(sk, smap, selem); + if (err) { + kfree(selem); + atomic_sub(smap->elem_size, &sk->sk_omem_alloc); + return ERR_PTR(err); + } + + return SDATA(selem); + } + + if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) { + /* Hoping to find an old_sdata to do inline update + * such that it can avoid taking the sk_storage->lock + * and changing the lists. + */ + old_sdata = __sk_storage_lookup(sk_storage, smap, false); + err = check_flags(old_sdata, map_flags); + if (err) + return ERR_PTR(err); + if (old_sdata && selem_linked_to_sk(SELEM(old_sdata))) { + copy_map_value_locked(map, old_sdata->data, + value, false); + return old_sdata; + } + } + + raw_spin_lock_bh(&sk_storage->lock); + + /* Recheck sk_storage->list under sk_storage->lock */ + if (unlikely(hlist_empty(&sk_storage->list))) { + /* A parallel del is happening and sk_storage is going + * away. It has just been checked before, so very + * unlikely. Return instead of retry to keep things + * simple. + */ + err = -EAGAIN; + goto unlock_err; + } + + old_sdata = __sk_storage_lookup(sk_storage, smap, false); + err = check_flags(old_sdata, map_flags); + if (err) + goto unlock_err; + + if (old_sdata && (map_flags & BPF_F_LOCK)) { + copy_map_value_locked(map, old_sdata->data, value, false); + selem = SELEM(old_sdata); + goto unlock; + } + + /* sk_storage->lock is held. Hence, we are sure + * we can unlink and uncharge the old_sdata successfully + * later. Hence, instead of charging the new selem now + * and then uncharge the old selem later (which may cause + * a potential but unnecessary charge failure), avoid taking + * a charge at all here (the "!old_sdata" check) and the + * old_sdata will not be uncharged later during __selem_unlink_sk(). + */ + selem = selem_alloc(smap, sk, value, !old_sdata); + if (!selem) { + err = -ENOMEM; + goto unlock_err; + } + + /* First, link the new selem to the map */ + selem_link_map(smap, selem); + + /* Second, link (and publish) the new selem to sk_storage */ + __selem_link_sk(sk_storage, selem); + + /* Third, remove old selem, SELEM(old_sdata) */ + if (old_sdata) { + selem_unlink_map(SELEM(old_sdata)); + __selem_unlink_sk(sk_storage, SELEM(old_sdata), false); + } + +unlock: + raw_spin_unlock_bh(&sk_storage->lock); + return SDATA(selem); + +unlock_err: + raw_spin_unlock_bh(&sk_storage->lock); + return ERR_PTR(err); +} + +static int sk_storage_delete(struct sock *sk, struct bpf_map *map) +{ + struct bpf_sk_storage_data *sdata; + + sdata = sk_storage_lookup(sk, map, false); + if (!sdata) + return -ENOENT; + + selem_unlink(SELEM(sdata)); + + return 0; +} + +/* Called by __sk_destruct() */ +void bpf_sk_storage_free(struct sock *sk) +{ + struct bpf_sk_storage_elem *selem; + struct bpf_sk_storage *sk_storage; + bool free_sk_storage = false; + struct hlist_node *n; + + rcu_read_lock(); + sk_storage = rcu_dereference(sk->sk_bpf_storage); + if (!sk_storage) { + rcu_read_unlock(); + return; + } + + /* Netiher the bpf_prog nor the bpf-map's syscall + * could be modifying the sk_storage->list now. + * Thus, no elem can be added-to or deleted-from the + * sk_storage->list by the bpf_prog or by the bpf-map's syscall. + * + * It is racing with bpf_sk_storage_map_free() alone + * when unlinking elem from the sk_storage->list and + * the map's bucket->list. + */ + raw_spin_lock_bh(&sk_storage->lock); + hlist_for_each_entry_safe(selem, n, &sk_storage->list, snode) { + /* Always unlink from map before unlinking from + * sk_storage. + */ + selem_unlink_map(selem); + free_sk_storage = __selem_unlink_sk(sk_storage, selem, true); + } + raw_spin_unlock_bh(&sk_storage->lock); + rcu_read_unlock(); + + if (free_sk_storage) + kfree_rcu(sk_storage, rcu); +} + +static void bpf_sk_storage_map_free(struct bpf_map *map) +{ + struct bpf_sk_storage_elem *selem; + struct bpf_sk_storage_map *smap; + struct bucket *b; + unsigned int i; + + smap = (struct bpf_sk_storage_map *)map; + + synchronize_rcu(); + + /* bpf prog and the userspace can no longer access this map + * now. No new selem (of this map) can be added + * to the sk->sk_bpf_storage or to the map bucket's list. + * + * The elem of this map can be cleaned up here + * or + * by bpf_sk_storage_free() during __sk_destruct(). + */ + for (i = 0; i < (1U << smap->bucket_log); i++) { + b = &smap->buckets[i]; + + rcu_read_lock(); + /* No one is adding to b->list now */ + while ((selem = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(&b->list)), + struct bpf_sk_storage_elem, + map_node))) { + selem_unlink(selem); + cond_resched_rcu(); + } + rcu_read_unlock(); + } + + /* bpf_sk_storage_free() may still need to access the map. + * e.g. bpf_sk_storage_free() has unlinked selem from the map + * which then made the above while((selem = ...)) loop + * exited immediately. + * + * However, the bpf_sk_storage_free() still needs to access + * the smap->elem_size to do the uncharging in + * __selem_unlink_sk(). + * + * Hence, wait another rcu grace period for the + * bpf_sk_storage_free() to finish. + */ + synchronize_rcu(); + + kvfree(smap->buckets); + kfree(map); +} + +static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr) +{ + if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries || + attr->key_size != sizeof(int) || !attr->value_size || + /* Enforce BTF for userspace sk dumping */ + !attr->btf_key_type_id || !attr->btf_value_type_id) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (attr->value_size >= KMALLOC_MAX_SIZE - + MAX_BPF_STACK - sizeof(struct bpf_sk_storage_elem) || + /* U16_MAX is much more than enough for sk local storage + * considering a tcp_sock is ~2k. + */ + attr->value_size > U16_MAX - sizeof(struct bpf_sk_storage_elem)) + return -E2BIG; + + return 0; +} + +static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) +{ + struct bpf_sk_storage_map *smap; + unsigned int i; + u32 nbuckets; + u64 cost; + + smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN); + if (!smap) + return ERR_PTR(-ENOMEM); + bpf_map_init_from_attr(&smap->map, attr); + + smap->bucket_log = ilog2(roundup_pow_of_two(num_possible_cpus())); + nbuckets = 1U << smap->bucket_log; + smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets, + GFP_USER | __GFP_NOWARN); + if (!smap->buckets) { + kfree(smap); + return ERR_PTR(-ENOMEM); + } + cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap); + + for (i = 0; i < nbuckets; i++) { + INIT_HLIST_HEAD(&smap->buckets[i].list); + raw_spin_lock_init(&smap->buckets[i].lock); + } + + smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size; + smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) % + BPF_SK_STORAGE_CACHE_SIZE; + smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + + return &smap->map; +} + +static int notsupp_get_next_key(struct bpf_map *map, void *key, + void *next_key) +{ + return -ENOTSUPP; +} + +static int bpf_sk_storage_map_check_btf(const struct bpf_map *map, + const struct btf *btf, + const struct btf_type *key_type, + const struct btf_type *value_type) +{ + u32 int_data; + + if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) + return -EINVAL; + + int_data = *(u32 *)(key_type + 1); + if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data)) + return -EINVAL; + + return 0; +} + +static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_sk_storage_data *sdata; + struct socket *sock; + int fd, err; + + fd = *(int *)key; + sock = sockfd_lookup(fd, &err); + if (sock) { + sdata = sk_storage_lookup(sock->sk, map, true); + sockfd_put(sock); + return sdata ? sdata->data : NULL; + } + + return ERR_PTR(err); +} + +static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + struct bpf_sk_storage_data *sdata; + struct socket *sock; + int fd, err; + + fd = *(int *)key; + sock = sockfd_lookup(fd, &err); + if (sock) { + sdata = sk_storage_update(sock->sk, map, value, map_flags); + sockfd_put(sock); + return IS_ERR(sdata) ? PTR_ERR(sdata) : 0; + } + + return err; +} + +static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) +{ + struct socket *sock; + int fd, err; + + fd = *(int *)key; + sock = sockfd_lookup(fd, &err); + if (sock) { + err = sk_storage_delete(sock->sk, map); + sockfd_put(sock); + return err; + } + + return err; +} + +BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, + void *, value, u64, flags) +{ + struct bpf_sk_storage_data *sdata; + + if (flags > BPF_SK_STORAGE_GET_F_CREATE) + return (unsigned long)NULL; + + sdata = sk_storage_lookup(sk, map, true); + if (sdata) + return (unsigned long)sdata->data; + + if (flags == BPF_SK_STORAGE_GET_F_CREATE && + /* Cannot add new elem to a going away sk. + * Otherwise, the new elem may become a leak + * (and also other memory issues during map + * destruction). + */ + refcount_inc_not_zero(&sk->sk_refcnt)) { + sdata = sk_storage_update(sk, map, value, BPF_NOEXIST); + /* sk must be a fullsock (guaranteed by verifier), + * so sock_gen_put() is unnecessary. + */ + sock_put(sk); + return IS_ERR(sdata) ? + (unsigned long)NULL : (unsigned long)sdata->data; + } + + return (unsigned long)NULL; +} + +BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) +{ + if (refcount_inc_not_zero(&sk->sk_refcnt)) { + int err; + + err = sk_storage_delete(sk, map); + sock_put(sk); + return err; + } + + return -ENOENT; +} + +const struct bpf_map_ops sk_storage_map_ops = { + .map_alloc_check = bpf_sk_storage_map_alloc_check, + .map_alloc = bpf_sk_storage_map_alloc, + .map_free = bpf_sk_storage_map_free, + .map_get_next_key = notsupp_get_next_key, + .map_lookup_elem = bpf_fd_sk_storage_lookup_elem, + .map_update_elem = bpf_fd_sk_storage_update_elem, + .map_delete_elem = bpf_fd_sk_storage_delete_elem, + .map_check_btf = bpf_sk_storage_map_check_btf, +}; + +const struct bpf_func_proto bpf_sk_storage_get_proto = { + .func = bpf_sk_storage_get, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_SOCKET, + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; + +const struct bpf_func_proto bpf_sk_storage_delete_proto = { + .func = bpf_sk_storage_delete, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_SOCKET, +}; diff --git a/net/core/filter.c b/net/core/filter.c index 2f88baf39cc2..27b0dc01dc3f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -75,6 +75,7 @@ #include #include #include +#include /** * sk_filter_trim_cap - run a packet through a socket filter @@ -5903,6 +5904,9 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } } +const struct bpf_func_proto bpf_sk_storage_get_proto __weak; +const struct bpf_func_proto bpf_sk_storage_delete_proto __weak; + static const struct bpf_func_proto * cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -5911,6 +5915,10 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_local_storage_proto; case BPF_FUNC_sk_fullsock: return &bpf_sk_fullsock_proto; + case BPF_FUNC_sk_storage_get: + return &bpf_sk_storage_get_proto; + case BPF_FUNC_sk_storage_delete: + return &bpf_sk_storage_delete_proto; #ifdef CONFIG_INET case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; @@ -5992,6 +6000,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skb_fib_lookup_proto; case BPF_FUNC_sk_fullsock: return &bpf_sk_fullsock_proto; + case BPF_FUNC_sk_storage_get: + return &bpf_sk_storage_get_proto; + case BPF_FUNC_sk_storage_delete: + return &bpf_sk_storage_delete_proto; #ifdef CONFIG_XFRM case BPF_FUNC_skb_get_xfrm_state: return &bpf_skb_get_xfrm_state_proto; diff --git a/net/core/sock.c b/net/core/sock.c index 443b98d05f1e..9773be724aa9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -137,6 +137,7 @@ #include #include +#include #include @@ -1709,6 +1710,10 @@ static void __sk_destruct(struct rcu_head *head) sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); +#ifdef CONFIG_BPF_SYSCALL + bpf_sk_storage_free(sk); +#endif + if (atomic_read(&sk->sk_omem_alloc)) pr_debug("%s: optmem leakage (%d bytes) detected\n", __func__, atomic_read(&sk->sk_omem_alloc)); -- cgit From ae0be8de9a53cda3505865c11826d8ff0640237c Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Fri, 26 Apr 2019 11:13:06 +0200 Subject: netlink: make nla_nest_start() add NLA_F_NESTED flag Even if the NLA_F_NESTED flag was introduced more than 11 years ago, most netlink based interfaces (including recently added ones) are still not setting it in kernel generated messages. Without the flag, message parsers not aware of attribute semantics (e.g. wireshark dissector or libmnl's mnl_nlmsg_fprintf()) cannot recognize nested attributes and won't display the structure of their contents. Unfortunately we cannot just add the flag everywhere as there may be userspace applications which check nlattr::nla_type directly rather than through a helper masking out the flags. Therefore the patch renames nla_nest_start() to nla_nest_start_noflag() and introduces nla_nest_start() as a wrapper adding NLA_F_NESTED. The calls which add NLA_F_NESTED manually are rewritten to use nla_nest_start(). Except for changes in include/net/netlink.h, the patch was generated using this semantic patch: @@ expression E1, E2; @@ -nla_nest_start(E1, E2) +nla_nest_start_noflag(E1, E2) @@ expression E1, E2; @@ -nla_nest_start_noflag(E1, E2 | NLA_F_NESTED) +nla_nest_start(E1, E2) Signed-off-by: Michal Kubecek Acked-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- drivers/block/drbd/drbd_nl.c | 8 +- drivers/block/nbd.c | 4 +- drivers/infiniband/core/nldev.c | 9 +- drivers/infiniband/hw/cxgb4/restrack.c | 8 +- drivers/net/bonding/bond_netlink.c | 8 +- drivers/net/ieee802154/mac802154_hwsim.c | 6 +- drivers/net/macsec.c | 27 ++-- drivers/net/macvlan.c | 2 +- drivers/net/team/team.c | 8 +- drivers/net/wireless/ath/wil6210/cfg80211.c | 4 +- include/linux/netfilter/ipset/ip_set.h | 2 +- include/net/netlink.h | 26 +++- kernel/taskstats.c | 2 +- net/8021q/vlan_netlink.c | 4 +- net/bridge/br_mdb.c | 17 +-- net/bridge/br_netlink.c | 6 +- net/bridge/br_netlink_tunnel.c | 2 +- net/core/devlink.c | 78 ++++++----- net/core/lwt_bpf.c | 2 +- net/core/lwtunnel.c | 2 +- net/core/neighbour.c | 2 +- net/core/rtnetlink.c | 48 +++---- net/dcb/dcbnl.c | 40 +++--- net/decnet/dn_table.c | 2 +- net/ieee802154/nl802154.c | 34 ++--- net/ipv4/fib_semantics.c | 2 +- net/ipv4/ipmr.c | 6 +- net/ipv4/ipmr_base.c | 2 +- net/ipv4/tcp_metrics.c | 2 +- net/ipv6/addrconf.c | 2 +- net/ipv6/route.c | 2 +- net/ipv6/seg6_local.c | 2 +- net/l2tp/l2tp_netlink.c | 4 +- net/mpls/af_mpls.c | 2 +- net/ncsi/ncsi-netlink.c | 12 +- net/netfilter/ipvs/ip_vs_ctl.c | 10 +- net/netfilter/nf_conntrack_netlink.c | 40 +++--- net/netfilter/nf_conntrack_proto_dccp.c | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- net/netfilter/nf_tables_api.c | 29 +++-- net/netfilter/nfnetlink_cthelper.c | 7 +- net/netfilter/nfnetlink_cttimeout.c | 4 +- net/netfilter/nfnetlink_queue.c | 2 +- net/netfilter/nft_ct.c | 2 +- net/netfilter/nft_tunnel.c | 6 +- net/netlabel/netlabel_cipso_v4.c | 14 +- net/netlabel/netlabel_mgmt.c | 8 +- net/netlink/genetlink.c | 12 +- net/nfc/netlink.c | 4 +- net/openvswitch/conntrack.c | 6 +- net/openvswitch/datapath.c | 7 +- net/openvswitch/flow_netlink.c | 33 ++--- net/openvswitch/meter.c | 8 +- net/openvswitch/vport-vxlan.c | 2 +- net/openvswitch/vport.c | 2 +- net/packet/diag.c | 2 +- net/sched/act_api.c | 14 +- net/sched/act_ife.c | 2 +- net/sched/act_pedit.c | 5 +- net/sched/act_tunnel_key.c | 4 +- net/sched/cls_api.c | 4 +- net/sched/cls_basic.c | 2 +- net/sched/cls_bpf.c | 2 +- net/sched/cls_cgroup.c | 2 +- net/sched/cls_flow.c | 2 +- net/sched/cls_flower.c | 8 +- net/sched/cls_fw.c | 2 +- net/sched/cls_matchall.c | 2 +- net/sched/cls_route.c | 2 +- net/sched/cls_rsvp.h | 2 +- net/sched/cls_tcindex.c | 2 +- net/sched/cls_u32.c | 2 +- net/sched/ematch.c | 4 +- net/sched/sch_api.c | 2 +- net/sched/sch_atm.c | 2 +- net/sched/sch_cake.c | 10 +- net/sched/sch_cbq.c | 4 +- net/sched/sch_cbs.c | 2 +- net/sched/sch_choke.c | 2 +- net/sched/sch_codel.c | 2 +- net/sched/sch_drr.c | 2 +- net/sched/sch_dsmark.c | 4 +- net/sched/sch_etf.c | 2 +- net/sched/sch_fq.c | 2 +- net/sched/sch_fq_codel.c | 2 +- net/sched/sch_gred.c | 8 +- net/sched/sch_hfsc.c | 2 +- net/sched/sch_hhf.c | 2 +- net/sched/sch_htb.c | 4 +- net/sched/sch_ingress.c | 2 +- net/sched/sch_mqprio.c | 4 +- net/sched/sch_netem.c | 2 +- net/sched/sch_pie.c | 2 +- net/sched/sch_qfq.c | 2 +- net/sched/sch_red.c | 2 +- net/sched/sch_sfb.c | 2 +- net/sched/sch_taprio.c | 7 +- net/sched/sch_tbf.c | 2 +- net/tipc/bearer.c | 8 +- net/tipc/group.c | 2 +- net/tipc/link.c | 12 +- net/tipc/monitor.c | 4 +- net/tipc/name_table.c | 4 +- net/tipc/net.c | 2 +- net/tipc/netlink_compat.c | 24 ++-- net/tipc/node.c | 4 +- net/tipc/socket.c | 10 +- net/tipc/udp_media.c | 2 +- net/wireless/nl80211.c | 192 +++++++++++++++------------- net/wireless/pmsr.c | 12 +- 111 files changed, 539 insertions(+), 466 deletions(-) (limited to 'kernel') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f2471172a961..1cb5a0b85fd9 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -114,7 +114,7 @@ static int drbd_msg_put_info(struct sk_buff *skb, const char *info) if (!info || !info[0]) return 0; - nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY); + nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_REPLY); if (!nla) return err; @@ -135,7 +135,7 @@ static int drbd_msg_sprintf_info(struct sk_buff *skb, const char *fmt, ...) int err = -EMSGSIZE; int len; - nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY); + nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_REPLY); if (!nla) return err; @@ -3269,7 +3269,7 @@ static int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_device *device) { struct nlattr *nla; - nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); + nla = nla_nest_start_noflag(skb, DRBD_NLA_CFG_CONTEXT); if (!nla) goto nla_put_failure; if (device && @@ -3837,7 +3837,7 @@ static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device, if (err) goto nla_put_failure; - nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO); + nla = nla_nest_start_noflag(skb, DRBD_NLA_STATE_INFO); if (!nla) goto nla_put_failure; if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) || diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 92b8aafb8bb4..cd27f236431d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2047,7 +2047,7 @@ static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply) */ if (refcount_read(&nbd->config_refs)) connected = 1; - dev_opt = nla_nest_start(reply, NBD_DEVICE_ITEM); + dev_opt = nla_nest_start_noflag(reply, NBD_DEVICE_ITEM); if (!dev_opt) return -EMSGSIZE; ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index); @@ -2095,7 +2095,7 @@ static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info) goto out; } - dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST); + dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST); if (index == -1) { ret = idr_for_each(&nbd_index_idr, &status_cb, reply); if (ret) { diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 11ed58d3fce5..ad189a29cc67 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -292,7 +292,8 @@ static int fill_res_info_entry(struct sk_buff *msg, { struct nlattr *entry_attr; - entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY); + entry_attr = nla_nest_start_noflag(msg, + RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY); if (!entry_attr) return -EMSGSIZE; @@ -327,7 +328,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device) if (fill_nldev_handle(msg, device)) return -EMSGSIZE; - table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY); + table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY); if (!table_attr) return -EMSGSIZE; @@ -1108,7 +1109,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, goto err; } - table_attr = nla_nest_start(skb, fe->nldev_attr); + table_attr = nla_nest_start_noflag(skb, fe->nldev_attr); if (!table_attr) { ret = -EMSGSIZE; goto err; @@ -1134,7 +1135,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, filled = true; - entry_attr = nla_nest_start(skb, fe->entry); + entry_attr = nla_nest_start_noflag(skb, fe->entry); if (!entry_attr) { ret = -EMSGSIZE; rdma_restrack_put(res); diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c index 9a7520ee41e0..f82d46ed969d 100644 --- a/drivers/infiniband/hw/cxgb4/restrack.c +++ b/drivers/infiniband/hw/cxgb4/restrack.c @@ -149,7 +149,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, if (qhp->ucontext) return 0; - table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER); if (!table_attr) goto err; @@ -216,7 +216,7 @@ static int fill_res_ep_entry(struct sk_buff *msg, if (!uep) return 0; - table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER); if (!table_attr) goto err_free_uep; @@ -387,7 +387,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, if (ibcq->uobject) return 0; - table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER); if (!table_attr) goto err; @@ -447,7 +447,7 @@ static int fill_res_mr_entry(struct sk_buff *msg, if (!stag) return 0; - table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER); if (!table_attr) goto err; diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index b286f591242e..022044b59d6a 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -546,7 +546,7 @@ static int bond_fill_info(struct sk_buff *skb, if (nla_put_u32(skb, IFLA_BOND_ARP_INTERVAL, bond->params.arp_interval)) goto nla_put_failure; - targets = nla_nest_start(skb, IFLA_BOND_ARP_IP_TARGET); + targets = nla_nest_start_noflag(skb, IFLA_BOND_ARP_IP_TARGET); if (!targets) goto nla_put_failure; @@ -644,7 +644,7 @@ static int bond_fill_info(struct sk_buff *skb, if (!bond_3ad_get_active_agg_info(bond, &info)) { struct nlattr *nest; - nest = nla_nest_start(skb, IFLA_BOND_AD_INFO); + nest = nla_nest_start_noflag(skb, IFLA_BOND_AD_INFO); if (!nest) goto nla_put_failure; @@ -711,7 +711,7 @@ static int bond_fill_linkxstats(struct sk_buff *skb, return -EINVAL; } - nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BOND); + nest = nla_nest_start_noflag(skb, LINK_XSTATS_TYPE_BOND); if (!nest) return -EMSGSIZE; if (BOND_MODE(bond) == BOND_MODE_8023AD) { @@ -722,7 +722,7 @@ static int bond_fill_linkxstats(struct sk_buff *skb, else stats = &BOND_AD_INFO(bond).stats; - nest2 = nla_nest_start(skb, BOND_XSTATS_3AD); + nest2 = nla_nest_start_noflag(skb, BOND_XSTATS_3AD); if (!nest2) { nla_nest_end(skb, nest); return -EMSGSIZE; diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index 707285953750..80ca300aba04 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -227,14 +227,16 @@ static int append_radio_msg(struct sk_buff *skb, struct hwsim_phy *phy) return 0; } - nl_edges = nla_nest_start(skb, MAC802154_HWSIM_ATTR_RADIO_EDGES); + nl_edges = nla_nest_start_noflag(skb, + MAC802154_HWSIM_ATTR_RADIO_EDGES); if (!nl_edges) { rcu_read_unlock(); return -ENOBUFS; } list_for_each_entry_rcu(e, &phy->edges, list) { - nl_edge = nla_nest_start(skb, MAC802154_HWSIM_ATTR_RADIO_EDGE); + nl_edge = nla_nest_start_noflag(skb, + MAC802154_HWSIM_ATTR_RADIO_EDGE); if (!nl_edge) { rcu_read_unlock(); nla_nest_cancel(skb, nl_edges); diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 263bfafdb004..8dedb9a9781e 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2365,7 +2365,8 @@ copy_secy_stats(struct sk_buff *skb, struct pcpu_secy_stats __percpu *pstats) static int nla_put_secy(struct macsec_secy *secy, struct sk_buff *skb) { struct macsec_tx_sc *tx_sc = &secy->tx_sc; - struct nlattr *secy_nest = nla_nest_start(skb, MACSEC_ATTR_SECY); + struct nlattr *secy_nest = nla_nest_start_noflag(skb, + MACSEC_ATTR_SECY); u64 csid; if (!secy_nest) @@ -2435,7 +2436,7 @@ dump_secy(struct macsec_secy *secy, struct net_device *dev, if (nla_put_secy(secy, skb)) goto nla_put_failure; - attr = nla_nest_start(skb, MACSEC_ATTR_TXSC_STATS); + attr = nla_nest_start_noflag(skb, MACSEC_ATTR_TXSC_STATS); if (!attr) goto nla_put_failure; if (copy_tx_sc_stats(skb, tx_sc->stats)) { @@ -2444,7 +2445,7 @@ dump_secy(struct macsec_secy *secy, struct net_device *dev, } nla_nest_end(skb, attr); - attr = nla_nest_start(skb, MACSEC_ATTR_SECY_STATS); + attr = nla_nest_start_noflag(skb, MACSEC_ATTR_SECY_STATS); if (!attr) goto nla_put_failure; if (copy_secy_stats(skb, macsec_priv(dev)->stats)) { @@ -2453,7 +2454,7 @@ dump_secy(struct macsec_secy *secy, struct net_device *dev, } nla_nest_end(skb, attr); - txsa_list = nla_nest_start(skb, MACSEC_ATTR_TXSA_LIST); + txsa_list = nla_nest_start_noflag(skb, MACSEC_ATTR_TXSA_LIST); if (!txsa_list) goto nla_put_failure; for (i = 0, j = 1; i < MACSEC_NUM_AN; i++) { @@ -2463,7 +2464,7 @@ dump_secy(struct macsec_secy *secy, struct net_device *dev, if (!tx_sa) continue; - txsa_nest = nla_nest_start(skb, j++); + txsa_nest = nla_nest_start_noflag(skb, j++); if (!txsa_nest) { nla_nest_cancel(skb, txsa_list); goto nla_put_failure; @@ -2478,7 +2479,7 @@ dump_secy(struct macsec_secy *secy, struct net_device *dev, goto nla_put_failure; } - attr = nla_nest_start(skb, MACSEC_SA_ATTR_STATS); + attr = nla_nest_start_noflag(skb, MACSEC_SA_ATTR_STATS); if (!attr) { nla_nest_cancel(skb, txsa_nest); nla_nest_cancel(skb, txsa_list); @@ -2496,7 +2497,7 @@ dump_secy(struct macsec_secy *secy, struct net_device *dev, } nla_nest_end(skb, txsa_list); - rxsc_list = nla_nest_start(skb, MACSEC_ATTR_RXSC_LIST); + rxsc_list = nla_nest_start_noflag(skb, MACSEC_ATTR_RXSC_LIST); if (!rxsc_list) goto nla_put_failure; @@ -2504,7 +2505,7 @@ dump_secy(struct macsec_secy *secy, struct net_device *dev, for_each_rxsc_rtnl(secy, rx_sc) { int k; struct nlattr *rxsa_list; - struct nlattr *rxsc_nest = nla_nest_start(skb, j++); + struct nlattr *rxsc_nest = nla_nest_start_noflag(skb, j++); if (!rxsc_nest) { nla_nest_cancel(skb, rxsc_list); @@ -2519,7 +2520,7 @@ dump_secy(struct macsec_secy *secy, struct net_device *dev, goto nla_put_failure; } - attr = nla_nest_start(skb, MACSEC_RXSC_ATTR_STATS); + attr = nla_nest_start_noflag(skb, MACSEC_RXSC_ATTR_STATS); if (!attr) { nla_nest_cancel(skb, rxsc_nest); nla_nest_cancel(skb, rxsc_list); @@ -2533,7 +2534,8 @@ dump_secy(struct macsec_secy *secy, struct net_device *dev, } nla_nest_end(skb, attr); - rxsa_list = nla_nest_start(skb, MACSEC_RXSC_ATTR_SA_LIST); + rxsa_list = nla_nest_start_noflag(skb, + MACSEC_RXSC_ATTR_SA_LIST); if (!rxsa_list) { nla_nest_cancel(skb, rxsc_nest); nla_nest_cancel(skb, rxsc_list); @@ -2547,7 +2549,7 @@ dump_secy(struct macsec_secy *secy, struct net_device *dev, if (!rx_sa) continue; - rxsa_nest = nla_nest_start(skb, k++); + rxsa_nest = nla_nest_start_noflag(skb, k++); if (!rxsa_nest) { nla_nest_cancel(skb, rxsa_list); nla_nest_cancel(skb, rxsc_nest); @@ -2555,7 +2557,8 @@ dump_secy(struct macsec_secy *secy, struct net_device *dev, goto nla_put_failure; } - attr = nla_nest_start(skb, MACSEC_SA_ATTR_STATS); + attr = nla_nest_start_noflag(skb, + MACSEC_SA_ATTR_STATS); if (!attr) { nla_nest_cancel(skb, rxsa_list); nla_nest_cancel(skb, rxsc_nest); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 4a6be8fab884..b395423b19bc 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1624,7 +1624,7 @@ static int macvlan_fill_info(struct sk_buff *skb, if (nla_put_u32(skb, IFLA_MACVLAN_MACADDR_COUNT, vlan->macaddr_count)) goto nla_put_failure; if (vlan->macaddr_count > 0) { - nest = nla_nest_start(skb, IFLA_MACVLAN_MACADDR_DATA); + nest = nla_nest_start_noflag(skb, IFLA_MACVLAN_MACADDR_DATA); if (nest == NULL) goto nla_put_failure; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index eb4711bfc52a..6306897c147f 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2290,7 +2290,7 @@ static int team_nl_fill_one_option_get(struct sk_buff *skb, struct team *team, if (err) return err; - option_item = nla_nest_start(skb, TEAM_ATTR_ITEM_OPTION); + option_item = nla_nest_start_noflag(skb, TEAM_ATTR_ITEM_OPTION); if (!option_item) return -EMSGSIZE; @@ -2404,7 +2404,7 @@ start_again: if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; - option_list = nla_nest_start(skb, TEAM_ATTR_LIST_OPTION); + option_list = nla_nest_start_noflag(skb, TEAM_ATTR_LIST_OPTION); if (!option_list) goto nla_put_failure; @@ -2626,7 +2626,7 @@ static int team_nl_fill_one_port_get(struct sk_buff *skb, { struct nlattr *port_item; - port_item = nla_nest_start(skb, TEAM_ATTR_ITEM_PORT); + port_item = nla_nest_start_noflag(skb, TEAM_ATTR_ITEM_PORT); if (!port_item) goto nest_cancel; if (nla_put_u32(skb, TEAM_ATTR_PORT_IFINDEX, port->dev->ifindex)) @@ -2681,7 +2681,7 @@ start_again: if (nla_put_u32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex)) goto nla_put_failure; - port_list = nla_nest_start(skb, TEAM_ATTR_LIST_PORT); + port_list = nla_nest_start_noflag(skb, TEAM_ATTR_LIST_PORT); if (!port_list) goto nla_put_failure; diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index a1e226652b4a..cac18e61474e 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -2679,13 +2679,13 @@ static int wil_rf_sector_get_cfg(struct wiphy *wiphy, QCA_ATTR_PAD)) goto nla_put_failure; - nl_cfgs = nla_nest_start(msg, QCA_ATTR_DMG_RF_SECTOR_CFG); + nl_cfgs = nla_nest_start_noflag(msg, QCA_ATTR_DMG_RF_SECTOR_CFG); if (!nl_cfgs) goto nla_put_failure; for (i = 0; i < WMI_MAX_RF_MODULES_NUM; i++) { if (!(rf_modules_vec & BIT(i))) continue; - nl_cfg = nla_nest_start(msg, i); + nl_cfg = nla_nest_start_noflag(msg, i); if (!nl_cfg) goto nla_put_failure; si = &reply.evt.sectors_info[i]; diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index f2e1e6b13ca4..965dc6c6653e 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -401,7 +401,7 @@ ip_set_get_h16(const struct nlattr *attr) return ntohs(nla_get_be16(attr)); } -#define ipset_nest_start(skb, attr) nla_nest_start(skb, attr | NLA_F_NESTED) +#define ipset_nest_start(skb, attr) nla_nest_start(skb, attr) #define ipset_nest_end(skb, start) nla_nest_end(skb, start) static inline int nla_put_ipaddr4(struct sk_buff *skb, int type, __be32 ipaddr) diff --git a/include/net/netlink.h b/include/net/netlink.h index 23f27b0b3cef..1f18b47f41b4 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1415,13 +1415,18 @@ static inline void *nla_memdup(const struct nlattr *src, gfp_t gfp) } /** - * nla_nest_start - Start a new level of nested attributes + * nla_nest_start_noflag - Start a new level of nested attributes * @skb: socket buffer to add attributes to * @attrtype: attribute type of container * - * Returns the container attribute + * This function exists for backward compatibility to use in APIs which never + * marked their nest attributes with NLA_F_NESTED flag. New APIs should use + * nla_nest_start() which sets the flag. + * + * Returns the container attribute or NULL on error */ -static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) +static inline struct nlattr *nla_nest_start_noflag(struct sk_buff *skb, + int attrtype) { struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb); @@ -1431,6 +1436,21 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) return start; } +/** + * nla_nest_start - Start a new level of nested attributes, with NLA_F_NESTED + * @skb: socket buffer to add attributes to + * @attrtype: attribute type of container + * + * Unlike nla_nest_start_noflag(), mark the nest attribute with NLA_F_NESTED + * flag. This is the preferred function to use in new code. + * + * Returns the container attribute or NULL on error + */ +static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) +{ + return nla_nest_start_noflag(skb, attrtype | NLA_F_NESTED); +} + /** * nla_nest_end - Finalize nesting of attributes * @skb: socket buffer the attributes are stored in diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 1b942a7caf26..ef4f9cd980fd 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -375,7 +375,7 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) ? TASKSTATS_TYPE_AGGR_PID : TASKSTATS_TYPE_AGGR_TGID; - na = nla_nest_start(skb, aggr); + na = nla_nest_start_noflag(skb, aggr); if (!na) goto err; diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index a624dccf68fd..ab4921e7797b 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -227,7 +227,7 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; } if (vlan->nr_ingress_mappings) { - nest = nla_nest_start(skb, IFLA_VLAN_INGRESS_QOS); + nest = nla_nest_start_noflag(skb, IFLA_VLAN_INGRESS_QOS); if (nest == NULL) goto nla_put_failure; @@ -245,7 +245,7 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev) } if (vlan->nr_egress_mappings) { - nest = nla_nest_start(skb, IFLA_VLAN_EGRESS_QOS); + nest = nla_nest_start_noflag(skb, IFLA_VLAN_EGRESS_QOS); if (nest == NULL) goto nla_put_failure; diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index f69c8d91dc81..3619c1a12a77 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -26,14 +26,14 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, if (!br->multicast_router || hlist_empty(&br->router_list)) return 0; - nest = nla_nest_start(skb, MDBA_ROUTER); + nest = nla_nest_start_noflag(skb, MDBA_ROUTER); if (nest == NULL) return -EMSGSIZE; hlist_for_each_entry_rcu(p, &br->router_list, rlist) { if (!p) continue; - port_nest = nla_nest_start(skb, MDBA_ROUTER_PORT); + port_nest = nla_nest_start_noflag(skb, MDBA_ROUTER_PORT); if (!port_nest) goto fail; if (nla_put_nohdr(skb, sizeof(u32), &p->dev->ifindex) || @@ -86,7 +86,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) return 0; - nest = nla_nest_start(skb, MDBA_MDB); + nest = nla_nest_start_noflag(skb, MDBA_MDB); if (nest == NULL) return -EMSGSIZE; @@ -98,7 +98,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, if (idx < s_idx) goto skip; - nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY); + nest2 = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); if (!nest2) { err = -EMSGSIZE; break; @@ -124,7 +124,8 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, e.addr.u.ip6 = p->addr.u.ip6; #endif e.addr.proto = p->addr.proto; - nest_ent = nla_nest_start(skb, MDBA_MDB_ENTRY_INFO); + nest_ent = nla_nest_start_noflag(skb, + MDBA_MDB_ENTRY_INFO); if (!nest_ent) { nla_nest_cancel(skb, nest2); err = -EMSGSIZE; @@ -248,10 +249,10 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb, memset(bpm, 0, sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = dev->ifindex; - nest = nla_nest_start(skb, MDBA_MDB); + nest = nla_nest_start_noflag(skb, MDBA_MDB); if (nest == NULL) goto cancel; - nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY); + nest2 = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY); if (nest2 == NULL) goto end; @@ -444,7 +445,7 @@ static int nlmsg_populate_rtr_fill(struct sk_buff *skb, memset(bpm, 0, sizeof(*bpm)); bpm->family = AF_BRIDGE; bpm->ifindex = dev->ifindex; - nest = nla_nest_start(skb, MDBA_ROUTER); + nest = nla_nest_start_noflag(skb, MDBA_ROUTER); if (!nest) goto cancel; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 8dfcc2d285d8..0914477c4719 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -414,7 +414,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, if (event == RTM_NEWLINK && port) { struct nlattr *nest - = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); + = nla_nest_start(skb, IFLA_PROTINFO); if (nest == NULL || br_port_fill_attrs(skb, port) < 0) goto nla_put_failure; @@ -439,7 +439,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, rcu_read_unlock(); goto done; } - af = nla_nest_start(skb, IFLA_AF_SPEC); + af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!af) { rcu_read_unlock(); goto nla_put_failure; @@ -1569,7 +1569,7 @@ static int br_fill_linkxstats(struct sk_buff *skb, return -EINVAL; } - nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE); + nest = nla_nest_start_noflag(skb, LINK_XSTATS_TYPE_BRIDGE); if (!nest) return -EMSGSIZE; diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c index da8cb99fd259..787e140dc4b5 100644 --- a/net/bridge/br_netlink_tunnel.c +++ b/net/bridge/br_netlink_tunnel.c @@ -97,7 +97,7 @@ static int br_fill_vlan_tinfo(struct sk_buff *skb, u16 vid, __be32 tid = tunnel_id_to_key32(tunnel_id); struct nlattr *tmap; - tmap = nla_nest_start(skb, IFLA_BRIDGE_VLAN_TUNNEL_INFO); + tmap = nla_nest_start_noflag(skb, IFLA_BRIDGE_VLAN_TUNNEL_INFO); if (!tmap) return -EMSGSIZE; if (nla_put_u32(skb, IFLA_BRIDGE_VLAN_TUNNEL_ID, diff --git a/net/core/devlink.c b/net/core/devlink.c index 7b91605e75d6..b94f326f5f06 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1671,7 +1671,7 @@ int devlink_dpipe_match_put(struct sk_buff *skb, struct devlink_dpipe_field *field = &header->fields[match->field_id]; struct nlattr *match_attr; - match_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_MATCH); + match_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_MATCH); if (!match_attr) return -EMSGSIZE; @@ -1696,7 +1696,8 @@ static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table, { struct nlattr *matches_attr; - matches_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_MATCHES); + matches_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_DPIPE_TABLE_MATCHES); if (!matches_attr) return -EMSGSIZE; @@ -1718,7 +1719,7 @@ int devlink_dpipe_action_put(struct sk_buff *skb, struct devlink_dpipe_field *field = &header->fields[action->field_id]; struct nlattr *action_attr; - action_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ACTION); + action_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ACTION); if (!action_attr) return -EMSGSIZE; @@ -1743,7 +1744,8 @@ static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table, { struct nlattr *actions_attr; - actions_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_ACTIONS); + actions_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_DPIPE_TABLE_ACTIONS); if (!actions_attr) return -EMSGSIZE; @@ -1765,7 +1767,7 @@ static int devlink_dpipe_table_put(struct sk_buff *skb, u64 table_size; table_size = table->table_ops->size_get(table->priv); - table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE); + table_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLE); if (!table_attr) return -EMSGSIZE; @@ -1845,7 +1847,7 @@ start_again: if (devlink_nl_put_handle(skb, devlink)) goto nla_put_failure; - tables_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLES); + tables_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_TABLES); if (!tables_attr) goto nla_put_failure; @@ -1946,8 +1948,8 @@ static int devlink_dpipe_action_values_put(struct sk_buff *skb, int err; for (i = 0; i < values_count; i++) { - action_attr = nla_nest_start(skb, - DEVLINK_ATTR_DPIPE_ACTION_VALUE); + action_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_DPIPE_ACTION_VALUE); if (!action_attr) return -EMSGSIZE; err = devlink_dpipe_action_value_put(skb, &values[i]); @@ -1983,8 +1985,8 @@ static int devlink_dpipe_match_values_put(struct sk_buff *skb, int err; for (i = 0; i < values_count; i++) { - match_attr = nla_nest_start(skb, - DEVLINK_ATTR_DPIPE_MATCH_VALUE); + match_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_DPIPE_MATCH_VALUE); if (!match_attr) return -EMSGSIZE; err = devlink_dpipe_match_value_put(skb, &values[i]); @@ -2005,7 +2007,7 @@ static int devlink_dpipe_entry_put(struct sk_buff *skb, struct nlattr *entry_attr, *matches_attr, *actions_attr; int err; - entry_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ENTRY); + entry_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_ENTRY); if (!entry_attr) return -EMSGSIZE; @@ -2017,8 +2019,8 @@ static int devlink_dpipe_entry_put(struct sk_buff *skb, entry->counter, DEVLINK_ATTR_PAD)) goto nla_put_failure; - matches_attr = nla_nest_start(skb, - DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES); + matches_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES); if (!matches_attr) goto nla_put_failure; @@ -2030,8 +2032,8 @@ static int devlink_dpipe_entry_put(struct sk_buff *skb, } nla_nest_end(skb, matches_attr); - actions_attr = nla_nest_start(skb, - DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES); + actions_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES); if (!actions_attr) goto nla_put_failure; @@ -2088,8 +2090,8 @@ int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx) devlink = dump_ctx->info->user_ptr[0]; if (devlink_nl_put_handle(dump_ctx->skb, devlink)) goto nla_put_failure; - dump_ctx->nest = nla_nest_start(dump_ctx->skb, - DEVLINK_ATTR_DPIPE_ENTRIES); + dump_ctx->nest = nla_nest_start_noflag(dump_ctx->skb, + DEVLINK_ATTR_DPIPE_ENTRIES); if (!dump_ctx->nest) goto nla_put_failure; return 0; @@ -2199,7 +2201,8 @@ static int devlink_dpipe_fields_put(struct sk_buff *skb, for (i = 0; i < header->fields_count; i++) { field = &header->fields[i]; - field_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_FIELD); + field_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_DPIPE_FIELD); if (!field_attr) return -EMSGSIZE; if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) || @@ -2222,7 +2225,7 @@ static int devlink_dpipe_header_put(struct sk_buff *skb, struct nlattr *fields_attr, *header_attr; int err; - header_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER); + header_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADER); if (!header_attr) return -EMSGSIZE; @@ -2231,7 +2234,8 @@ static int devlink_dpipe_header_put(struct sk_buff *skb, nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global)) goto nla_put_failure; - fields_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER_FIELDS); + fields_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_DPIPE_HEADER_FIELDS); if (!fields_attr) goto nla_put_failure; @@ -2278,7 +2282,7 @@ start_again: if (devlink_nl_put_handle(skb, devlink)) goto nla_put_failure; - headers_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADERS); + headers_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_DPIPE_HEADERS); if (!headers_attr) goto nla_put_failure; @@ -2502,7 +2506,7 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb, struct nlattr *child_resource_attr; struct nlattr *resource_attr; - resource_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE); + resource_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE); if (!resource_attr) return -EMSGSIZE; @@ -2526,7 +2530,8 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb, resource->size_valid)) goto nla_put_failure; - child_resource_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE_LIST); + child_resource_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_RESOURCE_LIST); if (!child_resource_attr) goto nla_put_failure; @@ -2577,7 +2582,8 @@ start_again: if (devlink_nl_put_handle(skb, devlink)) goto nla_put_failure; - resources_attr = nla_nest_start(skb, DEVLINK_ATTR_RESOURCE_LIST); + resources_attr = nla_nest_start_noflag(skb, + DEVLINK_ATTR_RESOURCE_LIST); if (!resources_attr) goto nla_put_failure; @@ -2831,7 +2837,8 @@ devlink_nl_param_value_fill_one(struct sk_buff *msg, { struct nlattr *param_value_attr; - param_value_attr = nla_nest_start(msg, DEVLINK_ATTR_PARAM_VALUE); + param_value_attr = nla_nest_start_noflag(msg, + DEVLINK_ATTR_PARAM_VALUE); if (!param_value_attr) goto nla_put_failure; @@ -2922,7 +2929,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, port_index)) goto genlmsg_cancel; - param_attr = nla_nest_start(msg, DEVLINK_ATTR_PARAM); + param_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PARAM); if (!param_attr) goto genlmsg_cancel; if (nla_put_string(msg, DEVLINK_ATTR_PARAM_NAME, param->name)) @@ -2936,7 +2943,8 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_TYPE, nla_type)) goto param_nest_cancel; - param_values_list = nla_nest_start(msg, DEVLINK_ATTR_PARAM_VALUES_LIST); + param_values_list = nla_nest_start_noflag(msg, + DEVLINK_ATTR_PARAM_VALUES_LIST); if (!param_values_list) goto param_nest_cancel; @@ -3336,7 +3344,7 @@ static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg, struct nlattr *snap_attr; int err; - snap_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_SNAPSHOT); + snap_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_SNAPSHOT); if (!snap_attr) return -EINVAL; @@ -3360,7 +3368,8 @@ static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg, struct nlattr *snapshots_attr; int err; - snapshots_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_SNAPSHOTS); + snapshots_attr = nla_nest_start_noflag(msg, + DEVLINK_ATTR_REGION_SNAPSHOTS); if (!snapshots_attr) return -EINVAL; @@ -3576,7 +3585,7 @@ static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg, struct nlattr *chunk_attr; int err; - chunk_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_CHUNK); + chunk_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_REGION_CHUNK); if (!chunk_attr) return -EINVAL; @@ -3709,7 +3718,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, if (err) goto nla_put_failure; - chunks_attr = nla_nest_start(skb, DEVLINK_ATTR_REGION_CHUNKS); + chunks_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_REGION_CHUNKS); if (!chunks_attr) { err = -EMSGSIZE; goto nla_put_failure; @@ -3785,7 +3794,7 @@ static int devlink_info_version_put(struct devlink_info_req *req, int attr, struct nlattr *nest; int err; - nest = nla_nest_start(req->msg, attr); + nest = nla_nest_start_noflag(req->msg, attr); if (!nest) return -EMSGSIZE; @@ -4313,7 +4322,7 @@ devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb, int i = 0; int err; - fmsg_nlattr = nla_nest_start(skb, DEVLINK_ATTR_FMSG); + fmsg_nlattr = nla_nest_start_noflag(skb, DEVLINK_ATTR_FMSG); if (!fmsg_nlattr) return -EMSGSIZE; @@ -4665,7 +4674,8 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg, if (devlink_nl_put_handle(msg, devlink)) goto genlmsg_cancel; - reporter_attr = nla_nest_start(msg, DEVLINK_ATTR_HEALTH_REPORTER); + reporter_attr = nla_nest_start_noflag(msg, + DEVLINK_ATTR_HEALTH_REPORTER); if (!reporter_attr) goto genlmsg_cancel; if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME, diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 3c5c24a5d9f5..bbdfc8db1960 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -453,7 +453,7 @@ static int bpf_fill_lwt_prog(struct sk_buff *skb, int attr, if (!prog->prog) return 0; - nest = nla_nest_start(skb, attr); + nest = nla_nest_start_noflag(skb, attr); if (!nest) return -EMSGSIZE; diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 94749e0e2cfd..69e249fbc02f 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -237,7 +237,7 @@ int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, lwtstate->type > LWTUNNEL_ENCAP_MAX) return 0; - nest = nla_nest_start(skb, encap_attr); + nest = nla_nest_start_noflag(skb, encap_attr); if (!nest) return -EMSGSIZE; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 997cfa8f99ba..efd0b53d9ca4 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1979,7 +1979,7 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) { struct nlattr *nest; - nest = nla_nest_start(skb, NDTA_PARMS); + nest = nla_nest_start_noflag(skb, NDTA_PARMS); if (nest == NULL) return -ENOBUFS; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5fa5bf3e9945..8ad44b299e72 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -634,7 +634,7 @@ static int rtnl_link_slave_info_fill(struct sk_buff *skb, if (nla_put_string(skb, IFLA_INFO_SLAVE_KIND, ops->kind) < 0) return -EMSGSIZE; if (ops->fill_slave_info) { - slave_data = nla_nest_start(skb, IFLA_INFO_SLAVE_DATA); + slave_data = nla_nest_start_noflag(skb, IFLA_INFO_SLAVE_DATA); if (!slave_data) return -EMSGSIZE; err = ops->fill_slave_info(skb, master_dev, dev); @@ -666,7 +666,7 @@ static int rtnl_link_info_fill(struct sk_buff *skb, return err; } if (ops->fill_info) { - data = nla_nest_start(skb, IFLA_INFO_DATA); + data = nla_nest_start_noflag(skb, IFLA_INFO_DATA); if (data == NULL) return -EMSGSIZE; err = ops->fill_info(skb, dev); @@ -686,7 +686,7 @@ static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) struct nlattr *linkinfo; int err = -EMSGSIZE; - linkinfo = nla_nest_start(skb, IFLA_LINKINFO); + linkinfo = nla_nest_start_noflag(skb, IFLA_LINKINFO); if (linkinfo == NULL) goto out; @@ -755,7 +755,7 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) struct nlattr *mx; int i, valid = 0; - mx = nla_nest_start(skb, RTA_METRICS); + mx = nla_nest_start_noflag(skb, RTA_METRICS); if (mx == NULL) return -ENOBUFS; @@ -1036,12 +1036,12 @@ static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) int vf; int err; - vf_ports = nla_nest_start(skb, IFLA_VF_PORTS); + vf_ports = nla_nest_start_noflag(skb, IFLA_VF_PORTS); if (!vf_ports) return -EMSGSIZE; for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) { - vf_port = nla_nest_start(skb, IFLA_VF_PORT); + vf_port = nla_nest_start_noflag(skb, IFLA_VF_PORT); if (!vf_port) goto nla_put_failure; if (nla_put_u32(skb, IFLA_PORT_VF, vf)) @@ -1070,7 +1070,7 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) struct nlattr *port_self; int err; - port_self = nla_nest_start(skb, IFLA_PORT_SELF); + port_self = nla_nest_start_noflag(skb, IFLA_PORT_SELF); if (!port_self) return -EMSGSIZE; @@ -1247,7 +1247,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, vf_linkstate.link_state = ivi.linkstate; vf_rss_query_en.setting = ivi.rss_query_en; vf_trust.setting = ivi.trusted; - vf = nla_nest_start(skb, IFLA_VF_INFO); + vf = nla_nest_start_noflag(skb, IFLA_VF_INFO); if (!vf) goto nla_put_vfinfo_failure; if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || @@ -1266,7 +1266,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, nla_put(skb, IFLA_VF_TRUST, sizeof(vf_trust), &vf_trust)) goto nla_put_vf_failure; - vfvlanlist = nla_nest_start(skb, IFLA_VF_VLAN_LIST); + vfvlanlist = nla_nest_start_noflag(skb, IFLA_VF_VLAN_LIST); if (!vfvlanlist) goto nla_put_vf_failure; if (nla_put(skb, IFLA_VF_VLAN_INFO, sizeof(vf_vlan_info), @@ -1279,7 +1279,7 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, if (dev->netdev_ops->ndo_get_vf_stats) dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num, &vf_stats); - vfstats = nla_nest_start(skb, IFLA_VF_STATS); + vfstats = nla_nest_start_noflag(skb, IFLA_VF_STATS); if (!vfstats) goto nla_put_vf_failure; if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, @@ -1329,7 +1329,7 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb, if (!dev->netdev_ops->ndo_get_vf_config) return 0; - vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); + vfinfo = nla_nest_start_noflag(skb, IFLA_VFINFO_LIST); if (!vfinfo) return -EMSGSIZE; @@ -1414,7 +1414,7 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) int err; u8 mode; - xdp = nla_nest_start(skb, IFLA_XDP); + xdp = nla_nest_start_noflag(skb, IFLA_XDP); if (!xdp) return -EMSGSIZE; @@ -1541,7 +1541,7 @@ static int rtnl_fill_link_af(struct sk_buff *skb, const struct rtnl_af_ops *af_ops; struct nlattr *af_spec; - af_spec = nla_nest_start(skb, IFLA_AF_SPEC); + af_spec = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!af_spec) return -EMSGSIZE; @@ -1552,7 +1552,7 @@ static int rtnl_fill_link_af(struct sk_buff *skb, if (!af_ops->fill_link_af) continue; - af = nla_nest_start(skb, af_ops->family); + af = nla_nest_start_noflag(skb, af_ops->family); if (!af) return -EMSGSIZE; @@ -4273,7 +4273,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) goto nla_put_failure; - br_afspec = nla_nest_start(skb, IFLA_AF_SPEC); + br_afspec = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!br_afspec) goto nla_put_failure; @@ -4297,7 +4297,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, } nla_nest_end(skb, br_afspec); - protinfo = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); + protinfo = nla_nest_start(skb, IFLA_PROTINFO); if (!protinfo) goto nla_put_failure; @@ -4776,8 +4776,8 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (ops && ops->fill_linkxstats) { *idxattr = IFLA_STATS_LINK_XSTATS; - attr = nla_nest_start(skb, - IFLA_STATS_LINK_XSTATS); + attr = nla_nest_start_noflag(skb, + IFLA_STATS_LINK_XSTATS); if (!attr) goto nla_put_failure; @@ -4799,8 +4799,8 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, ops = master->rtnl_link_ops; if (ops && ops->fill_linkxstats) { *idxattr = IFLA_STATS_LINK_XSTATS_SLAVE; - attr = nla_nest_start(skb, - IFLA_STATS_LINK_XSTATS_SLAVE); + attr = nla_nest_start_noflag(skb, + IFLA_STATS_LINK_XSTATS_SLAVE); if (!attr) goto nla_put_failure; @@ -4815,7 +4815,8 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, *idxattr)) { *idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS; - attr = nla_nest_start(skb, IFLA_STATS_LINK_OFFLOAD_XSTATS); + attr = nla_nest_start_noflag(skb, + IFLA_STATS_LINK_OFFLOAD_XSTATS); if (!attr) goto nla_put_failure; @@ -4834,7 +4835,7 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, struct rtnl_af_ops *af_ops; *idxattr = IFLA_STATS_AF_SPEC; - attr = nla_nest_start(skb, IFLA_STATS_AF_SPEC); + attr = nla_nest_start_noflag(skb, IFLA_STATS_AF_SPEC); if (!attr) goto nla_put_failure; @@ -4844,7 +4845,8 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, struct nlattr *af; int err; - af = nla_nest_start(skb, af_ops->family); + af = nla_nest_start_noflag(skb, + af_ops->family); if (!af) { rcu_read_unlock(); goto nla_put_failure; diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index a556cd708885..3fd3aa7348bd 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -246,7 +246,7 @@ static int dcbnl_getpfccfg(struct net_device *netdev, struct nlmsghdr *nlh, if (ret) return ret; - nest = nla_nest_start(skb, DCB_ATTR_PFC_CFG); + nest = nla_nest_start_noflag(skb, DCB_ATTR_PFC_CFG); if (!nest) return -EMSGSIZE; @@ -304,7 +304,7 @@ static int dcbnl_getcap(struct net_device *netdev, struct nlmsghdr *nlh, if (ret) return ret; - nest = nla_nest_start(skb, DCB_ATTR_CAP); + nest = nla_nest_start_noflag(skb, DCB_ATTR_CAP); if (!nest) return -EMSGSIZE; @@ -348,7 +348,7 @@ static int dcbnl_getnumtcs(struct net_device *netdev, struct nlmsghdr *nlh, if (ret) return ret; - nest = nla_nest_start(skb, DCB_ATTR_NUMTCS); + nest = nla_nest_start_noflag(skb, DCB_ATTR_NUMTCS); if (!nest) return -EMSGSIZE; @@ -479,7 +479,7 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlmsghdr *nlh, up = dcb_getapp(netdev, &app); } - app_nest = nla_nest_start(skb, DCB_ATTR_APP); + app_nest = nla_nest_start_noflag(skb, DCB_ATTR_APP); if (!app_nest) return -EMSGSIZE; @@ -578,7 +578,7 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, if (ret) return ret; - pg_nest = nla_nest_start(skb, DCB_ATTR_PG_CFG); + pg_nest = nla_nest_start_noflag(skb, DCB_ATTR_PG_CFG); if (!pg_nest) return -EMSGSIZE; @@ -598,7 +598,7 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, if (ret) goto err_pg; - param_nest = nla_nest_start(skb, i); + param_nest = nla_nest_start_noflag(skb, i); if (!param_nest) goto err_pg; @@ -889,7 +889,7 @@ static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, if (ret) return ret; - bcn_nest = nla_nest_start(skb, DCB_ATTR_BCN); + bcn_nest = nla_nest_start_noflag(skb, DCB_ATTR_BCN); if (!bcn_nest) return -EMSGSIZE; @@ -1002,7 +1002,7 @@ static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb, */ err = -EMSGSIZE; - app = nla_nest_start(skb, app_nested_type); + app = nla_nest_start_noflag(skb, app_nested_type); if (!app) goto nla_put_failure; @@ -1036,7 +1036,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (nla_put_string(skb, DCB_ATTR_IFNAME, netdev->name)) return -EMSGSIZE; - ieee = nla_nest_start(skb, DCB_ATTR_IEEE); + ieee = nla_nest_start_noflag(skb, DCB_ATTR_IEEE); if (!ieee) return -EMSGSIZE; @@ -1106,7 +1106,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) return -EMSGSIZE; } - app = nla_nest_start(skb, DCB_ATTR_IEEE_APP_TABLE); + app = nla_nest_start_noflag(skb, DCB_ATTR_IEEE_APP_TABLE); if (!app) return -EMSGSIZE; @@ -1174,13 +1174,13 @@ static int dcbnl_cee_pg_fill(struct sk_buff *skb, struct net_device *dev, u8 pgid, up_map, prio, tc_pct; const struct dcbnl_rtnl_ops *ops = dev->dcbnl_ops; int i = dir ? DCB_ATTR_CEE_TX_PG : DCB_ATTR_CEE_RX_PG; - struct nlattr *pg = nla_nest_start(skb, i); + struct nlattr *pg = nla_nest_start_noflag(skb, i); if (!pg) return -EMSGSIZE; for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) { - struct nlattr *tc_nest = nla_nest_start(skb, i); + struct nlattr *tc_nest = nla_nest_start_noflag(skb, i); if (!tc_nest) return -EMSGSIZE; @@ -1231,7 +1231,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) if (nla_put_string(skb, DCB_ATTR_IFNAME, netdev->name)) goto nla_put_failure; - cee = nla_nest_start(skb, DCB_ATTR_CEE); + cee = nla_nest_start_noflag(skb, DCB_ATTR_CEE); if (!cee) goto nla_put_failure; @@ -1250,7 +1250,8 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) /* local pfc */ if (ops->getpfccfg) { - struct nlattr *pfc_nest = nla_nest_start(skb, DCB_ATTR_CEE_PFC); + struct nlattr *pfc_nest = nla_nest_start_noflag(skb, + DCB_ATTR_CEE_PFC); if (!pfc_nest) goto nla_put_failure; @@ -1265,14 +1266,14 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) /* local app */ spin_lock_bh(&dcb_lock); - app = nla_nest_start(skb, DCB_ATTR_CEE_APP_TABLE); + app = nla_nest_start_noflag(skb, DCB_ATTR_CEE_APP_TABLE); if (!app) goto dcb_unlock; list_for_each_entry(itr, &dcb_app_list, list) { if (itr->ifindex == netdev->ifindex) { - struct nlattr *app_nest = nla_nest_start(skb, - DCB_ATTR_APP); + struct nlattr *app_nest = nla_nest_start_noflag(skb, + DCB_ATTR_APP); if (!app_nest) goto dcb_unlock; @@ -1305,7 +1306,8 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) /* features flags */ if (ops->getfeatcfg) { - struct nlattr *feat = nla_nest_start(skb, DCB_ATTR_CEE_FEAT); + struct nlattr *feat = nla_nest_start_noflag(skb, + DCB_ATTR_CEE_FEAT); if (!feat) goto nla_put_failure; @@ -1607,7 +1609,7 @@ static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlmsghdr *nlh, if (ret) return ret; - nest = nla_nest_start(skb, DCB_ATTR_FEATCFG); + nest = nla_nest_start_noflag(skb, DCB_ATTR_FEATCFG); if (!nest) return -EMSGSIZE; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index f0710b5d037d..2fb764321b97 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -348,7 +348,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, struct rtnexthop *nhp; struct nlattr *mp_head; - if (!(mp_head = nla_nest_start(skb, RTA_MULTIPATH))) + if (!(mp_head = nla_nest_start_noflag(skb, RTA_MULTIPATH))) goto errout; for_nexthops(fi) { diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 308370cfd668..1a002eb85096 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -312,7 +312,7 @@ static inline void *nl802154hdr_put(struct sk_buff *skb, u32 portid, u32 seq, static int nl802154_put_flags(struct sk_buff *msg, int attr, u32 mask) { - struct nlattr *nl_flags = nla_nest_start(msg, attr); + struct nlattr *nl_flags = nla_nest_start_noflag(msg, attr); int i; if (!nl_flags) @@ -338,7 +338,7 @@ nl802154_send_wpan_phy_channels(struct cfg802154_registered_device *rdev, struct nlattr *nl_page; unsigned long page; - nl_page = nla_nest_start(msg, NL802154_ATTR_CHANNELS_SUPPORTED); + nl_page = nla_nest_start_noflag(msg, NL802154_ATTR_CHANNELS_SUPPORTED); if (!nl_page) return -ENOBUFS; @@ -360,11 +360,11 @@ nl802154_put_capabilities(struct sk_buff *msg, struct nlattr *nl_caps, *nl_channels; int i; - nl_caps = nla_nest_start(msg, NL802154_ATTR_WPAN_PHY_CAPS); + nl_caps = nla_nest_start_noflag(msg, NL802154_ATTR_WPAN_PHY_CAPS); if (!nl_caps) return -ENOBUFS; - nl_channels = nla_nest_start(msg, NL802154_CAP_ATTR_CHANNELS); + nl_channels = nla_nest_start_noflag(msg, NL802154_CAP_ATTR_CHANNELS); if (!nl_channels) return -ENOBUFS; @@ -380,8 +380,8 @@ nl802154_put_capabilities(struct sk_buff *msg, if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_CCA_ED_LEVEL) { struct nlattr *nl_ed_lvls; - nl_ed_lvls = nla_nest_start(msg, - NL802154_CAP_ATTR_CCA_ED_LEVELS); + nl_ed_lvls = nla_nest_start_noflag(msg, + NL802154_CAP_ATTR_CCA_ED_LEVELS); if (!nl_ed_lvls) return -ENOBUFS; @@ -396,7 +396,8 @@ nl802154_put_capabilities(struct sk_buff *msg, if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER) { struct nlattr *nl_tx_pwrs; - nl_tx_pwrs = nla_nest_start(msg, NL802154_CAP_ATTR_TX_POWERS); + nl_tx_pwrs = nla_nest_start_noflag(msg, + NL802154_CAP_ATTR_TX_POWERS); if (!nl_tx_pwrs) return -ENOBUFS; @@ -504,7 +505,7 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, if (nl802154_put_capabilities(msg, rdev)) goto nla_put_failure; - nl_cmds = nla_nest_start(msg, NL802154_ATTR_SUPPORTED_COMMANDS); + nl_cmds = nla_nest_start_noflag(msg, NL802154_ATTR_SUPPORTED_COMMANDS); if (!nl_cmds) goto nla_put_failure; @@ -693,7 +694,8 @@ ieee802154_llsec_send_key_id(struct sk_buff *msg, switch (desc->mode) { case NL802154_KEY_ID_MODE_IMPLICIT: - nl_dev_addr = nla_nest_start(msg, NL802154_KEY_ID_ATTR_IMPLICIT); + nl_dev_addr = nla_nest_start_noflag(msg, + NL802154_KEY_ID_ATTR_IMPLICIT); if (!nl_dev_addr) return -ENOBUFS; @@ -768,7 +770,7 @@ static int nl802154_get_llsec_params(struct sk_buff *msg, params.frame_counter)) return -ENOBUFS; - nl_key_id = nla_nest_start(msg, NL802154_ATTR_SEC_OUT_KEY_ID); + nl_key_id = nla_nest_start_noflag(msg, NL802154_ATTR_SEC_OUT_KEY_ID); if (!nl_key_id) return -ENOBUFS; @@ -1455,11 +1457,11 @@ static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid, if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; - nl_key = nla_nest_start(msg, NL802154_ATTR_SEC_KEY); + nl_key = nla_nest_start_noflag(msg, NL802154_ATTR_SEC_KEY); if (!nl_key) goto nla_put_failure; - nl_key_id = nla_nest_start(msg, NL802154_KEY_ATTR_ID); + nl_key_id = nla_nest_start_noflag(msg, NL802154_KEY_ATTR_ID); if (!nl_key_id) goto nla_put_failure; @@ -1639,7 +1641,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid, if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; - nl_device = nla_nest_start(msg, NL802154_ATTR_SEC_DEVICE); + nl_device = nla_nest_start_noflag(msg, NL802154_ATTR_SEC_DEVICE); if (!nl_device) goto nla_put_failure; @@ -1808,7 +1810,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid, if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; - nl_devkey = nla_nest_start(msg, NL802154_ATTR_SEC_DEVKEY); + nl_devkey = nla_nest_start_noflag(msg, NL802154_ATTR_SEC_DEVKEY); if (!nl_devkey) goto nla_put_failure; @@ -1818,7 +1820,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid, devkey->frame_counter)) goto nla_put_failure; - nl_key_id = nla_nest_start(msg, NL802154_DEVKEY_ATTR_ID); + nl_key_id = nla_nest_start_noflag(msg, NL802154_DEVKEY_ATTR_ID); if (!nl_key_id) goto nla_put_failure; @@ -1976,7 +1978,7 @@ static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid, if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; - nl_seclevel = nla_nest_start(msg, NL802154_ATTR_SEC_LEVEL); + nl_seclevel = nla_nest_start_noflag(msg, NL802154_ATTR_SEC_LEVEL); if (!nl_seclevel) goto nla_put_failure; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4336f1ec8ab0..71c2165a2ce3 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1550,7 +1550,7 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) { struct nlattr *mp; - mp = nla_nest_start(skb, RTA_MULTIPATH); + mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); if (!mp) goto nla_put_failure; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a8eb97777c0a..1322573b8228 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2783,7 +2783,7 @@ static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) return true; vif = &mrt->vif_table[vifid]; - vif_nest = nla_nest_start(skb, IPMRA_VIF); + vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF); if (!vif_nest) return false; if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) || @@ -2867,7 +2867,7 @@ static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) memset(hdr, 0, sizeof(*hdr)); hdr->ifi_family = RTNL_FAMILY_IPMR; - af = nla_nest_start(skb, IFLA_AF_SPEC); + af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!af) { nlmsg_cancel(skb, nlh); goto out; @@ -2878,7 +2878,7 @@ static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) goto out; } - vifs = nla_nest_start(skb, IPMRA_TABLE_VIFS); + vifs = nla_nest_start_noflag(skb, IPMRA_TABLE_VIFS); if (!vifs) { nla_nest_end(skb, af); nlmsg_end(skb, nlh); diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 3e614cc824f7..278834d4babc 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -228,7 +228,7 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, if (c->mfc_flags & MFC_OFFLOAD) rtm->rtm_flags |= RTNH_F_OFFLOAD; - mp_attr = nla_nest_start(skb, RTA_MULTIPATH); + mp_attr = nla_nest_start_noflag(skb, RTA_MULTIPATH); if (!mp_attr) return -EMSGSIZE; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 4ccec4c705f7..9a08bfb0672c 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -658,7 +658,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, { int n = 0; - nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS); + nest = nla_nest_start_noflag(msg, TCP_METRICS_ATTR_VALS); if (!nest) goto nla_put_failure; for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 340a0f06f974..01f081aa718c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5752,7 +5752,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, nla_put_u8(skb, IFLA_OPERSTATE, netif_running(dev) ? dev->operstate : IF_OPER_DOWN)) goto nla_put_failure; - protoinfo = nla_nest_start(skb, IFLA_PROTINFO); + protoinfo = nla_nest_start_noflag(skb, IFLA_PROTINFO); if (!protoinfo) goto nla_put_failure; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9c0127a44f9f..e2b47f47de92 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4777,7 +4777,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct fib6_info *sibling, *next_sibling; struct nlattr *mp; - mp = nla_nest_start(skb, RTA_MULTIPATH); + mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); if (!mp) goto nla_put_failure; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 60325dbfe88b..67005ac71341 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -853,7 +853,7 @@ static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt) if (!slwt->bpf.prog) return 0; - nest = nla_nest_start(skb, SEG6_LOCAL_BPF); + nest = nla_nest_start_noflag(skb, SEG6_LOCAL_BPF); if (!nest) return -EMSGSIZE; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 77595fcc9f75..c31b50cc48d9 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -345,7 +345,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla nla_put_u16(skb, L2TP_ATTR_ENCAP_TYPE, tunnel->encap)) goto nla_put_failure; - nest = nla_nest_start(skb, L2TP_ATTR_STATS); + nest = nla_nest_start_noflag(skb, L2TP_ATTR_STATS); if (nest == NULL) goto nla_put_failure; @@ -742,7 +742,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl session->reorder_timeout, L2TP_ATTR_PAD))) goto nla_put_failure; - nest = nla_nest_start(skb, L2TP_ATTR_STATS); + nest = nla_nest_start_noflag(skb, L2TP_ATTR_STATS); if (nest == NULL) goto nla_put_failure; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index e321a5fafb87..01f8a4f97872 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2017,7 +2017,7 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, u8 linkdown = 0; u8 dead = 0; - mp = nla_nest_start(skb, RTA_MULTIPATH); + mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); if (!mp) goto nla_put_failure; diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index 367b2f6513e0..672ed56b5ef0 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -79,7 +79,7 @@ static int ncsi_write_channel_info(struct sk_buff *skb, nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2); nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name); - vid_nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR_VLAN_LIST); + vid_nest = nla_nest_start_noflag(skb, NCSI_CHANNEL_ATTR_VLAN_LIST); if (!vid_nest) return -ENOMEM; ncf = &nc->vlan_filter; @@ -113,19 +113,19 @@ static int ncsi_write_package_info(struct sk_buff *skb, NCSI_FOR_EACH_PACKAGE(ndp, np) { if (np->id != id) continue; - pnest = nla_nest_start(skb, NCSI_PKG_ATTR); + pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR); if (!pnest) return -ENOMEM; nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); if ((0x1 << np->id) == ndp->package_whitelist) nla_put_flag(skb, NCSI_PKG_ATTR_FORCED); - cnest = nla_nest_start(skb, NCSI_PKG_ATTR_CHANNEL_LIST); + cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST); if (!cnest) { nla_nest_cancel(skb, pnest); return -ENOMEM; } NCSI_FOR_EACH_CHANNEL(np, nc) { - nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR); + nest = nla_nest_start_noflag(skb, NCSI_CHANNEL_ATTR); if (!nest) { nla_nest_cancel(skb, cnest); nla_nest_cancel(skb, pnest); @@ -187,7 +187,7 @@ static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info) package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]); - attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST); + attr = nla_nest_start_noflag(skb, NCSI_ATTR_PACKAGE_LIST); if (!attr) { kfree_skb(skb); return -EMSGSIZE; @@ -250,7 +250,7 @@ static int ncsi_pkg_info_all_nl(struct sk_buff *skb, goto err; } - attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST); + attr = nla_nest_start_noflag(skb, NCSI_ATTR_PACKAGE_LIST); if (!attr) { rc = -EMSGSIZE; goto err; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ab119a7540db..39892e5d38a2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2916,7 +2916,7 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, struct ip_vs_kstats *kstats) { - struct nlattr *nl_stats = nla_nest_start(skb, container_type); + struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type); if (!nl_stats) return -EMSGSIZE; @@ -2946,7 +2946,7 @@ nla_put_failure: static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type, struct ip_vs_kstats *kstats) { - struct nlattr *nl_stats = nla_nest_start(skb, container_type); + struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type); if (!nl_stats) return -EMSGSIZE; @@ -2992,7 +2992,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, struct ip_vs_kstats kstats; char *sched_name; - nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); + nl_service = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_SERVICE); if (!nl_service) return -EMSGSIZE; @@ -3203,7 +3203,7 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) struct nlattr *nl_dest; struct ip_vs_kstats kstats; - nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST); + nl_dest = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DEST); if (!nl_dest) return -EMSGSIZE; @@ -3373,7 +3373,7 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, { struct nlattr *nl_daemon; - nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON); + nl_daemon = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DAEMON); if (!nl_daemon) return -EMSGSIZE; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d547a777192f..148b99a15b21 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -63,7 +63,7 @@ static int ctnetlink_dump_tuples_proto(struct sk_buff *skb, int ret = 0; struct nlattr *nest_parms; - nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_PROTO); if (!nest_parms) goto nla_put_failure; if (nla_put_u8(skb, CTA_PROTO_NUM, tuple->dst.protonum)) @@ -104,7 +104,7 @@ static int ctnetlink_dump_tuples_ip(struct sk_buff *skb, int ret = 0; struct nlattr *nest_parms; - nest_parms = nla_nest_start(skb, CTA_TUPLE_IP | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_IP); if (!nest_parms) goto nla_put_failure; @@ -187,7 +187,7 @@ static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct) if (!l4proto->to_nlattr) return 0; - nest_proto = nla_nest_start(skb, CTA_PROTOINFO | NLA_F_NESTED); + nest_proto = nla_nest_start(skb, CTA_PROTOINFO); if (!nest_proto) goto nla_put_failure; @@ -215,7 +215,7 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb, if (!helper) goto out; - nest_helper = nla_nest_start(skb, CTA_HELP | NLA_F_NESTED); + nest_helper = nla_nest_start(skb, CTA_HELP); if (!nest_helper) goto nla_put_failure; if (nla_put_string(skb, CTA_HELP_NAME, helper->name)) @@ -249,7 +249,7 @@ dump_counters(struct sk_buff *skb, struct nf_conn_acct *acct, bytes = atomic64_read(&counter[dir].bytes); } - nest_count = nla_nest_start(skb, attr | NLA_F_NESTED); + nest_count = nla_nest_start(skb, attr); if (!nest_count) goto nla_put_failure; @@ -293,7 +293,7 @@ ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) if (!tstamp) return 0; - nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED); + nest_count = nla_nest_start(skb, CTA_TIMESTAMP); if (!nest_count) goto nla_put_failure; @@ -337,7 +337,7 @@ static int ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct) return 0; ret = -1; - nest_secctx = nla_nest_start(skb, CTA_SECCTX | NLA_F_NESTED); + nest_secctx = nla_nest_start(skb, CTA_SECCTX); if (!nest_secctx) goto nla_put_failure; @@ -397,7 +397,7 @@ static int ctnetlink_dump_master(struct sk_buff *skb, const struct nf_conn *ct) if (!(ct->status & IPS_EXPECTED)) return 0; - nest_parms = nla_nest_start(skb, CTA_TUPLE_MASTER | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_MASTER); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, master_tuple(ct)) < 0) @@ -415,7 +415,7 @@ dump_ct_seq_adj(struct sk_buff *skb, const struct nf_ct_seqadj *seq, int type) { struct nlattr *nest_parms; - nest_parms = nla_nest_start(skb, type | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, type); if (!nest_parms) goto nla_put_failure; @@ -467,7 +467,7 @@ static int ctnetlink_dump_ct_synproxy(struct sk_buff *skb, struct nf_conn *ct) if (!synproxy) return 0; - nest_parms = nla_nest_start(skb, CTA_SYNPROXY | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_SYNPROXY); if (!nest_parms) goto nla_put_failure; @@ -528,7 +528,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, zone = nf_ct_zone(ct); - nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) @@ -538,7 +538,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, goto nla_put_failure; nla_nest_end(skb, nest_parms); - nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) @@ -720,7 +720,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) zone = nf_ct_zone(ct); - nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) @@ -730,7 +730,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; nla_nest_end(skb, nest_parms); - nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) @@ -2400,7 +2400,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) zone = nf_ct_zone(ct); - nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) @@ -2410,7 +2410,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; nla_nest_end(skb, nest_parms); - nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) @@ -2472,7 +2472,7 @@ ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct, { struct nlattr *nest_parms; - nest_parms = nla_nest_start(skb, ct_attr | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, ct_attr); if (!nest_parms) goto nla_put_failure; @@ -2644,7 +2644,7 @@ static int ctnetlink_exp_dump_tuple(struct sk_buff *skb, { struct nlattr *nest_parms; - nest_parms = nla_nest_start(skb, type | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, type); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, tuple) < 0) @@ -2671,7 +2671,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb, m.src.u.all = mask->src.u.all; m.dst.protonum = tuple->dst.protonum; - nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK); if (!nest_parms) goto nla_put_failure; @@ -2743,7 +2743,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, #if IS_ENABLED(CONFIG_NF_NAT) if (!nf_inet_addr_cmp(&exp->saved_addr, &any_addr) || exp->saved_proto.all) { - nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT); if (!nest_parms) goto nla_put_failure; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 6fca80587505..a4deddebec0a 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -598,7 +598,7 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, struct nlattr *nest_parms; spin_lock_bh(&ct->lock); - nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP); if (!nest_parms) goto nla_put_failure; if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state) || diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index a7818101ad80..8cf36b684400 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -520,7 +520,7 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, struct nlattr *nest_parms; spin_lock_bh(&ct->lock); - nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP); if (!nest_parms) goto nla_put_failure; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index a06875a466a4..ec6c3618333d 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1192,7 +1192,7 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, struct nf_ct_tcp_flags tmp = {}; spin_lock_bh(&ct->lock); - nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP); if (!nest_parms) goto nla_put_failure; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9d888dc6be38..2b79c250ecb4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1200,7 +1200,7 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) total.pkts += pkts; total.bytes += bytes; } - nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS); + nest = nla_nest_start_noflag(skb, NFTA_CHAIN_COUNTERS); if (nest == NULL) goto nla_put_failure; @@ -1248,7 +1248,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, const struct nf_hook_ops *ops = &basechain->ops; struct nlattr *nest; - nest = nla_nest_start(skb, NFTA_CHAIN_HOOK); + nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK); if (nest == NULL) goto nla_put_failure; if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum))) @@ -2059,7 +2059,8 @@ static int nf_tables_fill_expr_info(struct sk_buff *skb, goto nla_put_failure; if (expr->ops->dump) { - struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA); + struct nlattr *data = nla_nest_start_noflag(skb, + NFTA_EXPR_DATA); if (data == NULL) goto nla_put_failure; if (expr->ops->dump(skb, expr) < 0) @@ -2078,7 +2079,7 @@ int nft_expr_dump(struct sk_buff *skb, unsigned int attr, { struct nlattr *nest; - nest = nla_nest_start(skb, attr); + nest = nla_nest_start_noflag(skb, attr); if (!nest) goto nla_put_failure; if (nf_tables_fill_expr_info(skb, expr) < 0) @@ -2289,7 +2290,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; } - list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS); + list = nla_nest_start_noflag(skb, NFTA_RULE_EXPRESSIONS); if (list == NULL) goto nla_put_failure; nft_rule_for_each_expr(expr, next, rule) { @@ -3258,7 +3259,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, if (nla_put(skb, NFTA_SET_USERDATA, set->udlen, set->udata)) goto nla_put_failure; - desc = nla_nest_start(skb, NFTA_SET_DESC); + desc = nla_nest_start_noflag(skb, NFTA_SET_DESC); if (desc == NULL) goto nla_put_failure; if (set->size && @@ -3908,7 +3909,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - nest = nla_nest_start(skb, NFTA_LIST_ELEM); + nest = nla_nest_start_noflag(skb, NFTA_LIST_ELEM); if (nest == NULL) goto nla_put_failure; @@ -4052,7 +4053,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name)) goto nla_put_failure; - nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS); + nest = nla_nest_start_noflag(skb, NFTA_SET_ELEM_LIST_ELEMENTS); if (nest == NULL) goto nla_put_failure; @@ -4124,7 +4125,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, if (nla_put_string(skb, NFTA_SET_NAME, set->name)) goto nla_put_failure; - nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS); + nest = nla_nest_start_noflag(skb, NFTA_SET_ELEM_LIST_ELEMENTS); if (nest == NULL) goto nla_put_failure; @@ -5014,7 +5015,7 @@ static int nft_object_dump(struct sk_buff *skb, unsigned int attr, { struct nlattr *nest; - nest = nla_nest_start(skb, attr); + nest = nla_nest_start_noflag(skb, attr); if (!nest) goto nla_put_failure; if (obj->ops->dump(skb, obj, reset) < 0) @@ -5831,14 +5832,14 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, NFTA_FLOWTABLE_PAD)) goto nla_put_failure; - nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK); + nest = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK); if (!nest) goto nla_put_failure; if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) || nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority))) goto nla_put_failure; - nest_devs = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK_DEVS); + nest_devs = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK_DEVS); if (!nest_devs) goto nla_put_failure; @@ -7264,7 +7265,7 @@ int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v) { struct nlattr *nest; - nest = nla_nest_start(skb, type); + nest = nla_nest_start_noflag(skb, type); if (!nest) goto nla_put_failure; @@ -7377,7 +7378,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, struct nlattr *nest; int err; - nest = nla_nest_start(skb, attr); + nest = nla_nest_start_noflag(skb, attr); if (nest == NULL) return -1; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index e5d27b2e4eba..74c9794d28d6 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -462,7 +462,7 @@ nfnl_cthelper_dump_tuple(struct sk_buff *skb, { struct nlattr *nest_parms; - nest_parms = nla_nest_start(skb, NFCTH_TUPLE | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, NFCTH_TUPLE); if (nest_parms == NULL) goto nla_put_failure; @@ -487,7 +487,7 @@ nfnl_cthelper_dump_policy(struct sk_buff *skb, int i; struct nlattr *nest_parms1, *nest_parms2; - nest_parms1 = nla_nest_start(skb, NFCTH_POLICY | NLA_F_NESTED); + nest_parms1 = nla_nest_start(skb, NFCTH_POLICY); if (nest_parms1 == NULL) goto nla_put_failure; @@ -496,8 +496,7 @@ nfnl_cthelper_dump_policy(struct sk_buff *skb, goto nla_put_failure; for (i = 0; i < helper->expect_class_max + 1; i++) { - nest_parms2 = nla_nest_start(skb, - (NFCTH_POLICY_SET+i) | NLA_F_NESTED); + nest_parms2 = nla_nest_start(skb, (NFCTH_POLICY_SET + i)); if (nest_parms2 == NULL) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index c69b11ca5aad..572cb42e1ee1 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -184,7 +184,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, htonl(refcount_read(&timeout->refcnt)))) goto nla_put_failure; - nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA); if (!nest_parms) goto nla_put_failure; @@ -401,7 +401,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto)) goto nla_put_failure; - nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA | NLA_F_NESTED); + nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA); if (!nest_parms) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index e057b2961d31..be7d53943e2d 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -351,7 +351,7 @@ static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb) if (skb_vlan_tag_present(entskb)) { struct nlattr *nest; - nest = nla_nest_start(skb, NFQA_VLAN | NLA_F_NESTED); + nest = nla_nest_start(skb, NFQA_VLAN); if (!nest) goto nla_put_failure; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 7b717fad6cdc..1738ef6dcb56 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -928,7 +928,7 @@ static int nft_ct_timeout_obj_dump(struct sk_buff *skb, nla_put_be16(skb, NFTA_CT_TIMEOUT_L3PROTO, htons(timeout->l3num))) return -1; - nest_params = nla_nest_start(skb, NFTA_CT_TIMEOUT_DATA | NLA_F_NESTED); + nest_params = nla_nest_start(skb, NFTA_CT_TIMEOUT_DATA); if (!nest_params) return -1; diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index b113fcac94e1..66b52d015763 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -437,7 +437,7 @@ static int nft_tunnel_ip_dump(struct sk_buff *skb, struct ip_tunnel_info *info) struct nlattr *nest; if (info->mode & IP_TUNNEL_INFO_IPV6) { - nest = nla_nest_start(skb, NFTA_TUNNEL_KEY_IP6); + nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_IP6); if (!nest) return -1; @@ -448,7 +448,7 @@ static int nft_tunnel_ip_dump(struct sk_buff *skb, struct ip_tunnel_info *info) nla_nest_end(skb, nest); } else { - nest = nla_nest_start(skb, NFTA_TUNNEL_KEY_IP); + nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_IP); if (!nest) return -1; @@ -468,7 +468,7 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, struct nft_tunnel_opts *opts = &priv->opts; struct nlattr *nest; - nest = nla_nest_start(skb, NFTA_TUNNEL_KEY_OPTS); + nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS); if (!nest) return -1; diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index ba7800f94ccc..c9775658fb98 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -498,7 +498,7 @@ list_start: if (ret_val != 0) goto list_failure_lock; - nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_TAGLST); + nla_a = nla_nest_start_noflag(ans_skb, NLBL_CIPSOV4_A_TAGLST); if (nla_a == NULL) { ret_val = -ENOMEM; goto list_failure_lock; @@ -517,7 +517,8 @@ list_start: switch (doi_def->type) { case CIPSO_V4_MAP_TRANS: - nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST); + nla_a = nla_nest_start_noflag(ans_skb, + NLBL_CIPSOV4_A_MLSLVLLST); if (nla_a == NULL) { ret_val = -ENOMEM; goto list_failure_lock; @@ -529,7 +530,8 @@ list_start: CIPSO_V4_INV_LVL) continue; - nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVL); + nla_b = nla_nest_start_noflag(ans_skb, + NLBL_CIPSOV4_A_MLSLVL); if (nla_b == NULL) { ret_val = -ENOMEM; goto list_retry; @@ -548,7 +550,8 @@ list_start: } nla_nest_end(ans_skb, nla_a); - nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCATLST); + nla_a = nla_nest_start_noflag(ans_skb, + NLBL_CIPSOV4_A_MLSCATLST); if (nla_a == NULL) { ret_val = -ENOMEM; goto list_retry; @@ -560,7 +563,8 @@ list_start: CIPSO_V4_INV_CAT) continue; - nla_b = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSCAT); + nla_b = nla_nest_start_noflag(ans_skb, + NLBL_CIPSOV4_A_MLSCAT); if (nla_b == NULL) { ret_val = -ENOMEM; goto list_retry; diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index a16eacfb2236..c6c8a101f2ff 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -315,7 +315,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, switch (entry->def.type) { case NETLBL_NLTYPE_ADDRSELECT: - nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST); + nla_a = nla_nest_start_noflag(skb, NLBL_MGMT_A_SELECTORLIST); if (nla_a == NULL) return -ENOMEM; @@ -323,7 +323,8 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, struct netlbl_domaddr4_map *map4; struct in_addr addr_struct; - nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); + nla_b = nla_nest_start_noflag(skb, + NLBL_MGMT_A_ADDRSELECTOR); if (nla_b == NULL) return -ENOMEM; @@ -357,7 +358,8 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6) { struct netlbl_domaddr6_map *map6; - nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); + nla_b = nla_nest_start_noflag(skb, + NLBL_MGMT_A_ADDRSELECTOR); if (nla_b == NULL) return -ENOMEM; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 288456090710..83e876591f6c 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -665,7 +665,7 @@ static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, struct nlattr *nla_ops; int i; - nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS); + nla_ops = nla_nest_start_noflag(skb, CTRL_ATTR_OPS); if (nla_ops == NULL) goto nla_put_failure; @@ -681,7 +681,7 @@ static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, if (family->policy) op_flags |= GENL_CMD_CAP_HASPOL; - nest = nla_nest_start(skb, i + 1); + nest = nla_nest_start_noflag(skb, i + 1); if (nest == NULL) goto nla_put_failure; @@ -699,7 +699,7 @@ static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, struct nlattr *nla_grps; int i; - nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS); + nla_grps = nla_nest_start_noflag(skb, CTRL_ATTR_MCAST_GROUPS); if (nla_grps == NULL) goto nla_put_failure; @@ -709,7 +709,7 @@ static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, grp = &family->mcgrps[i]; - nest = nla_nest_start(skb, i + 1); + nest = nla_nest_start_noflag(skb, i + 1); if (nest == NULL) goto nla_put_failure; @@ -749,11 +749,11 @@ static int ctrl_fill_mcgrp_info(const struct genl_family *family, nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, family->id)) goto nla_put_failure; - nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS); + nla_grps = nla_nest_start_noflag(skb, CTRL_ATTR_MCAST_GROUPS); if (nla_grps == NULL) goto nla_put_failure; - nest = nla_nest_start(skb, 1); + nest = nla_nest_start_noflag(skb, 1); if (nest == NULL) goto nla_put_failure; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 4d9f3ac8d562..f91ce7c82746 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -392,7 +392,7 @@ int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list) if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) goto nla_put_failure; - sdp_attr = nla_nest_start(msg, NFC_ATTR_LLC_SDP); + sdp_attr = nla_nest_start_noflag(msg, NFC_ATTR_LLC_SDP); if (sdp_attr == NULL) { rc = -ENOMEM; goto nla_put_failure; @@ -402,7 +402,7 @@ int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list) hlist_for_each_entry_safe(sdres, n, sdres_list, node) { pr_debug("uri: %s, sap: %d\n", sdres->uri, sdres->sap); - uri_attr = nla_nest_start(msg, i++); + uri_attr = nla_nest_start_noflag(msg, i++); if (uri_attr == NULL) { rc = -ENOMEM; goto nla_put_failure; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 626629944450..ff8baf810bb3 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1683,7 +1683,7 @@ static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, { struct nlattr *start; - start = nla_nest_start(skb, OVS_CT_ATTR_NAT); + start = nla_nest_start_noflag(skb, OVS_CT_ATTR_NAT); if (!start) return false; @@ -1750,7 +1750,7 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, { struct nlattr *start; - start = nla_nest_start(skb, OVS_ACTION_ATTR_CT); + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CT); if (!start) return -EMSGSIZE; @@ -2160,7 +2160,7 @@ static int ovs_ct_limit_cmd_get(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(reply)) return PTR_ERR(reply); - nla_reply = nla_nest_start(reply, OVS_CT_LIMIT_ATTR_ZONE_LIMIT); + nla_reply = nla_nest_start_noflag(reply, OVS_CT_LIMIT_ATTR_ZONE_LIMIT); if (a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) { err = ovs_ct_limit_get_zone_limit( diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a64d3eb1f9a9..356677c3a0c2 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -463,7 +463,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, nla_data(upcall_info->userdata)); if (upcall_info->egress_tun_info) { - nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); + nla = nla_nest_start_noflag(user_skb, + OVS_PACKET_ATTR_EGRESS_TUN_KEY); if (!nla) { err = -EMSGSIZE; goto out; @@ -475,7 +476,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, } if (upcall_info->actions_len) { - nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS); + nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS); if (!nla) { err = -EMSGSIZE; goto out; @@ -776,7 +777,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow, * This can only fail for dump operations because the skb is always * properly sized for single flows. */ - start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS); + start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS); if (start) { const struct sw_flow_actions *sf_acts; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 3563acd5f92e..2427b672107a 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -856,7 +856,7 @@ static int vxlan_opt_to_nlattr(struct sk_buff *skb, const struct vxlan_metadata *opts = tun_opts; struct nlattr *nla; - nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); + nla = nla_nest_start_noflag(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); if (!nla) return -EMSGSIZE; @@ -948,7 +948,7 @@ static int ip_tun_to_nlattr(struct sk_buff *skb, struct nlattr *nla; int err; - nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); + nla = nla_nest_start_noflag(skb, OVS_KEY_ATTR_TUNNEL); if (!nla) return -EMSGSIZE; @@ -1957,7 +1957,7 @@ static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask, { struct nlattr *start; - start = nla_nest_start(skb, OVS_KEY_ATTR_NSH); + start = nla_nest_start_noflag(skb, OVS_KEY_ATTR_NSH); if (!start) return -EMSGSIZE; @@ -2040,14 +2040,15 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask)) goto nla_put_failure; - encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + encap = nla_nest_start_noflag(skb, OVS_KEY_ATTR_ENCAP); if (!swkey->eth.vlan.tci) goto unencap; if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) goto nla_put_failure; - in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + in_encap = nla_nest_start_noflag(skb, + OVS_KEY_ATTR_ENCAP); if (!swkey->eth.cvlan.tci) goto unencap; } @@ -2226,7 +2227,7 @@ int ovs_nla_put_key(const struct sw_flow_key *swkey, int err; struct nlattr *nla; - nla = nla_nest_start(skb, attr); + nla = nla_nest_start_noflag(skb, attr); if (!nla) return -EMSGSIZE; err = __ovs_nla_put_key(swkey, output, is_mask, skb); @@ -3252,7 +3253,7 @@ static int sample_action_to_attr(const struct nlattr *attr, const struct sample_arg *arg; struct nlattr *actions; - start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SAMPLE); if (!start) return -EMSGSIZE; @@ -3265,7 +3266,7 @@ static int sample_action_to_attr(const struct nlattr *attr, goto out; } - ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); + ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS); if (!ac_start) { err = -EMSGSIZE; goto out; @@ -3291,7 +3292,7 @@ static int clone_action_to_attr(const struct nlattr *attr, struct nlattr *start; int err = 0, rem = nla_len(attr); - start = nla_nest_start(skb, OVS_ACTION_ATTR_CLONE); + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CLONE); if (!start) return -EMSGSIZE; @@ -3313,7 +3314,7 @@ static int check_pkt_len_action_to_attr(const struct nlattr *attr, const struct nlattr *a, *cpl_arg; int err = 0, rem = nla_len(attr); - start = nla_nest_start(skb, OVS_ACTION_ATTR_CHECK_PKT_LEN); + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CHECK_PKT_LEN); if (!start) return -EMSGSIZE; @@ -3332,8 +3333,8 @@ static int check_pkt_len_action_to_attr(const struct nlattr *attr, * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'. */ a = nla_next(cpl_arg, &rem); - ac_start = nla_nest_start(skb, - OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL); + ac_start = nla_nest_start_noflag(skb, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL); if (!ac_start) { err = -EMSGSIZE; goto out; @@ -3351,8 +3352,8 @@ static int check_pkt_len_action_to_attr(const struct nlattr *attr, * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER. */ a = nla_next(a, &rem); - ac_start = nla_nest_start(skb, - OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER); + ac_start = nla_nest_start_noflag(skb, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER); if (!ac_start) { err = -EMSGSIZE; goto out; @@ -3386,7 +3387,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key); struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info; - start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET); if (!start) return -EMSGSIZE; @@ -3418,7 +3419,7 @@ static int masked_set_action_to_set_action_attr(const struct nlattr *a, /* Revert the conversion we did from a non-masked set action to * masked set action. */ - nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET); + nla = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET); if (!nla) return -EMSGSIZE; diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 0be3d097ae01..fdc8be7fd8f3 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -127,7 +127,7 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, OVS_METER_ATTR_PAD)) goto error; - nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS); + nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); if (!nla) goto error; @@ -136,7 +136,7 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, for (i = 0; i < meter->n_bands; ++i, ++band) { struct nlattr *band_nla; - band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC); + band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC); if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS, sizeof(struct ovs_flow_stats), &band->stats)) @@ -166,11 +166,11 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS)) goto nla_put_failure; - nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS); + nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); if (!nla) goto nla_put_failure; - band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC); + band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC); if (!band_nla) goto nla_put_failure; /* Currently only DROP band type is supported. */ diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 8f16f11f7ad3..54965ff8cc66 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -43,7 +43,7 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) if (vxlan->cfg.flags & VXLAN_F_GBP) { struct nlattr *exts; - exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION); + exts = nla_nest_start_noflag(skb, OVS_TUNNEL_ATTR_EXTENSION); if (!exts) return -EMSGSIZE; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 19f6765566e7..258ce3b7b452 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -319,7 +319,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) if (!vport->ops->get_options) return 0; - nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS); + nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_OPTIONS); if (!nla) return -EMSGSIZE; diff --git a/net/packet/diag.c b/net/packet/diag.c index 7ef1c881ae74..98abfd8644a4 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -39,7 +39,7 @@ static int pdiag_put_mclist(const struct packet_sock *po, struct sk_buff *nlskb) struct nlattr *mca; struct packet_mclist *ml; - mca = nla_nest_start(nlskb, PACKET_DIAG_MCLIST); + mca = nla_nest_start_noflag(nlskb, PACKET_DIAG_MCLIST); if (!mca) return -EMSGSIZE; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 5a87e271d35a..641ad7575f24 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -242,7 +242,7 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, (unsigned long)p->tcfa_tm.lastuse)) continue; - nest = nla_nest_start(skb, n_i); + nest = nla_nest_start_noflag(skb, n_i); if (!nest) { index--; goto nla_put_failure; @@ -299,7 +299,7 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct tc_action *p; unsigned long id = 1; - nest = nla_nest_start(skb, 0); + nest = nla_nest_start_noflag(skb, 0); if (nest == NULL) goto nla_put_failure; if (nla_put_string(skb, TCA_KIND, ops->kind)) @@ -776,7 +776,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) } rcu_read_unlock(); - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; err = tcf_action_dump_old(skb, a, bind, ref); @@ -800,7 +800,7 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { a = actions[i]; - nest = nla_nest_start(skb, a->order); + nest = nla_nest_start_noflag(skb, a->order); if (nest == NULL) goto nla_put_failure; err = tcf_action_dump_1(skb, a, bind, ref); @@ -1052,7 +1052,7 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[], t->tca__pad1 = 0; t->tca__pad2 = 0; - nest = nla_nest_start(skb, TCA_ACT_TAB); + nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (!nest) goto out_nlmsg_trim; @@ -1176,7 +1176,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, t->tca__pad1 = 0; t->tca__pad2 = 0; - nest = nla_nest_start(skb, TCA_ACT_TAB); + nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (!nest) { NL_SET_ERR_MSG(extack, "Failed to add new netlink message"); goto out_module_put; @@ -1508,7 +1508,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (!count_attr) goto out_module_put; - nest = nla_nest_start(skb, TCA_ACT_TAB); + nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (nest == NULL) goto out_module_put; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 31c6ffb6abe7..7a87ce2e5a76 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -387,7 +387,7 @@ static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife) if (list_empty(&ife->metalist)) return 0; - nest = nla_nest_start(skb, TCA_IFE_METALST); + nest = nla_nest_start_noflag(skb, TCA_IFE_METALST); if (!nest) goto out_nlmsg_trim; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 287793abfaf9..ce4b54fa7834 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -108,14 +108,15 @@ err_out: static int tcf_pedit_key_ex_dump(struct sk_buff *skb, struct tcf_pedit_key_ex *keys_ex, int n) { - struct nlattr *keys_start = nla_nest_start(skb, TCA_PEDIT_KEYS_EX); + struct nlattr *keys_start = nla_nest_start_noflag(skb, + TCA_PEDIT_KEYS_EX); if (!keys_start) goto nla_failure; for (; n > 0; n--) { struct nlattr *key_start; - key_start = nla_nest_start(skb, TCA_PEDIT_KEY_EX); + key_start = nla_nest_start_noflag(skb, TCA_PEDIT_KEY_EX); if (!key_start) goto nla_failure; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index d5aaf90a3971..45c0c253c7e8 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -426,7 +426,7 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, u8 *src = (u8 *)(info + 1); struct nlattr *start; - start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE); + start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE); if (!start) return -EMSGSIZE; @@ -460,7 +460,7 @@ static int tunnel_key_opts_dump(struct sk_buff *skb, if (!info->options_len) return 0; - start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS); + start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS); if (!start) return -EMSGSIZE; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 9115f053883f..78de717afddf 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3111,7 +3111,7 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) * tc data even if iproute2 was newer - jhs */ if (exts->type != TCA_OLD_COMPAT) { - nest = nla_nest_start(skb, exts->action); + nest = nla_nest_start_noflag(skb, exts->action); if (nest == NULL) goto nla_put_failure; @@ -3120,7 +3120,7 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) nla_nest_end(skb, nest); } else if (exts->police) { struct tc_action *act = tcf_exts_first_act(exts); - nest = nla_nest_start(skb, exts->police); + nest = nla_nest_start_noflag(skb, exts->police); if (nest == NULL || !act) goto nla_put_failure; if (tcf_action_dump_old(skb, act, 0, 0) < 0) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 687b0af67878..dd5fdb62c6df 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -288,7 +288,7 @@ static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = f->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index b4ac58039cb1..6fd569c5a036 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -591,7 +591,7 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh, cls_bpf_offload_update_stats(tp, prog); - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 4c1567854f95..b680dd684282 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -176,7 +176,7 @@ static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = head->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index eece1ee26930..cb29fe7d5ed3 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -629,7 +629,7 @@ static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = f->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 0d8968803e98..8d4f7a672f14 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2051,7 +2051,7 @@ static int fl_dump_key_geneve_opt(struct sk_buff *skb, struct nlattr *nest; int opt_off = 0; - nest = nla_nest_start(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE); + nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE); if (!nest) goto nla_put_failure; @@ -2087,7 +2087,7 @@ static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, if (!enc_opts->len) return 0; - nest = nla_nest_start(skb, enc_opt_type); + nest = nla_nest_start_noflag(skb, enc_opt_type); if (!nest) goto nla_put_failure; @@ -2333,7 +2333,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = f->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; @@ -2384,7 +2384,7 @@ static int fl_tmplt_dump(struct sk_buff *skb, struct net *net, void *tmplt_priv) struct fl_flow_key *key, *mask; struct nlattr *nest; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index ad036b00427d..3fcc1d51b9d7 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -402,7 +402,7 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh, if (!f->res.classid && !tcf_exts_has_actions(&f->exts)) return skb->len; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index a13bc351a414..d54fa8e11b9e 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -303,7 +303,7 @@ static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = head->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index f006af23b64a..b3b9b151a61d 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -607,7 +607,7 @@ static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = f->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 0719a21d9c41..fa059cf934a6 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -706,7 +706,7 @@ static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = f->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 24e0a62a65cc..1a2e7d5a8776 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -601,7 +601,7 @@ static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh, tp, fh, skb, t, p, r); pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h); - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 48e76a3acf8a..499477058b2d 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1294,7 +1294,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, t->tcm_handle = n->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 1331a4c2d8ff..6f2d6a761dbe 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -440,14 +440,14 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv) struct nlattr *top_start; struct nlattr *list_start; - top_start = nla_nest_start(skb, tlv); + top_start = nla_nest_start_noflag(skb, tlv); if (top_start == NULL) goto nla_put_failure; if (nla_put(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr)) goto nla_put_failure; - list_start = nla_nest_start(skb, TCA_EMATCH_TREE_LIST); + list_start = nla_nest_start_noflag(skb, TCA_EMATCH_TREE_LIST); if (list_start == NULL) goto nla_put_failure; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c126b9f78d6e..6c81b22d214f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -542,7 +542,7 @@ static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) { struct nlattr *nest; - nest = nla_nest_start(skb, TCA_STAB); + nest = nla_nest_start_noflag(skb, TCA_STAB); if (nest == NULL) goto nla_put_failure; if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts)) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index d714d3747bcb..c36aa57eb4af 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -609,7 +609,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_handle = flow->common.classid; tcm->tcm_info = flow->q->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 259d97bc2abd..50db72fe44de 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2735,7 +2735,7 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) struct cake_sched_data *q = qdisc_priv(sch); struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!opts) goto nla_put_failure; @@ -2806,7 +2806,7 @@ nla_put_failure: static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { - struct nlattr *stats = nla_nest_start(d->skb, TCA_STATS_APP); + struct nlattr *stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP); struct cake_sched_data *q = qdisc_priv(sch); struct nlattr *tstats, *ts; int i; @@ -2836,7 +2836,7 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) #undef PUT_STAT_U32 #undef PUT_STAT_U64 - tstats = nla_nest_start(d->skb, TCA_CAKE_STATS_TIN_STATS); + tstats = nla_nest_start_noflag(d->skb, TCA_CAKE_STATS_TIN_STATS); if (!tstats) goto nla_put_failure; @@ -2853,7 +2853,7 @@ static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) for (i = 0; i < q->tin_cnt; i++) { struct cake_tin_data *b = &q->tins[q->tin_order[i]]; - ts = nla_nest_start(d->skb, i + 1); + ts = nla_nest_start_noflag(d->skb, i + 1); if (!ts) goto nla_put_failure; @@ -2973,7 +2973,7 @@ static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, if (flow) { ktime_t now = ktime_get(); - stats = nla_nest_start(d->skb, TCA_STATS_APP); + stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP); if (!stats) return -1; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 114b9048ea7e..243bce4b888b 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1305,7 +1305,7 @@ static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb) struct cbq_sched_data *q = qdisc_priv(sch); struct nlattr *nest; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (cbq_dump_attr(skb, &q->link) < 0) @@ -1340,7 +1340,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg, tcm->tcm_handle = cl->common.classid; tcm->tcm_info = cl->q->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (cbq_dump_attr(skb, cl) < 0) diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index f68fd7a0e038..adffc6d68c06 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -449,7 +449,7 @@ static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_cbs_qopt opt = { }; struct nlattr *nest; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index eafc0d17d174..eda21dc94bde 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -452,7 +452,7 @@ static int choke_dump(struct Qdisc *sch, struct sk_buff *skb) .Scell_log = q->parms.Scell_log, }; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 17cd81f84b5d..60ac4e61ce3a 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -217,7 +217,7 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb) struct codel_sched_data *q = qdisc_priv(sch); struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 430df9a55ec4..022db73fd5a9 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -244,7 +244,7 @@ static int drr_dump_class(struct Qdisc *sch, unsigned long arg, tcm->tcm_handle = cl->common.classid; tcm->tcm_info = cl->qdisc->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (nla_put_u32(skb, TCA_DRR_QUANTUM, cl->quantum)) diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 42471464ded3..cdf744e710f1 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -432,7 +432,7 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1); tcm->tcm_info = p->q->handle; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) || @@ -451,7 +451,7 @@ static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) struct dsmark_qdisc_data *p = qdisc_priv(sch); struct nlattr *opts = NULL; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put_u16(skb, TCA_DSMARK_INDICES, p->indices)) diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index 1150f22983df..67107caa287c 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -460,7 +460,7 @@ static int etf_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_etf_qopt opt = { }; struct nlattr *nest; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 1a662f2bb7bb..5ca370e78d3a 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -823,7 +823,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) u64 ce_threshold = q->ce_threshold; struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index cd04d40c30b6..825a933b019a 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -527,7 +527,7 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) struct fq_codel_sched_data *q = qdisc_priv(sch); struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 234afbf9115b..9bfa15e12d23 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -772,7 +772,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) if (gred_offload_dump_stats(sch)) goto nla_put_failure; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_GRED_DPS, sizeof(sopt), &sopt)) @@ -790,7 +790,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) goto nla_put_failure; /* Old style all-in-one dump of VQs */ - parms = nla_nest_start(skb, TCA_GRED_PARMS); + parms = nla_nest_start_noflag(skb, TCA_GRED_PARMS); if (parms == NULL) goto nla_put_failure; @@ -841,7 +841,7 @@ append_opt: nla_nest_end(skb, parms); /* Dump the VQs again, in more structured way */ - vqs = nla_nest_start(skb, TCA_GRED_VQ_LIST); + vqs = nla_nest_start_noflag(skb, TCA_GRED_VQ_LIST); if (!vqs) goto nla_put_failure; @@ -852,7 +852,7 @@ append_opt: if (!q) continue; - vq = nla_nest_start(skb, TCA_GRED_VQ_ENTRY); + vq = nla_nest_start_noflag(skb, TCA_GRED_VQ_ENTRY); if (!vq) goto nla_put_failure; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d2ab463f22ae..97d2fb91c39f 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1300,7 +1300,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, if (cl->level == 0) tcm->tcm_info = cl->qdisc->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (hfsc_dump_curves(skb, cl) < 0) diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 9d6a47697406..43bc159c4f7c 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -654,7 +654,7 @@ static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb) struct hhf_sched_data *q = qdisc_priv(sch); struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 2f9883b196e8..64010aec5437 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1057,7 +1057,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) gopt.defcls = q->defcls; gopt.debug = 0; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) || @@ -1086,7 +1086,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, if (!cl->level && cl->leaf.q) tcm->tcm_info = cl->leaf.q->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index ce3f55259d0d..0bac926b46c7 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -106,7 +106,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) { struct nlattr *nest; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index ea0dc112b38d..7afefed72d35 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -349,7 +349,7 @@ static int dump_rates(struct mqprio_sched *priv, int i; if (priv->flags & TC_MQPRIO_F_MIN_RATE) { - nest = nla_nest_start(skb, TCA_MQPRIO_MIN_RATE64); + nest = nla_nest_start_noflag(skb, TCA_MQPRIO_MIN_RATE64); if (!nest) goto nla_put_failure; @@ -363,7 +363,7 @@ static int dump_rates(struct mqprio_sched *priv, } if (priv->flags & TC_MQPRIO_F_MAX_RATE) { - nest = nla_nest_start(skb, TCA_MQPRIO_MAX_RATE64); + nest = nla_nest_start_noflag(skb, TCA_MQPRIO_MAX_RATE64); if (!nest) goto nla_put_failure; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index cc9d8133afcd..0242c0d4a2d0 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1079,7 +1079,7 @@ static int dump_loss_model(const struct netem_sched_data *q, { struct nlattr *nest; - nest = nla_nest_start(skb, TCA_NETEM_LOSS); + nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS); if (nest == NULL) goto nla_put_failure; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 1cc0c7b74aa3..9bf41f4a2312 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -491,7 +491,7 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb) struct pie_sched_data *q = qdisc_priv(sch); struct nlattr *opts; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!opts) goto nla_put_failure; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 1589364b54da..bab2d4026e8b 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -619,7 +619,7 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg, tcm->tcm_handle = cl->common.classid; tcm->tcm_info = cl->qdisc->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) || diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 4e8c0abf6194..b9f34e057e87 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -318,7 +318,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) if (err) goto nla_put_failure; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) || diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 2419fdb75966..f54b00a431a3 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -580,7 +580,7 @@ static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb) }; sch->qstats.backlog = q->qdisc->qstats.backlog; - opts = nla_nest_start(skb, TCA_OPTIONS); + opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_SFB_PARMS, sizeof(opt), &opt)) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index df848a36b222..e016ee07dd1f 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -841,7 +841,7 @@ static int dump_entry(struct sk_buff *msg, { struct nlattr *item; - item = nla_nest_start(msg, TCA_TAPRIO_SCHED_ENTRY); + item = nla_nest_start_noflag(msg, TCA_TAPRIO_SCHED_ENTRY); if (!item) return -ENOSPC; @@ -883,7 +883,7 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) opt.offset[i] = dev->tc_to_txq[i].offset; } - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) return -ENOSPC; @@ -897,7 +897,8 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) goto options_error; - entry_list = nla_nest_start(skb, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST); + entry_list = nla_nest_start_noflag(skb, + TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST); if (!entry_list) goto options_error; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index f71578dbb9e3..3ae5a29eeab3 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -448,7 +448,7 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_tbf_qopt opt; sch->qstats.backlog = q->qdisc->qstats.backlog; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index d27f30a9a01d..fd8e4e83f5e0 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -687,14 +687,14 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_BEARER); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER); if (!attrs) goto msg_full; if (nla_put_string(msg->skb, TIPC_NLA_BEARER_NAME, bearer->name)) goto attr_msg_full; - prop = nla_nest_start(msg->skb, TIPC_NLA_BEARER_PROP); + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_PROP); if (!prop) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, bearer->priority)) @@ -1033,14 +1033,14 @@ static int __tipc_nl_add_media(struct tipc_nl_msg *msg, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_MEDIA); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA); if (!attrs) goto msg_full; if (nla_put_string(msg->skb, TIPC_NLA_MEDIA_NAME, media->name)) goto attr_msg_full; - prop = nla_nest_start(msg->skb, TIPC_NLA_MEDIA_PROP); + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_MEDIA_PROP); if (!prop) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, media->priority)) diff --git a/net/tipc/group.c b/net/tipc/group.c index 63f39201e41e..992be6113676 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -917,7 +917,7 @@ void tipc_group_member_evt(struct tipc_group *grp, int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb) { - struct nlattr *group = nla_nest_start(skb, TIPC_NLA_SOCK_GROUP); + struct nlattr *group = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_GROUP); if (!group) return -EMSGSIZE; diff --git a/net/tipc/link.c b/net/tipc/link.c index 6053489c8063..0327c8ff8d48 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2228,7 +2228,7 @@ static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) (s->accu_queue_sz / s->queue_sz_counts) : 0} }; - stats = nla_nest_start(skb, TIPC_NLA_LINK_STATS); + stats = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS); if (!stats) return -EMSGSIZE; @@ -2260,7 +2260,7 @@ int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK); if (!attrs) goto msg_full; @@ -2282,7 +2282,7 @@ int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE)) goto attr_msg_full; - prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP); if (!prop) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) @@ -2349,7 +2349,7 @@ static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, (stats->accu_queue_sz / stats->queue_sz_counts) : 0} }; - nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS); + nest = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS); if (!nest) return -EMSGSIZE; @@ -2389,7 +2389,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) return -EMSGSIZE; } - attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK); if (!attrs) goto msg_full; @@ -2406,7 +2406,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, 0)) goto attr_msg_full; - prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); + prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP); if (!prop) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 67f69389ec17..6a6eae88442f 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -696,7 +696,7 @@ static int __tipc_nl_add_monitor_peer(struct tipc_peer *peer, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_MON_PEER); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON_PEER); if (!attrs) goto msg_full; @@ -785,7 +785,7 @@ int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_MON); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON); if (!attrs) goto msg_full; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 89993afe0fbd..66a65c2cdb23 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -829,11 +829,11 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NAME_TABLE); if (!attrs) goto msg_full; - b = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE_PUBL); + b = nla_nest_start_noflag(msg->skb, TIPC_NLA_NAME_TABLE_PUBL); if (!b) goto attr_msg_full; diff --git a/net/tipc/net.c b/net/tipc/net.c index 7ce1e86b024f..0bba4e6b005c 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -187,7 +187,7 @@ static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg) if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_NET); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NET); if (!attrs) goto msg_full; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 340a6e7c43a7..36fe2dbb6d87 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -399,7 +399,7 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, b = (struct tipc_bearer_config *)TLV_DATA(msg->req); - bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER); if (!bearer) return -EMSGSIZE; @@ -419,7 +419,7 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, return -EMSGSIZE; if (ntohl(b->priority) <= TIPC_MAX_LINK_PRI) { - prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP); + prop = nla_nest_start_noflag(skb, TIPC_NLA_BEARER_PROP); if (!prop) return -EMSGSIZE; if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(b->priority))) @@ -441,7 +441,7 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, name = (char *)TLV_DATA(msg->req); - bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER); if (!bearer) return -EMSGSIZE; @@ -685,7 +685,7 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb, lc = (struct tipc_link_config *)TLV_DATA(msg->req); - media = nla_nest_start(skb, TIPC_NLA_MEDIA); + media = nla_nest_start_noflag(skb, TIPC_NLA_MEDIA); if (!media) return -EMSGSIZE; @@ -696,7 +696,7 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb, if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name)) return -EMSGSIZE; - prop = nla_nest_start(skb, TIPC_NLA_MEDIA_PROP); + prop = nla_nest_start_noflag(skb, TIPC_NLA_MEDIA_PROP); if (!prop) return -EMSGSIZE; @@ -717,7 +717,7 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb, lc = (struct tipc_link_config *)TLV_DATA(msg->req); - bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + bearer = nla_nest_start_noflag(skb, TIPC_NLA_BEARER); if (!bearer) return -EMSGSIZE; @@ -728,7 +728,7 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb, if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name)) return -EMSGSIZE; - prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP); + prop = nla_nest_start_noflag(skb, TIPC_NLA_BEARER_PROP); if (!prop) return -EMSGSIZE; @@ -748,14 +748,14 @@ static int __tipc_nl_compat_link_set(struct sk_buff *skb, lc = (struct tipc_link_config *)TLV_DATA(msg->req); - link = nla_nest_start(skb, TIPC_NLA_LINK); + link = nla_nest_start_noflag(skb, TIPC_NLA_LINK); if (!link) return -EMSGSIZE; if (nla_put_string(skb, TIPC_NLA_LINK_NAME, lc->name)) return -EMSGSIZE; - prop = nla_nest_start(skb, TIPC_NLA_LINK_PROP); + prop = nla_nest_start_noflag(skb, TIPC_NLA_LINK_PROP); if (!prop) return -EMSGSIZE; @@ -811,7 +811,7 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, name = (char *)TLV_DATA(msg->req); - link = nla_nest_start(skb, TIPC_NLA_LINK); + link = nla_nest_start_noflag(skb, TIPC_NLA_LINK); if (!link) return -EMSGSIZE; @@ -973,7 +973,7 @@ static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) return -EMSGSIZE; } - nest = nla_nest_start(args, TIPC_NLA_SOCK); + nest = nla_nest_start_noflag(args, TIPC_NLA_SOCK); if (!nest) { kfree_skb(args); return -EMSGSIZE; @@ -1100,7 +1100,7 @@ static int tipc_nl_compat_net_set(struct tipc_nl_compat_cmd_doit *cmd, val = ntohl(*(__be32 *)TLV_DATA(msg->req)); - net = nla_nest_start(skb, TIPC_NLA_NET); + net = nla_nest_start_noflag(skb, TIPC_NLA_NET); if (!net) return -EMSGSIZE; diff --git a/net/tipc/node.c b/net/tipc/node.c index 7478e2d4ec02..3777254a508f 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1359,7 +1359,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_NODE); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_NODE); if (!attrs) goto msg_full; @@ -2353,7 +2353,7 @@ static int __tipc_nl_add_monitor_prop(struct net *net, struct tipc_nl_msg *msg) if (!hdr) return -EMSGSIZE; - attrs = nla_nest_start(msg->skb, TIPC_NLA_MON); + attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON); if (!attrs) goto msg_full; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1385207a301f..7918f4763fdc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3273,7 +3273,7 @@ static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) peer_node = tsk_peer_node(tsk); peer_port = tsk_peer_port(tsk); - nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON); + nest = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_CON); if (!nest) return -EMSGSIZE; @@ -3332,7 +3332,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, if (!hdr) goto msg_cancel; - attrs = nla_nest_start(skb, TIPC_NLA_SOCK); + attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK); if (!attrs) goto genlmsg_cancel; @@ -3437,7 +3437,7 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, if (!(sk_filter_state & (1 << sk->sk_state))) return 0; - attrs = nla_nest_start(skb, TIPC_NLA_SOCK); + attrs = nla_nest_start_noflag(skb, TIPC_NLA_SOCK); if (!attrs) goto msg_cancel; @@ -3455,7 +3455,7 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, TIPC_NLA_SOCK_PAD)) goto attr_msg_cancel; - stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT); + stat = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_STAT); if (!stat) goto attr_msg_cancel; @@ -3512,7 +3512,7 @@ static int __tipc_nl_add_sk_publ(struct sk_buff *skb, if (!hdr) goto msg_cancel; - attrs = nla_nest_start(skb, TIPC_NLA_PUBL); + attrs = nla_nest_start_noflag(skb, TIPC_NLA_PUBL); if (!attrs) goto genlmsg_cancel; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 0884a1b8ad12..24d7c79598bb 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -523,7 +523,7 @@ int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b) if (!ub) return -ENODEV; - nest = nla_nest_start(msg->skb, TIPC_NLA_BEARER_UDP_OPTS); + nest = nla_nest_start_noflag(msg->skb, TIPC_NLA_BEARER_UDP_OPTS); if (!nest) goto msg_full; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e74d21f4108a..0bcd5ea4b4f2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -755,13 +755,13 @@ static int nl80211_msg_put_wmm_rules(struct sk_buff *msg, { int j; struct nlattr *nl_wmm_rules = - nla_nest_start(msg, NL80211_FREQUENCY_ATTR_WMM); + nla_nest_start_noflag(msg, NL80211_FREQUENCY_ATTR_WMM); if (!nl_wmm_rules) goto nla_put_failure; for (j = 0; j < IEEE80211_NUM_ACS; j++) { - struct nlattr *nl_wmm_rule = nla_nest_start(msg, j); + struct nlattr *nl_wmm_rule = nla_nest_start_noflag(msg, j); if (!nl_wmm_rule) goto nla_put_failure; @@ -890,7 +890,7 @@ static bool nl80211_put_txq_stats(struct sk_buff *msg, return false; \ } while (0) - txqattr = nla_nest_start(msg, attrtype); + txqattr = nla_nest_start_noflag(msg, attrtype); if (!txqattr) return false; @@ -1205,7 +1205,7 @@ static struct ieee80211_channel *nl80211_get_valid_chan(struct wiphy *wiphy, static int nl80211_put_iftypes(struct sk_buff *msg, u32 attr, u16 ifmodes) { - struct nlattr *nl_modes = nla_nest_start(msg, attr); + struct nlattr *nl_modes = nla_nest_start_noflag(msg, attr); int i; if (!nl_modes) @@ -1233,8 +1233,8 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, struct nlattr *nl_combis; int i, j; - nl_combis = nla_nest_start(msg, - NL80211_ATTR_INTERFACE_COMBINATIONS); + nl_combis = nla_nest_start_noflag(msg, + NL80211_ATTR_INTERFACE_COMBINATIONS); if (!nl_combis) goto nla_put_failure; @@ -1244,18 +1244,19 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, c = &wiphy->iface_combinations[i]; - nl_combi = nla_nest_start(msg, i + 1); + nl_combi = nla_nest_start_noflag(msg, i + 1); if (!nl_combi) goto nla_put_failure; - nl_limits = nla_nest_start(msg, NL80211_IFACE_COMB_LIMITS); + nl_limits = nla_nest_start_noflag(msg, + NL80211_IFACE_COMB_LIMITS); if (!nl_limits) goto nla_put_failure; for (j = 0; j < c->n_limits; j++) { struct nlattr *nl_limit; - nl_limit = nla_nest_start(msg, j + 1); + nl_limit = nla_nest_start_noflag(msg, j + 1); if (!nl_limit) goto nla_put_failure; if (nla_put_u32(msg, NL80211_IFACE_LIMIT_MAX, @@ -1308,7 +1309,8 @@ static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, if (!tcp) return 0; - nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + nl_tcp = nla_nest_start_noflag(msg, + NL80211_WOWLAN_TRIG_TCP_CONNECTION); if (!nl_tcp) return -ENOBUFS; @@ -1348,7 +1350,8 @@ static int nl80211_send_wowlan(struct sk_buff *msg, if (!rdev->wiphy.wowlan) return 0; - nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); + nl_wowlan = nla_nest_start_noflag(msg, + NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); if (!nl_wowlan) return -ENOBUFS; @@ -1477,7 +1480,8 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg, if (sband->n_iftype_data) { struct nlattr *nl_iftype_data = - nla_nest_start(msg, NL80211_BAND_ATTR_IFTYPE_DATA); + nla_nest_start_noflag(msg, + NL80211_BAND_ATTR_IFTYPE_DATA); int err; if (!nl_iftype_data) @@ -1486,7 +1490,7 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg, for (i = 0; i < sband->n_iftype_data; i++) { struct nlattr *iftdata; - iftdata = nla_nest_start(msg, i + 1); + iftdata = nla_nest_start_noflag(msg, i + 1); if (!iftdata) return -ENOBUFS; @@ -1502,12 +1506,12 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg, } /* add bitrates */ - nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES); + nl_rates = nla_nest_start_noflag(msg, NL80211_BAND_ATTR_RATES); if (!nl_rates) return -ENOBUFS; for (i = 0; i < sband->n_bitrates; i++) { - nl_rate = nla_nest_start(msg, i); + nl_rate = nla_nest_start_noflag(msg, i); if (!nl_rate) return -ENOBUFS; @@ -1540,12 +1544,12 @@ nl80211_send_mgmt_stypes(struct sk_buff *msg, if (!mgmt_stypes) return 0; - nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); + nl_ifs = nla_nest_start_noflag(msg, NL80211_ATTR_TX_FRAME_TYPES); if (!nl_ifs) return -ENOBUFS; for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { - nl_ftypes = nla_nest_start(msg, ift); + nl_ftypes = nla_nest_start_noflag(msg, ift); if (!nl_ftypes) return -ENOBUFS; i = 0; @@ -1563,12 +1567,12 @@ nl80211_send_mgmt_stypes(struct sk_buff *msg, nla_nest_end(msg, nl_ifs); - nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); + nl_ifs = nla_nest_start_noflag(msg, NL80211_ATTR_RX_FRAME_TYPES); if (!nl_ifs) return -ENOBUFS; for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { - nl_ftypes = nla_nest_start(msg, ift); + nl_ftypes = nla_nest_start_noflag(msg, ift); if (!nl_ftypes) return -ENOBUFS; i = 0; @@ -1686,7 +1690,7 @@ nl80211_send_pmsr_ftm_capa(const struct cfg80211_pmsr_capabilities *cap, if (!cap->ftm.supported) return 0; - ftm = nla_nest_start(msg, NL80211_PMSR_TYPE_FTM); + ftm = nla_nest_start_noflag(msg, NL80211_PMSR_TYPE_FTM); if (!ftm) return -ENOBUFS; @@ -1734,7 +1738,7 @@ static int nl80211_send_pmsr_capa(struct cfg80211_registered_device *rdev, * will genlmsg_cancel() if we fail */ - pmsr = nla_nest_start(msg, NL80211_ATTR_PEER_MEASUREMENTS); + pmsr = nla_nest_start_noflag(msg, NL80211_ATTR_PEER_MEASUREMENTS); if (!pmsr) return -ENOBUFS; @@ -1749,7 +1753,7 @@ static int nl80211_send_pmsr_capa(struct cfg80211_registered_device *rdev, nla_put_flag(msg, NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR)) return -ENOBUFS; - caps = nla_nest_start(msg, NL80211_PMSR_ATTR_TYPE_CAPA); + caps = nla_nest_start_noflag(msg, NL80211_PMSR_ATTR_TYPE_CAPA); if (!caps) return -ENOBUFS; @@ -1910,7 +1914,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, break; /* fall through */ case 3: - nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); + nl_bands = nla_nest_start_noflag(msg, + NL80211_ATTR_WIPHY_BANDS); if (!nl_bands) goto nla_put_failure; @@ -1923,7 +1928,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (!sband) continue; - nl_band = nla_nest_start(msg, band); + nl_band = nla_nest_start_noflag(msg, band); if (!nl_band) goto nla_put_failure; @@ -1937,15 +1942,16 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, /* fall through */ default: /* add frequencies */ - nl_freqs = nla_nest_start( - msg, NL80211_BAND_ATTR_FREQS); + nl_freqs = nla_nest_start_noflag(msg, + NL80211_BAND_ATTR_FREQS); if (!nl_freqs) goto nla_put_failure; for (i = state->chan_start - 1; i < sband->n_channels; i++) { - nl_freq = nla_nest_start(msg, i); + nl_freq = nla_nest_start_noflag(msg, + i); if (!nl_freq) goto nla_put_failure; @@ -1990,7 +1996,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, break; /* fall through */ case 4: - nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); + nl_cmds = nla_nest_start_noflag(msg, + NL80211_ATTR_SUPPORTED_COMMANDS); if (!nl_cmds) goto nla_put_failure; @@ -2138,7 +2145,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, const struct nl80211_vendor_cmd_info *info; struct nlattr *nested; - nested = nla_nest_start(msg, NL80211_ATTR_VENDOR_DATA); + nested = nla_nest_start_noflag(msg, + NL80211_ATTR_VENDOR_DATA); if (!nested) goto nla_put_failure; @@ -2154,8 +2162,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, const struct nl80211_vendor_cmd_info *info; struct nlattr *nested; - nested = nla_nest_start(msg, - NL80211_ATTR_VENDOR_EVENTS); + nested = nla_nest_start_noflag(msg, + NL80211_ATTR_VENDOR_EVENTS); if (!nested) goto nla_put_failure; @@ -2192,7 +2200,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, struct nlattr *nested; u32 bss_select_support = rdev->wiphy.bss_select_support; - nested = nla_nest_start(msg, NL80211_ATTR_BSS_SELECT); + nested = nla_nest_start_noflag(msg, + NL80211_ATTR_BSS_SELECT); if (!nested) goto nla_put_failure; @@ -2214,8 +2223,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, rdev->wiphy.iftype_ext_capab) { struct nlattr *nested_ext_capab, *nested; - nested = nla_nest_start(msg, - NL80211_ATTR_IFTYPE_EXT_CAPA); + nested = nla_nest_start_noflag(msg, + NL80211_ATTR_IFTYPE_EXT_CAPA); if (!nested) goto nla_put_failure; @@ -2225,7 +2234,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, capab = &rdev->wiphy.iftype_ext_capab[i]; - nested_ext_capab = nla_nest_start(msg, i); + nested_ext_capab = nla_nest_start_noflag(msg, + i); if (!nested_ext_capab || nla_put_u32(msg, NL80211_ATTR_IFTYPE, capab->iftype) || @@ -3539,7 +3549,7 @@ static void get_key_callback(void *c, struct key_params *params) params->cipher))) goto nla_put_failure; - key = nla_nest_start(cookie->msg, NL80211_ATTR_KEY); + key = nla_nest_start_noflag(cookie->msg, NL80211_ATTR_KEY); if (!key) goto nla_put_failure; @@ -4723,7 +4733,7 @@ bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, int attr) u16 bitrate_compat; enum nl80211_rate_info rate_flg; - rate = nla_nest_start(msg, attr); + rate = nla_nest_start_noflag(msg, attr); if (!rate) return false; @@ -4810,7 +4820,7 @@ static bool nl80211_put_signal(struct sk_buff *msg, u8 mask, s8 *signal, if (!mask) return true; - attr = nla_nest_start(msg, id); + attr = nla_nest_start_noflag(msg, id); if (!attr) return false; @@ -4845,7 +4855,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, nla_put_u32(msg, NL80211_ATTR_GENERATION, sinfo->generation)) goto nla_put_failure; - sinfoattr = nla_nest_start(msg, NL80211_ATTR_STA_INFO); + sinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_STA_INFO); if (!sinfoattr) goto nla_put_failure; @@ -4934,7 +4944,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO(CONNECTED_TO_GATE, connected_to_gate, u8); if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM)) { - bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); + bss_param = nla_nest_start_noflag(msg, + NL80211_STA_INFO_BSS_PARAM); if (!bss_param) goto nla_put_failure; @@ -4977,7 +4988,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, struct nlattr *tidsattr; int tid; - tidsattr = nla_nest_start(msg, NL80211_STA_INFO_TID_STATS); + tidsattr = nla_nest_start_noflag(msg, + NL80211_STA_INFO_TID_STATS); if (!tidsattr) goto nla_put_failure; @@ -4990,7 +5002,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, if (!tidstats->filled) continue; - tidattr = nla_nest_start(msg, tid + 1); + tidattr = nla_nest_start_noflag(msg, tid + 1); if (!tidattr) goto nla_put_failure; @@ -5875,7 +5887,7 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u32(msg, NL80211_ATTR_GENERATION, pinfo->generation)) goto nla_put_failure; - pinfoattr = nla_nest_start(msg, NL80211_ATTR_MPATH_INFO); + pinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_MPATH_INFO); if (!pinfoattr) goto nla_put_failure; if ((pinfo->filled & MPATH_INFO_FRAME_QLEN) && @@ -6326,7 +6338,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, NL80211_CMD_GET_MESH_CONFIG); if (!hdr) goto out; - pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_CONFIG); + pinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_MESH_CONFIG); if (!pinfoattr) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || @@ -6705,7 +6717,7 @@ static int nl80211_put_regdom(const struct ieee80211_regdomain *regdom, nla_put_u8(msg, NL80211_ATTR_DFS_REGION, regdom->dfs_region))) goto nla_put_failure; - nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES); + nl_reg_rules = nla_nest_start_noflag(msg, NL80211_ATTR_REG_RULES); if (!nl_reg_rules) goto nla_put_failure; @@ -6720,7 +6732,7 @@ static int nl80211_put_regdom(const struct ieee80211_regdomain *regdom, freq_range = ®_rule->freq_range; power_rule = ®_rule->power_rule; - nl_reg_rule = nla_nest_start(msg, i); + nl_reg_rule = nla_nest_start_noflag(msg, i); if (!nl_reg_rule) goto nla_put_failure; @@ -8389,7 +8401,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, NL80211_ATTR_PAD)) goto nla_put_failure; - bss = nla_nest_start(msg, NL80211_ATTR_BSS); + bss = nla_nest_start_noflag(msg, NL80211_ATTR_BSS); if (!bss) goto nla_put_failure; if ((!is_zero_ether_addr(res->bssid) && @@ -8566,7 +8578,7 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; - infoattr = nla_nest_start(msg, NL80211_ATTR_SURVEY_INFO); + infoattr = nla_nest_start_noflag(msg, NL80211_ATTR_SURVEY_INFO); if (!infoattr) goto nla_put_failure; @@ -9407,7 +9419,7 @@ __cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev, goto nla_put_failure; } - data = nla_nest_start(skb, attr); + data = nla_nest_start_noflag(skb, attr); if (!data) goto nla_put_failure; @@ -9581,7 +9593,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, break; } - tmdata = nla_nest_start(skb, NL80211_ATTR_TESTDATA); + tmdata = nla_nest_start_noflag(skb, NL80211_ATTR_TESTDATA); if (!tmdata) { genlmsg_cancel(skb, hdr); break; @@ -10859,12 +10871,12 @@ static int nl80211_send_wowlan_patterns(struct sk_buff *msg, if (!wowlan->n_patterns) return 0; - nl_pats = nla_nest_start(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN); + nl_pats = nla_nest_start_noflag(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN); if (!nl_pats) return -ENOBUFS; for (i = 0; i < wowlan->n_patterns; i++) { - nl_pat = nla_nest_start(msg, i + 1); + nl_pat = nla_nest_start_noflag(msg, i + 1); if (!nl_pat) return -ENOBUFS; pat_len = wowlan->patterns[i].pattern_len; @@ -10890,7 +10902,8 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg, if (!tcp) return 0; - nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + nl_tcp = nla_nest_start_noflag(msg, + NL80211_WOWLAN_TRIG_TCP_CONNECTION); if (!nl_tcp) return -ENOBUFS; @@ -10934,7 +10947,7 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, if (!req) return 0; - nd = nla_nest_start(msg, NL80211_WOWLAN_TRIG_NET_DETECT); + nd = nla_nest_start_noflag(msg, NL80211_WOWLAN_TRIG_NET_DETECT); if (!nd) return -ENOBUFS; @@ -10960,7 +10973,7 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, return -ENOBUFS; } - freqs = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQUENCIES); + freqs = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_FREQUENCIES); if (!freqs) return -ENOBUFS; @@ -10972,12 +10985,13 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, nla_nest_end(msg, freqs); if (req->n_match_sets) { - matches = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_MATCH); + matches = nla_nest_start_noflag(msg, + NL80211_ATTR_SCHED_SCAN_MATCH); if (!matches) return -ENOBUFS; for (i = 0; i < req->n_match_sets; i++) { - match = nla_nest_start(msg, i); + match = nla_nest_start_noflag(msg, i); if (!match) return -ENOBUFS; @@ -10990,12 +11004,12 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, nla_nest_end(msg, matches); } - scan_plans = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_PLANS); + scan_plans = nla_nest_start_noflag(msg, NL80211_ATTR_SCHED_SCAN_PLANS); if (!scan_plans) return -ENOBUFS; for (i = 0; i < req->n_scan_plans; i++) { - scan_plan = nla_nest_start(msg, i + 1); + scan_plan = nla_nest_start_noflag(msg, i + 1); if (!scan_plan) return -ENOBUFS; @@ -11044,7 +11058,8 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) if (rdev->wiphy.wowlan_config) { struct nlattr *nl_wowlan; - nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + nl_wowlan = nla_nest_start_noflag(msg, + NL80211_ATTR_WOWLAN_TRIGGERS); if (!nl_wowlan) goto nla_put_failure; @@ -11478,12 +11493,12 @@ static int nl80211_send_coalesce_rules(struct sk_buff *msg, if (!rdev->coalesce->n_rules) return 0; - nl_rules = nla_nest_start(msg, NL80211_ATTR_COALESCE_RULE); + nl_rules = nla_nest_start_noflag(msg, NL80211_ATTR_COALESCE_RULE); if (!nl_rules) return -ENOBUFS; for (i = 0; i < rdev->coalesce->n_rules; i++) { - nl_rule = nla_nest_start(msg, i + 1); + nl_rule = nla_nest_start_noflag(msg, i + 1); if (!nl_rule) return -ENOBUFS; @@ -11496,13 +11511,13 @@ static int nl80211_send_coalesce_rules(struct sk_buff *msg, rule->condition)) return -ENOBUFS; - nl_pats = nla_nest_start(msg, - NL80211_ATTR_COALESCE_RULE_PKT_PATTERN); + nl_pats = nla_nest_start_noflag(msg, + NL80211_ATTR_COALESCE_RULE_PKT_PATTERN); if (!nl_pats) return -ENOBUFS; for (j = 0; j < rule->n_patterns; j++) { - nl_pat = nla_nest_start(msg, j + 1); + nl_pat = nla_nest_start_noflag(msg, j + 1); if (!nl_pat) return -ENOBUFS; pat_len = rule->patterns[j].pattern_len; @@ -12254,7 +12269,7 @@ out: NL80211_ATTR_PAD)) goto nla_put_failure; - func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC); + func_attr = nla_nest_start_noflag(msg, NL80211_ATTR_NAN_FUNC); if (!func_attr) goto nla_put_failure; @@ -12371,11 +12386,12 @@ void cfg80211_nan_match(struct wireless_dev *wdev, nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, match->addr)) goto nla_put_failure; - match_attr = nla_nest_start(msg, NL80211_ATTR_NAN_MATCH); + match_attr = nla_nest_start_noflag(msg, NL80211_ATTR_NAN_MATCH); if (!match_attr) goto nla_put_failure; - local_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_LOCAL); + local_func_attr = nla_nest_start_noflag(msg, + NL80211_NAN_MATCH_FUNC_LOCAL); if (!local_func_attr) goto nla_put_failure; @@ -12384,7 +12400,8 @@ void cfg80211_nan_match(struct wireless_dev *wdev, nla_nest_end(msg, local_func_attr); - peer_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_PEER); + peer_func_attr = nla_nest_start_noflag(msg, + NL80211_NAN_MATCH_FUNC_PEER); if (!peer_func_attr) goto nla_put_failure; @@ -12450,7 +12467,7 @@ void cfg80211_nan_func_terminated(struct wireless_dev *wdev, NL80211_ATTR_PAD)) goto nla_put_failure; - func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC); + func_attr = nla_nest_start_noflag(msg, NL80211_ATTR_NAN_FUNC); if (!func_attr) goto nla_put_failure; @@ -12799,7 +12816,8 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, break; } - vendor_data = nla_nest_start(skb, NL80211_ATTR_VENDOR_DATA); + vendor_data = nla_nest_start_noflag(skb, + NL80211_ATTR_VENDOR_DATA); if (!vendor_data) { genlmsg_cancel(skb, hdr); break; @@ -13343,7 +13361,8 @@ static int nl80211_get_ftm_responder_stats(struct sk_buff *skb, if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; - ftm_stats_attr = nla_nest_start(msg, NL80211_ATTR_FTM_RESPONDER_STATS); + ftm_stats_attr = nla_nest_start_noflag(msg, + NL80211_ATTR_FTM_RESPONDER_STATS); if (!ftm_stats_attr) goto nla_put_failure; @@ -14366,7 +14385,7 @@ static int nl80211_add_scan_req(struct sk_buff *msg, if (WARN_ON(!req)) return 0; - nest = nla_nest_start(msg, NL80211_ATTR_SCAN_SSIDS); + nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_SSIDS); if (!nest) goto nla_put_failure; for (i = 0; i < req->n_ssids; i++) { @@ -14375,7 +14394,7 @@ static int nl80211_add_scan_req(struct sk_buff *msg, } nla_nest_end(msg, nest); - nest = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQUENCIES); + nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_FREQUENCIES); if (!nest) goto nla_put_failure; for (i = 0; i < req->n_channels; i++) { @@ -14637,7 +14656,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, if (uapsd_queues >= 0) { struct nlattr *nla_wmm = - nla_nest_start(msg, NL80211_ATTR_STA_WME); + nla_nest_start_noflag(msg, NL80211_ATTR_STA_WME); if (!nla_wmm) goto nla_put_failure; @@ -15078,7 +15097,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy, goto nla_put_failure; /* Before */ - nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_BEFORE); + nl_freq = nla_nest_start_noflag(msg, NL80211_ATTR_FREQ_BEFORE); if (!nl_freq) goto nla_put_failure; @@ -15087,7 +15106,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy, nla_nest_end(msg, nl_freq); /* After */ - nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_AFTER); + nl_freq = nla_nest_start_noflag(msg, NL80211_ATTR_FREQ_AFTER); if (!nl_freq) goto nla_put_failure; @@ -15521,7 +15540,7 @@ static struct sk_buff *cfg80211_prepare_cqm(struct net_device *dev, if (mac && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac)) goto nla_put_failure; - cb[1] = nla_nest_start(msg, NL80211_ATTR_CQM); + cb[1] = nla_nest_start_noflag(msg, NL80211_ATTR_CQM); if (!cb[1]) goto nla_put_failure; @@ -15682,7 +15701,7 @@ static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid)) goto nla_put_failure; - rekey_attr = nla_nest_start(msg, NL80211_ATTR_REKEY_DATA); + rekey_attr = nla_nest_start_noflag(msg, NL80211_ATTR_REKEY_DATA); if (!rekey_attr) goto nla_put_failure; @@ -15737,7 +15756,7 @@ nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) goto nla_put_failure; - attr = nla_nest_start(msg, NL80211_ATTR_PMKSA_CANDIDATE); + attr = nla_nest_start_noflag(msg, NL80211_ATTR_PMKSA_CANDIDATE); if (!attr) goto nla_put_failure; @@ -16047,15 +16066,15 @@ static int cfg80211_net_detect_results(struct sk_buff *msg, struct nlattr *nl_results, *nl_match, *nl_freqs; int i, j; - nl_results = nla_nest_start( - msg, NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS); + nl_results = nla_nest_start_noflag(msg, + NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS); if (!nl_results) return -EMSGSIZE; for (i = 0; i < nd->n_matches; i++) { struct cfg80211_wowlan_nd_match *match = nd->matches[i]; - nl_match = nla_nest_start(msg, i); + nl_match = nla_nest_start_noflag(msg, i); if (!nl_match) break; @@ -16073,8 +16092,8 @@ static int cfg80211_net_detect_results(struct sk_buff *msg, } if (match->n_channels) { - nl_freqs = nla_nest_start( - msg, NL80211_ATTR_SCAN_FREQUENCIES); + nl_freqs = nla_nest_start_noflag(msg, + NL80211_ATTR_SCAN_FREQUENCIES); if (!nl_freqs) { nla_nest_cancel(msg, nl_match); goto out; @@ -16133,7 +16152,8 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, if (wakeup) { struct nlattr *reasons; - reasons = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + reasons = nla_nest_start_noflag(msg, + NL80211_ATTR_WOWLAN_TRIGGERS); if (!reasons) goto free_msg; diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 5e2ab01d325c..5c80bccc8b3c 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -420,22 +420,22 @@ static int nl80211_pmsr_send_result(struct sk_buff *msg, { struct nlattr *pmsr, *peers, *peer, *resp, *data, *typedata; - pmsr = nla_nest_start(msg, NL80211_ATTR_PEER_MEASUREMENTS); + pmsr = nla_nest_start_noflag(msg, NL80211_ATTR_PEER_MEASUREMENTS); if (!pmsr) goto error; - peers = nla_nest_start(msg, NL80211_PMSR_ATTR_PEERS); + peers = nla_nest_start_noflag(msg, NL80211_PMSR_ATTR_PEERS); if (!peers) goto error; - peer = nla_nest_start(msg, 1); + peer = nla_nest_start_noflag(msg, 1); if (!peer) goto error; if (nla_put(msg, NL80211_PMSR_PEER_ATTR_ADDR, ETH_ALEN, res->addr)) goto error; - resp = nla_nest_start(msg, NL80211_PMSR_PEER_ATTR_RESP); + resp = nla_nest_start_noflag(msg, NL80211_PMSR_PEER_ATTR_RESP); if (!resp) goto error; @@ -452,11 +452,11 @@ static int nl80211_pmsr_send_result(struct sk_buff *msg, if (res->final && nla_put_flag(msg, NL80211_PMSR_RESP_ATTR_FINAL)) goto error; - data = nla_nest_start(msg, NL80211_PMSR_RESP_ATTR_DATA); + data = nla_nest_start_noflag(msg, NL80211_PMSR_RESP_ATTR_DATA); if (!data) goto error; - typedata = nla_nest_start(msg, res->type); + typedata = nla_nest_start_noflag(msg, res->type); if (!typedata) goto error; -- cgit From 8cb081746c031fb164089322e2336a0bf5b3070c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Apr 2019 14:07:28 +0200 Subject: netlink: make validation more configurable for future strictness We currently have two levels of strict validation: 1) liberal (default) - undefined (type >= max) & NLA_UNSPEC attributes accepted - attribute length >= expected accepted - garbage at end of message accepted 2) strict (opt-in) - NLA_UNSPEC attributes accepted - attribute length >= expected accepted Split out parsing strictness into four different options: * TRAILING - check that there's no trailing data after parsing attributes (in message or nested) * MAXTYPE - reject attrs > max known type * UNSPEC - reject attributes with NLA_UNSPEC policy entries * STRICT_ATTRS - strictly validate attribute size The default for future things should be *everything*. The current *_strict() is a combination of TRAILING and MAXTYPE, and is renamed to _deprecated_strict(). The current regular parsing has none of this, and is renamed to *_parse_deprecated(). Additionally it allows us to selectively set one of the new flags even on old policies. Notably, the UNSPEC flag could be useful in this case, since it can be arranged (by filling in the policy) to not be an incompatible userspace ABI change, but would then going forward prevent forgetting attribute entries. Similar can apply to the POLICY flag. We end up with the following renames: * nla_parse -> nla_parse_deprecated * nla_parse_strict -> nla_parse_deprecated_strict * nlmsg_parse -> nlmsg_parse_deprecated * nlmsg_parse_strict -> nlmsg_parse_deprecated_strict * nla_parse_nested -> nla_parse_nested_deprecated * nla_validate_nested -> nla_validate_nested_deprecated Using spatch, of course: @@ expression TB, MAX, HEAD, LEN, POL, EXT; @@ -nla_parse(TB, MAX, HEAD, LEN, POL, EXT) +nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression TB, MAX, NLA, POL, EXT; @@ -nla_parse_nested(TB, MAX, NLA, POL, EXT) +nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT) @@ expression START, MAX, POL, EXT; @@ -nla_validate_nested(START, MAX, POL, EXT) +nla_validate_nested_deprecated(START, MAX, POL, EXT) @@ expression NLH, HDRLEN, MAX, POL, EXT; @@ -nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT) +nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT) For this patch, don't actually add the strict, non-renamed versions yet so that it breaks compile if I get it wrong. Also, while at it, make nla_validate and nla_parse go down to a common __nla_validate_parse() function to avoid code duplication. Ultimately, this allows us to have very strict validation for every new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the next patch, while existing things will continue to work as is. In effect then, this adds fully strict validation for any new command. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- crypto/crypto_user_base.c | 4 +- drivers/block/drbd/drbd_nla.c | 3 +- drivers/block/nbd.c | 12 +- drivers/infiniband/core/addr.c | 4 +- drivers/infiniband/core/iwpm_util.c | 8 +- drivers/infiniband/core/nldev.c | 36 ++-- drivers/infiniband/core/sa_query.c | 8 +- drivers/net/ieee802154/mac802154_hwsim.c | 12 +- drivers/net/macsec.c | 8 +- drivers/net/team/team.c | 8 +- drivers/net/wireless/ath/ath10k/testmode.c | 4 +- drivers/net/wireless/ath/ath6kl/testmode.c | 4 +- drivers/net/wireless/ath/wcn36xx/testmode.c | 4 +- drivers/net/wireless/ath/wil6210/cfg80211.c | 24 ++- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 4 +- drivers/net/wireless/mac80211_hwsim.c | 8 +- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 4 +- drivers/net/wireless/ti/wlcore/testmode.c | 4 +- drivers/net/wireless/ti/wlcore/vendor_cmd.c | 8 +- include/net/genetlink.h | 16 +- include/net/netlink.h | 238 +++++++++++++++++----- kernel/taskstats.c | 5 +- lib/nlattr.c | 171 ++++++++-------- net/8021q/vlan_netlink.c | 4 +- net/bridge/br_mdb.c | 4 +- net/bridge/br_netlink.c | 6 +- net/bridge/br_netlink_tunnel.c | 4 +- net/can/gw.c | 4 +- net/core/devlink.c | 7 +- net/core/fib_rules.c | 6 +- net/core/lwt_bpf.c | 7 +- net/core/neighbour.c | 25 ++- net/core/net_namespace.c | 19 +- net/core/rtnetlink.c | 107 ++++++---- net/dcb/dcbnl.c | 90 ++++---- net/decnet/dn_dev.c | 8 +- net/decnet/dn_fib.c | 8 +- net/decnet/dn_route.c | 4 +- net/ieee802154/nl802154.c | 46 ++--- net/ipv4/devinet.c | 27 +-- net/ipv4/fib_frontend.c | 8 +- net/ipv4/ip_tunnel_core.c | 8 +- net/ipv4/ipmr.c | 12 +- net/ipv4/route.c | 8 +- net/ipv6/addrconf.c | 36 ++-- net/ipv6/addrlabel.c | 12 +- net/ipv6/ila/ila_lwt.c | 3 +- net/ipv6/route.c | 12 +- net/ipv6/seg6_iptunnel.c | 4 +- net/ipv6/seg6_local.c | 9 +- net/mpls/af_mpls.c | 26 +-- net/mpls/mpls_iptunnel.c | 4 +- net/ncsi/ncsi-netlink.c | 4 +- net/netfilter/ipset/ip_set_core.c | 36 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 13 +- net/netfilter/nf_conntrack_netlink.c | 45 ++-- net/netfilter/nf_conntrack_proto_dccp.c | 4 +- net/netfilter/nf_conntrack_proto_sctp.c | 4 +- net/netfilter/nf_conntrack_proto_tcp.c | 4 +- net/netfilter/nf_nat_core.c | 7 +- net/netfilter/nf_tables_api.c | 48 +++-- net/netfilter/nfnetlink.c | 15 +- net/netfilter/nfnetlink_acct.c | 4 +- net/netfilter/nfnetlink_cthelper.c | 22 +- net/netfilter/nfnetlink_cttimeout.c | 7 +- net/netfilter/nfnetlink_queue.c | 5 +- net/netfilter/nft_compat.c | 4 +- net/netfilter/nft_ct.c | 8 +- net/netfilter/nft_tunnel.c | 21 +- net/netlabel/netlabel_cipso_v4.c | 36 ++-- net/netlink/genetlink.c | 5 +- net/nfc/netlink.c | 12 +- net/openvswitch/datapath.c | 4 +- net/openvswitch/flow_netlink.c | 9 +- net/openvswitch/meter.c | 6 +- net/openvswitch/vport-vxlan.c | 4 +- net/phonet/pn_netlink.c | 8 +- net/qrtr/qrtr.c | 3 +- net/sched/act_api.c | 26 +-- net/sched/act_bpf.c | 3 +- net/sched/act_connmark.c | 4 +- net/sched/act_csum.c | 3 +- net/sched/act_gact.c | 3 +- net/sched/act_ife.c | 8 +- net/sched/act_ipt.c | 3 +- net/sched/act_mirred.c | 3 +- net/sched/act_nat.c | 3 +- net/sched/act_pedit.c | 8 +- net/sched/act_police.c | 3 +- net/sched/act_sample.c | 3 +- net/sched/act_simple.c | 3 +- net/sched/act_skbedit.c | 3 +- net/sched/act_skbmod.c | 3 +- net/sched/act_tunnel_key.c | 13 +- net/sched/act_vlan.c | 3 +- net/sched/cls_api.c | 20 +- net/sched/cls_basic.c | 4 +- net/sched/cls_bpf.c | 4 +- net/sched/cls_cgroup.c | 5 +- net/sched/cls_flow.c | 3 +- net/sched/cls_flower.c | 25 +-- net/sched/cls_fw.c | 3 +- net/sched/cls_matchall.c | 4 +- net/sched/cls_route.c | 3 +- net/sched/cls_rsvp.h | 3 +- net/sched/cls_tcindex.c | 3 +- net/sched/cls_u32.c | 3 +- net/sched/em_ipt.c | 4 +- net/sched/em_meta.c | 3 +- net/sched/ematch.c | 3 +- net/sched/sch_api.c | 19 +- net/sched/sch_atm.c | 3 +- net/sched/sch_cake.c | 3 +- net/sched/sch_cbq.c | 6 +- net/sched/sch_cbs.c | 3 +- net/sched/sch_choke.c | 3 +- net/sched/sch_codel.c | 3 +- net/sched/sch_drr.c | 3 +- net/sched/sch_dsmark.c | 6 +- net/sched/sch_etf.c | 3 +- net/sched/sch_fq.c | 3 +- net/sched/sch_fq_codel.c | 4 +- net/sched/sch_gred.c | 17 +- net/sched/sch_hfsc.c | 3 +- net/sched/sch_hhf.c | 3 +- net/sched/sch_htb.c | 6 +- net/sched/sch_mqprio.c | 5 +- net/sched/sch_netem.c | 5 +- net/sched/sch_pie.c | 3 +- net/sched/sch_qfq.c | 4 +- net/sched/sch_red.c | 3 +- net/sched/sch_sfb.c | 3 +- net/sched/sch_taprio.c | 19 +- net/sched/sch_tbf.c | 3 +- net/tipc/bearer.c | 42 ++-- net/tipc/link.c | 4 +- net/tipc/net.c | 6 +- net/tipc/netlink.c | 4 +- net/tipc/netlink_compat.c | 63 +++--- net/tipc/node.c | 38 ++-- net/tipc/socket.c | 6 +- net/tipc/udp_media.c | 13 +- net/wireless/nl80211.c | 191 +++++++++-------- net/wireless/pmsr.c | 18 +- net/xfrm/xfrm_user.c | 10 +- 145 files changed, 1233 insertions(+), 933 deletions(-) (limited to 'kernel') diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user_base.c index f25d3f32c9c2..e48da3b75c71 100644 --- a/crypto/crypto_user_base.c +++ b/crypto/crypto_user_base.c @@ -465,8 +465,8 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } - err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX, - crypto_policy, extack); + err = nlmsg_parse_deprecated(nlh, crypto_msg_min[type], attrs, + CRYPTOCFGA_MAX, crypto_policy, extack); if (err < 0) return err; diff --git a/drivers/block/drbd/drbd_nla.c b/drivers/block/drbd/drbd_nla.c index 8e261cb5198b..6a09b0b98018 100644 --- a/drivers/block/drbd/drbd_nla.c +++ b/drivers/block/drbd/drbd_nla.c @@ -35,7 +35,8 @@ int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, err = drbd_nla_check_mandatory(maxtype, nla); if (!err) - err = nla_parse_nested(tb, maxtype, nla, policy, NULL); + err = nla_parse_nested_deprecated(tb, maxtype, nla, policy, + NULL); return err; } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index cd27f236431d..69dc11f907a3 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1797,8 +1797,10 @@ again: ret = -EINVAL; goto out; } - ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, - nbd_sock_policy, info->extack); + ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX, + attr, + nbd_sock_policy, + info->extack); if (ret != 0) { printk(KERN_ERR "nbd: error processing sock list\n"); ret = -EINVAL; @@ -1968,8 +1970,10 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) ret = -EINVAL; goto out; } - ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, - nbd_sock_policy, info->extack); + ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX, + attr, + nbd_sock_policy, + info->extack); if (ret != 0) { printk(KERN_ERR "nbd: error processing sock list\n"); ret = -EINVAL; diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index f5ecb660fe7d..744b6ec0acb0 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -86,8 +86,8 @@ static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh) if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) return false; - ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), - nlmsg_len(nlh), ib_nl_addr_policy, NULL); + ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), + nlmsg_len(nlh), ib_nl_addr_policy, NULL); if (ret) return false; diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index a5d2a20ee697..41929bb83739 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -506,14 +506,14 @@ int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max, int ret; const char *err_str = ""; - ret = nlmsg_validate(cb->nlh, nlh_len, policy_max - 1, nlmsg_policy, - NULL); + ret = nlmsg_validate_deprecated(cb->nlh, nlh_len, policy_max - 1, + nlmsg_policy, NULL); if (ret) { err_str = "Invalid attribute"; goto parse_nlmsg_error; } - ret = nlmsg_parse(cb->nlh, nlh_len, nltb, policy_max - 1, - nlmsg_policy, NULL); + ret = nlmsg_parse_deprecated(cb->nlh, nlh_len, nltb, policy_max - 1, + nlmsg_policy, NULL); if (ret) { err_str = "Unable to parse the nlmsg"; goto parse_nlmsg_error; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index ad189a29cc67..85324012bf07 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -608,8 +608,8 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, u32 index; int err; - err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; @@ -653,8 +653,8 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, u32 index; int err; - err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, - extack); + err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; @@ -722,8 +722,8 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, u32 port; int err; - err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) @@ -778,8 +778,8 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, int err; unsigned int p; - err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, NULL); + err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NULL); if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; @@ -833,8 +833,8 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, u32 index; int ret; - ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; @@ -982,8 +982,8 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct sk_buff *msg; int ret; - ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id]) return -EINVAL; @@ -1071,8 +1071,8 @@ static int res_get_common_dumpit(struct sk_buff *skb, u32 index, port = 0; bool filled = false; - err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, NULL); + err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NULL); /* * Right now, we are expecting the device index to get res information, * but it is possible to extend this code to return all devices in @@ -1250,8 +1250,8 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, char type[IFNAMSIZ]; int err; - err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME]) return -EINVAL; @@ -1294,8 +1294,8 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, u32 index; int err; - err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); + err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 7925e45ea88a..bb534959abf0 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1028,8 +1028,8 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb, !(NETLINK_CB(skb).sk)) return -EPERM; - ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), - nlmsg_len(nlh), ib_nl_policy, NULL); + ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), + nlmsg_len(nlh), ib_nl_policy, NULL); attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT]; if (ret || !attr) goto settimeout_out; @@ -1080,8 +1080,8 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh) if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) return 0; - ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), - nlmsg_len(nlh), ib_nl_policy, NULL); + ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), + nlmsg_len(nlh), ib_nl_policy, NULL); if (ret) return 0; diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index 80ca300aba04..486a3a3bf35b 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -430,9 +430,7 @@ static int hwsim_new_edge_nl(struct sk_buff *msg, struct genl_info *info) !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; - if (nla_parse_nested(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, - info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], - hwsim_edge_policy, NULL)) + if (nla_parse_nested_deprecated(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]) @@ -494,9 +492,7 @@ static int hwsim_del_edge_nl(struct sk_buff *msg, struct genl_info *info) !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; - if (nla_parse_nested(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, - info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], - hwsim_edge_policy, NULL)) + if (nla_parse_nested_deprecated(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]) @@ -544,9 +540,7 @@ static int hwsim_set_edge_lqi(struct sk_buff *msg, struct genl_info *info) !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; - if (nla_parse_nested(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, - info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], - hwsim_edge_policy, NULL)) + if (nla_parse_nested_deprecated(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID] && diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 8dedb9a9781e..c3fa3d8da8f3 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1611,9 +1611,7 @@ static int parse_sa_config(struct nlattr **attrs, struct nlattr **tb_sa) if (!attrs[MACSEC_ATTR_SA_CONFIG]) return -EINVAL; - if (nla_parse_nested(tb_sa, MACSEC_SA_ATTR_MAX, - attrs[MACSEC_ATTR_SA_CONFIG], - macsec_genl_sa_policy, NULL)) + if (nla_parse_nested_deprecated(tb_sa, MACSEC_SA_ATTR_MAX, attrs[MACSEC_ATTR_SA_CONFIG], macsec_genl_sa_policy, NULL)) return -EINVAL; return 0; @@ -1624,9 +1622,7 @@ static int parse_rxsc_config(struct nlattr **attrs, struct nlattr **tb_rxsc) if (!attrs[MACSEC_ATTR_RXSC_CONFIG]) return -EINVAL; - if (nla_parse_nested(tb_rxsc, MACSEC_RXSC_ATTR_MAX, - attrs[MACSEC_ATTR_RXSC_CONFIG], - macsec_genl_rxsc_policy, NULL)) + if (nla_parse_nested_deprecated(tb_rxsc, MACSEC_RXSC_ATTR_MAX, attrs[MACSEC_ATTR_RXSC_CONFIG], macsec_genl_rxsc_policy, NULL)) return -EINVAL; return 0; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6306897c147f..be58445afbbc 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2510,9 +2510,11 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; goto team_put; } - err = nla_parse_nested(opt_attrs, TEAM_ATTR_OPTION_MAX, - nl_option, team_nl_option_policy, - info->extack); + err = nla_parse_nested_deprecated(opt_attrs, + TEAM_ATTR_OPTION_MAX, + nl_option, + team_nl_option_policy, + info->extack); if (err) goto team_put; if (!opt_attrs[TEAM_ATTR_OPTION_NAME] || diff --git a/drivers/net/wireless/ath/ath10k/testmode.c b/drivers/net/wireless/ath/ath10k/testmode.c index 6433ff10d80e..a29cfb9c72c2 100644 --- a/drivers/net/wireless/ath/ath10k/testmode.c +++ b/drivers/net/wireless/ath/ath10k/testmode.c @@ -416,8 +416,8 @@ int ath10k_tm_cmd(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct nlattr *tb[ATH10K_TM_ATTR_MAX + 1]; int ret; - ret = nla_parse(tb, ATH10K_TM_ATTR_MAX, data, len, ath10k_tm_policy, - NULL); + ret = nla_parse_deprecated(tb, ATH10K_TM_ATTR_MAX, data, len, + ath10k_tm_policy, NULL); if (ret) return ret; diff --git a/drivers/net/wireless/ath/ath6kl/testmode.c b/drivers/net/wireless/ath/ath6kl/testmode.c index d8dcacda9add..f3906dbe5495 100644 --- a/drivers/net/wireless/ath/ath6kl/testmode.c +++ b/drivers/net/wireless/ath/ath6kl/testmode.c @@ -74,8 +74,8 @@ int ath6kl_tm_cmd(struct wiphy *wiphy, struct wireless_dev *wdev, int err, buf_len; void *buf; - err = nla_parse(tb, ATH6KL_TM_ATTR_MAX, data, len, ath6kl_tm_policy, - NULL); + err = nla_parse_deprecated(tb, ATH6KL_TM_ATTR_MAX, data, len, + ath6kl_tm_policy, NULL); if (err) return err; diff --git a/drivers/net/wireless/ath/wcn36xx/testmode.c b/drivers/net/wireless/ath/wcn36xx/testmode.c index 51a038022c8b..7ae14b4d2d0e 100644 --- a/drivers/net/wireless/ath/wcn36xx/testmode.c +++ b/drivers/net/wireless/ath/wcn36xx/testmode.c @@ -132,8 +132,8 @@ int wcn36xx_tm_cmd(struct ieee80211_hw *hw, struct ieee80211_vif *vif, unsigned short attr; wcn36xx_dbg_dump(WCN36XX_DBG_TESTMODE_DUMP, "Data:", data, len); - ret = nla_parse(tb, WCN36XX_TM_ATTR_MAX, data, len, - wcn36xx_tm_policy, NULL); + ret = nla_parse_deprecated(tb, WCN36XX_TM_ATTR_MAX, data, len, + wcn36xx_tm_policy, NULL); if (ret) return ret; diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index cac18e61474e..9a67ad2a589c 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -2620,8 +2620,8 @@ static int wil_rf_sector_get_cfg(struct wiphy *wiphy, if (!test_bit(WMI_FW_CAPABILITY_RF_SECTORS, wil->fw_capabilities)) return -EOPNOTSUPP; - rc = nla_parse(tb, QCA_ATTR_DMG_RF_SECTOR_MAX, data, data_len, - wil_rf_sector_policy, NULL); + rc = nla_parse_deprecated(tb, QCA_ATTR_DMG_RF_SECTOR_MAX, data, + data_len, wil_rf_sector_policy, NULL); if (rc) { wil_err(wil, "Invalid rf sector ATTR\n"); return rc; @@ -2740,8 +2740,8 @@ static int wil_rf_sector_set_cfg(struct wiphy *wiphy, if (!test_bit(WMI_FW_CAPABILITY_RF_SECTORS, wil->fw_capabilities)) return -EOPNOTSUPP; - rc = nla_parse(tb, QCA_ATTR_DMG_RF_SECTOR_MAX, data, data_len, - wil_rf_sector_policy, NULL); + rc = nla_parse_deprecated(tb, QCA_ATTR_DMG_RF_SECTOR_MAX, data, + data_len, wil_rf_sector_policy, NULL); if (rc) { wil_err(wil, "Invalid rf sector ATTR\n"); return rc; @@ -2773,9 +2773,11 @@ static int wil_rf_sector_set_cfg(struct wiphy *wiphy, cmd.sector_type = sector_type; nla_for_each_nested(nl_cfg, tb[QCA_ATTR_DMG_RF_SECTOR_CFG], tmp) { - rc = nla_parse_nested(tb2, QCA_ATTR_DMG_RF_SECTOR_CFG_MAX, - nl_cfg, wil_rf_sector_cfg_policy, - NULL); + rc = nla_parse_nested_deprecated(tb2, + QCA_ATTR_DMG_RF_SECTOR_CFG_MAX, + nl_cfg, + wil_rf_sector_cfg_policy, + NULL); if (rc) { wil_err(wil, "invalid sector cfg\n"); return -EINVAL; @@ -2847,8 +2849,8 @@ static int wil_rf_sector_get_selected(struct wiphy *wiphy, if (!test_bit(WMI_FW_CAPABILITY_RF_SECTORS, wil->fw_capabilities)) return -EOPNOTSUPP; - rc = nla_parse(tb, QCA_ATTR_DMG_RF_SECTOR_MAX, data, data_len, - wil_rf_sector_policy, NULL); + rc = nla_parse_deprecated(tb, QCA_ATTR_DMG_RF_SECTOR_MAX, data, + data_len, wil_rf_sector_policy, NULL); if (rc) { wil_err(wil, "Invalid rf sector ATTR\n"); return rc; @@ -2955,8 +2957,8 @@ static int wil_rf_sector_set_selected(struct wiphy *wiphy, if (!test_bit(WMI_FW_CAPABILITY_RF_SECTORS, wil->fw_capabilities)) return -EOPNOTSUPP; - rc = nla_parse(tb, QCA_ATTR_DMG_RF_SECTOR_MAX, data, data_len, - wil_rf_sector_policy, NULL); + rc = nla_parse_deprecated(tb, QCA_ATTR_DMG_RF_SECTOR_MAX, data, + data_len, wil_rf_sector_policy, NULL); if (rc) { wil_err(wil, "Invalid rf sector ATTR\n"); return rc; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index d4c7f08f08e3..5c52469288be 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4465,8 +4465,8 @@ static int __iwl_mvm_mac_testmode_cmd(struct iwl_mvm *mvm, int err; u32 noa_duration; - err = nla_parse(tb, IWL_MVM_TM_ATTR_MAX, data, len, iwl_mvm_tm_policy, - NULL); + err = nla_parse_deprecated(tb, IWL_MVM_TM_ATTR_MAX, data, len, + iwl_mvm_tm_policy, NULL); if (err) return err; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 4c2145ba87a0..2a1aa2f6e7dc 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -409,8 +409,8 @@ static int mac80211_hwsim_vendor_cmd_test(struct wiphy *wiphy, int err; u32 val; - err = nla_parse(tb, QCA_WLAN_VENDOR_ATTR_MAX, data, data_len, - hwsim_vendor_test_policy, NULL); + err = nla_parse_deprecated(tb, QCA_WLAN_VENDOR_ATTR_MAX, data, + data_len, hwsim_vendor_test_policy, NULL); if (err) return err; if (!tb[QCA_WLAN_VENDOR_ATTR_TEST]) @@ -1936,8 +1936,8 @@ static int mac80211_hwsim_testmode_cmd(struct ieee80211_hw *hw, struct sk_buff *skb; int err, ps; - err = nla_parse(tb, HWSIM_TM_ATTR_MAX, data, len, - hwsim_testmode_policy, NULL); + err = nla_parse_deprecated(tb, HWSIM_TM_ATTR_MAX, data, len, + hwsim_testmode_policy, NULL); if (err) return err; diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index e582d9b3e50c..e11a4bb67172 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -4059,8 +4059,8 @@ static int mwifiex_tm_cmd(struct wiphy *wiphy, struct wireless_dev *wdev, if (!priv) return -EINVAL; - err = nla_parse(tb, MWIFIEX_TM_ATTR_MAX, data, len, mwifiex_tm_policy, - NULL); + err = nla_parse_deprecated(tb, MWIFIEX_TM_ATTR_MAX, data, len, + mwifiex_tm_policy, NULL); if (err) return err; diff --git a/drivers/net/wireless/ti/wlcore/testmode.c b/drivers/net/wireless/ti/wlcore/testmode.c index dcb2c8b0feb6..affefaaea1ea 100644 --- a/drivers/net/wireless/ti/wlcore/testmode.c +++ b/drivers/net/wireless/ti/wlcore/testmode.c @@ -372,8 +372,8 @@ int wl1271_tm_cmd(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 nla_cmd; int err; - err = nla_parse(tb, WL1271_TM_ATTR_MAX, data, len, wl1271_tm_policy, - NULL); + err = nla_parse_deprecated(tb, WL1271_TM_ATTR_MAX, data, len, + wl1271_tm_policy, NULL); if (err) return err; diff --git a/drivers/net/wireless/ti/wlcore/vendor_cmd.c b/drivers/net/wireless/ti/wlcore/vendor_cmd.c index 7f34ec077ee5..75756fb8e7b0 100644 --- a/drivers/net/wireless/ti/wlcore/vendor_cmd.c +++ b/drivers/net/wireless/ti/wlcore/vendor_cmd.c @@ -41,8 +41,8 @@ wlcore_vendor_cmd_smart_config_start(struct wiphy *wiphy, if (!data) return -EINVAL; - ret = nla_parse(tb, MAX_WLCORE_VENDOR_ATTR, data, data_len, - wlcore_vendor_attr_policy, NULL); + ret = nla_parse_deprecated(tb, MAX_WLCORE_VENDOR_ATTR, data, data_len, + wlcore_vendor_attr_policy, NULL); if (ret) return ret; @@ -122,8 +122,8 @@ wlcore_vendor_cmd_smart_config_set_group_key(struct wiphy *wiphy, if (!data) return -EINVAL; - ret = nla_parse(tb, MAX_WLCORE_VENDOR_ATTR, data, data_len, - wlcore_vendor_attr_policy, NULL); + ret = nla_parse_deprecated(tb, MAX_WLCORE_VENDOR_ATTR, data, data_len, + wlcore_vendor_attr_policy, NULL); if (ret) return ret; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 6850c7b1a3a6..897cdba13569 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -165,7 +165,7 @@ static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr) } /** - * genlmsg_parse - parse attributes of a genetlink message + * genlmsg_parse_deprecated - parse attributes of a genetlink message * @nlh: netlink message header * @family: genetlink message family * @tb: destination array with maxtype+1 elements @@ -173,14 +173,14 @@ static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr) * @policy: validation policy * @extack: extended ACK report struct */ -static inline int genlmsg_parse(const struct nlmsghdr *nlh, - const struct genl_family *family, - struct nlattr *tb[], int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +static inline int genlmsg_parse_deprecated(const struct nlmsghdr *nlh, + const struct genl_family *family, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { - return nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype, - policy, extack); + return __nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype, + policy, NL_VALIDATE_LIBERAL, extack); } /** diff --git a/include/net/netlink.h b/include/net/netlink.h index c77ed51c18f1..ab26a5e3558b 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -369,21 +369,48 @@ struct nl_info { bool skip_notify; }; +/** + * enum netlink_validation - netlink message/attribute validation levels + * @NL_VALIDATE_LIBERAL: Old-style "be liberal" validation, not caring about + * extra data at the end of the message, attributes being longer than + * they should be, or unknown attributes being present. + * @NL_VALIDATE_TRAILING: Reject junk data encountered after attribute parsing. + * @NL_VALIDATE_MAXTYPE: Reject attributes > max type; Together with _TRAILING + * this is equivalent to the old nla_parse_strict()/nlmsg_parse_strict(). + * @NL_VALIDATE_UNSPEC: Reject attributes with NLA_UNSPEC in the policy. + * This can safely be set by the kernel when the given policy has no + * NLA_UNSPEC anymore, and can thus be used to ensure policy entries + * are enforced going forward. + * @NL_VALIDATE_STRICT_ATTRS: strict attribute policy parsing (e.g. + * U8, U16, U32 must have exact size, etc.) + */ +enum netlink_validation { + NL_VALIDATE_LIBERAL = 0, + NL_VALIDATE_TRAILING = BIT(0), + NL_VALIDATE_MAXTYPE = BIT(1), + NL_VALIDATE_UNSPEC = BIT(2), + NL_VALIDATE_STRICT_ATTRS = BIT(3), +}; + +#define NL_VALIDATE_DEPRECATED_STRICT (NL_VALIDATE_TRAILING |\ + NL_VALIDATE_MAXTYPE) +#define NL_VALIDATE_STRICT (NL_VALIDATE_TRAILING |\ + NL_VALIDATE_MAXTYPE |\ + NL_VALIDATE_UNSPEC |\ + NL_VALIDATE_STRICT_ATTRS) + int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *, struct netlink_ext_ack *)); int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, int report, gfp_t flags); -int nla_validate(const struct nlattr *head, int len, int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack); -int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy, - struct netlink_ext_ack *extack); -int nla_parse_strict(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy, - struct netlink_ext_ack *extack); +int __nla_validate(const struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy, unsigned int validate, + struct netlink_ext_ack *extack); +int __nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, + int len, const struct nla_policy *policy, unsigned int validate, + struct netlink_ext_ack *extack); int nla_policy_len(const struct nla_policy *, int); struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype); size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); @@ -512,42 +539,121 @@ nlmsg_next(const struct nlmsghdr *nlh, int *remaining) } /** - * nlmsg_parse - parse attributes of a netlink message + * nla_parse_deprecated - Parse a stream of attributes into a tb buffer + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @head: head of attribute stream + * @len: length of attribute stream + * @policy: validation policy + * @extack: extended ACK pointer + * + * Parses a stream of attributes and stores a pointer to each attribute in + * the tb array accessible via the attribute type. Attributes with a type + * exceeding maxtype will be ignored and attributes from the policy are not + * always strictly validated (only for new attributes). + * + * Returns 0 on success or a negative error code. + */ +static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype, + const struct nlattr *head, int len, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_parse(tb, maxtype, head, len, policy, + NL_VALIDATE_LIBERAL, extack); +} + +/** + * nla_parse_deprecated_strict - Parse a stream of attributes into a tb buffer + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @head: head of attribute stream + * @len: length of attribute stream + * @policy: validation policy + * @extack: extended ACK pointer + * + * Parses a stream of attributes and stores a pointer to each attribute in + * the tb array accessible via the attribute type. Attributes with a type + * exceeding maxtype will be rejected as well as trailing data, but the + * policy is not completely strictly validated (only for new attributes). + * + * Returns 0 on success or a negative error code. + */ +static inline int nla_parse_deprecated_strict(struct nlattr **tb, int maxtype, + const struct nlattr *head, + int len, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_parse(tb, maxtype, head, len, policy, + NL_VALIDATE_DEPRECATED_STRICT, extack); +} + +/** + * __nlmsg_parse - parse attributes of a netlink message * @nlh: netlink message header * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @validate: validation strictness * @extack: extended ACK report struct * * See nla_parse() */ -static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, - struct nlattr *tb[], int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +static inline int __nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + unsigned int validate, + struct netlink_ext_ack *extack) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) { NL_SET_ERR_MSG(extack, "Invalid header length"); return -EINVAL; } - return nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), policy, extack); + return __nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), policy, validate, + extack); } -static inline int nlmsg_parse_strict(const struct nlmsghdr *nlh, int hdrlen, - struct nlattr *tb[], int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +/** + * nlmsg_parse_deprecated - parse attributes of a netlink message + * @nlh: netlink message header + * @hdrlen: length of family specific header + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @extack: extended ACK report struct + * + * See nla_parse_deprecated() + */ +static inline int nlmsg_parse_deprecated(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { - if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) { - NL_SET_ERR_MSG(extack, "Invalid header length"); - return -EINVAL; - } + return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy, + NL_VALIDATE_LIBERAL, extack); +} - return nla_parse_strict(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), policy, extack); +/** + * nlmsg_parse_deprecated_strict - parse attributes of a netlink message + * @nlh: netlink message header + * @hdrlen: length of family specific header + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @extack: extended ACK report struct + * + * See nla_parse_deprecated_strict() + */ +static inline int +nlmsg_parse_deprecated_strict(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy, + NL_VALIDATE_DEPRECATED_STRICT, extack); } /** @@ -566,26 +672,53 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, } /** - * nlmsg_validate - validate a netlink message including attributes + * nla_validate_deprecated - Validate a stream of attributes + * @head: head of attribute stream + * @len: length of attribute stream + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * @validate: validation strictness + * @extack: extended ACK report struct + * + * Validates all attributes in the specified attribute stream against the + * specified policy. Validation is done in liberal mode. + * See documenation of struct nla_policy for more details. + * + * Returns 0 on success or a negative error code. + */ +static inline int nla_validate_deprecated(const struct nlattr *head, int len, + int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_validate(head, len, maxtype, policy, NL_VALIDATE_LIBERAL, + extack); +} + + +/** + * nlmsg_validate_deprecated - validate a netlink message including attributes * @nlh: netlinket message header * @hdrlen: length of familiy specific header * @maxtype: maximum attribute type to be expected * @policy: validation policy * @extack: extended ACK report struct */ -static inline int nlmsg_validate(const struct nlmsghdr *nlh, - int hdrlen, int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh, + int hdrlen, int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) return -EINVAL; - return nla_validate(nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), maxtype, policy, - extack); + return __nla_validate(nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), maxtype, + policy, NL_VALIDATE_LIBERAL, extack); } + + /** * nlmsg_report - need to report back to application? * @nlh: netlink message header @@ -899,22 +1032,22 @@ nla_find_nested(const struct nlattr *nla, int attrtype) } /** - * nla_parse_nested - parse nested attributes + * nla_parse_nested_deprecated - parse nested attributes * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @nla: attribute containing the nested attributes * @policy: validation policy * @extack: extended ACK report struct * - * See nla_parse() + * See nla_parse_deprecated() */ -static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, - const struct nlattr *nla, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +static inline int nla_parse_nested_deprecated(struct nlattr *tb[], int maxtype, + const struct nlattr *nla, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { - return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy, - extack); + return __nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy, + NL_VALIDATE_LIBERAL, extack); } /** @@ -1489,6 +1622,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * @start: container attribute * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the nested attribute stream against the @@ -1497,12 +1631,22 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * * Returns 0 on success or a negative error code. */ -static inline int nla_validate_nested(const struct nlattr *start, int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +static inline int __nla_validate_nested(const struct nlattr *start, int maxtype, + const struct nla_policy *policy, + unsigned int validate, + struct netlink_ext_ack *extack) +{ + return __nla_validate(nla_data(start), nla_len(start), maxtype, policy, + validate, extack); +} + +static inline int +nla_validate_nested_deprecated(const struct nlattr *start, int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) { - return nla_validate(nla_data(start), nla_len(start), maxtype, policy, - extack); + return __nla_validate_nested(start, maxtype, policy, + NL_VALIDATE_LIBERAL, extack); } /** diff --git a/kernel/taskstats.c b/kernel/taskstats.c index ef4f9cd980fd..0e347f1c7800 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -677,8 +677,9 @@ static int taskstats_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, return -EINVAL; } - return nlmsg_validate(info->nlhdr, GENL_HDRLEN, TASKSTATS_CMD_ATTR_MAX, - policy, info->extack); + return nlmsg_validate_deprecated(info->nlhdr, GENL_HDRLEN, + TASKSTATS_CMD_ATTR_MAX, policy, + info->extack); } static struct genl_family family __ro_after_init = { diff --git a/lib/nlattr.c b/lib/nlattr.c index 465c9e8ef8a5..af0f8b0309c6 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -69,7 +69,8 @@ static int validate_nla_bitfield32(const struct nlattr *nla, static int nla_validate_array(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + unsigned int validate) { const struct nlattr *entry; int rem; @@ -86,8 +87,8 @@ static int nla_validate_array(const struct nlattr *head, int len, int maxtype, return -ERANGE; } - ret = nla_validate(nla_data(entry), nla_len(entry), - maxtype, policy, extack); + ret = __nla_validate(nla_data(entry), nla_len(entry), + maxtype, policy, validate, extack); if (ret < 0) return ret; } @@ -154,7 +155,7 @@ static int nla_validate_int_range(const struct nla_policy *pt, } static int validate_nla(const struct nlattr *nla, int maxtype, - const struct nla_policy *policy, + const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack) { const struct nla_policy *pt; @@ -172,6 +173,11 @@ static int validate_nla(const struct nlattr *nla, int maxtype, (pt->type == NLA_EXACT_LEN_WARN && attrlen != pt->len)) { pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", current->comm, type); + if (validate & NL_VALIDATE_STRICT_ATTRS) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "invalid attribute length"); + return -EINVAL; + } } switch (pt->type) { @@ -244,8 +250,9 @@ static int validate_nla(const struct nlattr *nla, int maxtype, if (attrlen < NLA_HDRLEN) goto out_err; if (pt->validation_data) { - err = nla_validate(nla_data(nla), nla_len(nla), pt->len, - pt->validation_data, extack); + err = __nla_validate(nla_data(nla), nla_len(nla), pt->len, + pt->validation_data, validate, + extack); if (err < 0) { /* * return directly to preserve the inner @@ -268,7 +275,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype, err = nla_validate_array(nla_data(nla), nla_len(nla), pt->len, pt->validation_data, - extack); + extack, validate); if (err < 0) { /* * return directly to preserve the inner @@ -280,6 +287,12 @@ static int validate_nla(const struct nlattr *nla, int maxtype, break; case NLA_UNSPEC: + if (validate & NL_VALIDATE_UNSPEC) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "Unsupported attribute"); + return -EINVAL; + } + /* fall through */ case NLA_MIN_LEN: if (attrlen < pt->len) goto out_err; @@ -322,37 +335,75 @@ out_err: return err; } +static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy, + unsigned int validate, + struct netlink_ext_ack *extack, + struct nlattr **tb) +{ + const struct nlattr *nla; + int rem; + + if (tb) + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + + nla_for_each_attr(nla, head, len, rem) { + u16 type = nla_type(nla); + + if (type == 0 || type > maxtype) { + if (validate & NL_VALIDATE_MAXTYPE) { + NL_SET_ERR_MSG(extack, "Unknown attribute type"); + return -EINVAL; + } + continue; + } + if (policy) { + int err = validate_nla(nla, maxtype, policy, + validate, extack); + + if (err < 0) + return err; + } + + if (tb) + tb[type] = (struct nlattr *)nla; + } + + if (unlikely(rem > 0)) { + pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n", + rem, current->comm); + NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes"); + if (validate & NL_VALIDATE_TRAILING) + return -EINVAL; + } + + return 0; +} + /** - * nla_validate - Validate a stream of attributes + * __nla_validate - Validate a stream of attributes * @head: head of attribute stream * @len: length of attribute stream * @maxtype: maximum attribute type to be expected * @policy: validation policy + * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the specified attribute stream against the - * specified policy. Attributes with a type exceeding maxtype will be - * ignored. See documenation of struct nla_policy for more details. + * specified policy. Validation depends on the validate flags passed, see + * &enum netlink_validation for more details on that. + * See documenation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ -int nla_validate(const struct nlattr *head, int len, int maxtype, - const struct nla_policy *policy, - struct netlink_ext_ack *extack) +int __nla_validate(const struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy, unsigned int validate, + struct netlink_ext_ack *extack) { - const struct nlattr *nla; - int rem; - - nla_for_each_attr(nla, head, len, rem) { - int err = validate_nla(nla, maxtype, policy, extack); - - if (err < 0) - return err; - } - - return 0; + return __nla_validate_parse(head, len, maxtype, policy, validate, + extack, NULL); } -EXPORT_SYMBOL(nla_validate); +EXPORT_SYMBOL(__nla_validate); /** * nla_policy_len - Determin the max. length of a policy @@ -384,76 +435,30 @@ nla_policy_len(const struct nla_policy *p, int n) EXPORT_SYMBOL(nla_policy_len); /** - * nla_parse - Parse a stream of attributes into a tb buffer + * __nla_parse - Parse a stream of attributes into a tb buffer * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream * @policy: validation policy + * @validate: validation strictness + * @extack: extended ACK pointer * * Parses a stream of attributes and stores a pointer to each attribute in - * the tb array accessible via the attribute type. Attributes with a type - * exceeding maxtype will be silently ignored for backwards compatibility - * reasons. policy may be set to NULL if no validation is required. + * the tb array accessible via the attribute type. + * Validation is controlled by the @validate parameter. * * Returns 0 on success or a negative error code. */ -static int __nla_parse(struct nlattr **tb, int maxtype, - const struct nlattr *head, int len, - bool strict, const struct nla_policy *policy, - struct netlink_ext_ack *extack) -{ - const struct nlattr *nla; - int rem; - - memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); - - nla_for_each_attr(nla, head, len, rem) { - u16 type = nla_type(nla); - - if (type == 0 || type > maxtype) { - if (strict) { - NL_SET_ERR_MSG(extack, "Unknown attribute type"); - return -EINVAL; - } - continue; - } - if (policy) { - int err = validate_nla(nla, maxtype, policy, extack); - - if (err < 0) - return err; - } - - tb[type] = (struct nlattr *)nla; - } - - if (unlikely(rem > 0)) { - pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n", - rem, current->comm); - NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes"); - if (strict) - return -EINVAL; - } - - return 0; -} - -int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy, - struct netlink_ext_ack *extack) -{ - return __nla_parse(tb, maxtype, head, len, false, policy, extack); -} -EXPORT_SYMBOL(nla_parse); - -int nla_parse_strict(struct nlattr **tb, int maxtype, const struct nlattr *head, - int len, const struct nla_policy *policy, - struct netlink_ext_ack *extack) +int __nla_parse(struct nlattr **tb, int maxtype, + const struct nlattr *head, int len, + const struct nla_policy *policy, unsigned int validate, + struct netlink_ext_ack *extack) { - return __nla_parse(tb, maxtype, head, len, true, policy, extack); + return __nla_validate_parse(head, len, maxtype, policy, validate, + extack, tb); } -EXPORT_SYMBOL(nla_parse_strict); +EXPORT_SYMBOL(__nla_parse); /** * nla_find - Find a specific attribute in a stream of attributes diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index ab4921e7797b..24eebbc92364 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -35,8 +35,8 @@ static inline int vlan_validate_qos_map(struct nlattr *attr) { if (!attr) return 0; - return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy, - NULL); + return nla_validate_nested_deprecated(attr, IFLA_VLAN_QOS_MAX, + vlan_map_policy, NULL); } static int vlan_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 3619c1a12a77..bf6acd34234d 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -530,8 +530,8 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *dev; int err; - err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL, - NULL); + err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, + MDBA_SET_ENTRY_MAX, NULL, NULL); if (err < 0) return err; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 348ddb6d09bb..a5acad29cd4f 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -880,8 +880,10 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags, if (p && protinfo) { if (protinfo->nla_type & NLA_F_NESTED) { - err = nla_parse_nested(tb, IFLA_BRPORT_MAX, protinfo, - br_port_policy, NULL); + err = nla_parse_nested_deprecated(tb, IFLA_BRPORT_MAX, + protinfo, + br_port_policy, + NULL); if (err) return err; diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c index 787e140dc4b5..34629d558709 100644 --- a/net/bridge/br_netlink_tunnel.c +++ b/net/bridge/br_netlink_tunnel.c @@ -230,8 +230,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr, memset(tinfo, 0, sizeof(*tinfo)); - err = nla_parse_nested(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, attr, - vlan_tunnel_policy, NULL); + err = nla_parse_nested_deprecated(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, + attr, vlan_tunnel_policy, NULL); if (err < 0) return err; diff --git a/net/can/gw.c b/net/can/gw.c index 53859346dc9a..5275ddf580bc 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -662,8 +662,8 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, /* initialize modification & checksum data space */ memset(mod, 0, sizeof(*mod)); - err = nlmsg_parse(nlh, sizeof(struct rtcanmsg), tb, CGW_MAX, - cgw_policy, NULL); + err = nlmsg_parse_deprecated(nlh, sizeof(struct rtcanmsg), tb, + CGW_MAX, cgw_policy, NULL); if (err < 0) return err; diff --git a/net/core/devlink.c b/net/core/devlink.c index b94f326f5f06..b020d182c9fc 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3674,9 +3674,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, if (!attrs) return -ENOMEM; - err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize, - attrs, DEVLINK_ATTR_MAX, devlink_nl_family.policy, - cb->extack); + err = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + devlink_nl_family.hdrsize, + attrs, DEVLINK_ATTR_MAX, + devlink_nl_family.policy, cb->extack); if (err) goto out_free; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index ffbb827723a2..18f8dd8329ed 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -746,7 +746,8 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX, + ops->policy, extack); if (err < 0) { NL_SET_ERR_MSG(extack, "Error parsing msg"); goto errout; @@ -853,7 +854,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } - err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*frh), tb, FRA_MAX, + ops->policy, extack); if (err < 0) { NL_SET_ERR_MSG(extack, "Error parsing msg"); goto errout; diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index bbdfc8db1960..1c94f529f4a1 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -343,8 +343,8 @@ static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog, int ret; u32 fd; - ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy, - NULL); + ret = nla_parse_nested_deprecated(tb, LWT_BPF_PROG_MAX, attr, + bpf_prog_policy, NULL); if (ret < 0) return ret; @@ -385,7 +385,8 @@ static int bpf_build_state(struct nlattr *nla, if (family != AF_INET && family != AF_INET6) return -EAFNOSUPPORT; - ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, extack); + ret = nla_parse_nested_deprecated(tb, LWT_BPF_MAX, nla, bpf_nl_policy, + extack); if (ret < 0) return ret; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index efd0b53d9ca4..e73bfc63e473 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1862,7 +1862,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, int err; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nda_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, + nda_policy, extack); if (err < 0) goto out; @@ -2181,8 +2182,8 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, bool found = false; int err, tidx; - err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, - nl_neightbl_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, + nl_neightbl_policy, extack); if (err < 0) goto errout; @@ -2219,8 +2220,9 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, struct neigh_parms *p; int i, ifindex = 0; - err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS], - nl_ntbl_parm_policy, extack); + err = nla_parse_nested_deprecated(tbp, NDTPA_MAX, + tb[NDTA_PARMS], + nl_ntbl_parm_policy, extack); if (err < 0) goto errout_tbl_lock; @@ -2660,11 +2662,12 @@ static int neigh_valid_dump_req(const struct nlmsghdr *nlh, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, - nda_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), + tb, NDA_MAX, nda_policy, + extack); } else { - err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, - nda_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb, + NDA_MAX, nda_policy, extack); } if (err < 0) return err; @@ -2764,8 +2767,8 @@ static int neigh_valid_get_req(const struct nlmsghdr *nlh, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, - nda_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, + NDA_MAX, nda_policy, extack); if (err < 0) return err; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ebb5b6d21a13..711b161505ac 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -682,8 +682,8 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *peer; int nsid, err; - err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, - rtnl_net_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), tb, + NETNSA_MAX, rtnl_net_policy, extack); if (err < 0) return err; if (!tb[NETNSA_NSID]) { @@ -787,11 +787,13 @@ static int rtnl_net_valid_getid_req(struct sk_buff *skb, int i, err; if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, - rtnl_net_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), + tb, NETNSA_MAX, rtnl_net_policy, + extack); - err = nlmsg_parse_strict(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, - rtnl_net_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb, + NETNSA_MAX, rtnl_net_policy, + extack); if (err) return err; @@ -929,8 +931,9 @@ static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk, struct nlattr *tb[NETNSA_MAX + 1]; int err, i; - err = nlmsg_parse_strict(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, - rtnl_net_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb, + NETNSA_MAX, rtnl_net_policy, + extack); if (err < 0) return err; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8ad44b299e72..2bd12afb9297 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1797,8 +1797,7 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla const struct rtnl_link_ops *ops = NULL; struct nlattr *linfo[IFLA_INFO_MAX + 1]; - if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, - ifla_info_policy, NULL) < 0) + if (nla_parse_nested_deprecated(linfo, IFLA_INFO_MAX, nla, ifla_info_policy, NULL) < 0) return NULL; if (linfo[IFLA_INFO_KIND]) { @@ -1897,8 +1896,9 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh, return -EINVAL; } - return nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFLA_MAX, - ifla_policy, extack); + return nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, + IFLA_MAX, ifla_policy, + extack); } /* A hack to preserve kernel<->userspace interface. @@ -1911,7 +1911,8 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh, hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ? sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); - return nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, extack); + return nlmsg_parse_deprecated(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, + extack); } static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) @@ -2019,7 +2020,8 @@ out_err: int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, struct netlink_ext_ack *exterr) { - return nla_parse(tb, IFLA_MAX, head, len, ifla_policy, exterr); + return nla_parse_deprecated(tb, IFLA_MAX, head, len, ifla_policy, + exterr); } EXPORT_SYMBOL(rtnl_nla_parse_ifla); @@ -2564,8 +2566,10 @@ static int do_setlink(const struct sk_buff *skb, err = -EINVAL; goto errout; } - err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr, - ifla_vf_policy, NULL); + err = nla_parse_nested_deprecated(vfinfo, IFLA_VF_MAX, + attr, + ifla_vf_policy, + NULL); if (err < 0) goto errout; err = do_setvfinfo(dev, vfinfo); @@ -2592,8 +2596,10 @@ static int do_setlink(const struct sk_buff *skb, err = -EINVAL; goto errout; } - err = nla_parse_nested(port, IFLA_PORT_MAX, attr, - ifla_port_policy, NULL); + err = nla_parse_nested_deprecated(port, IFLA_PORT_MAX, + attr, + ifla_port_policy, + NULL); if (err < 0) goto errout; if (!port[IFLA_PORT_VF]) { @@ -2612,9 +2618,9 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_PORT_SELF]) { struct nlattr *port[IFLA_PORT_MAX+1]; - err = nla_parse_nested(port, IFLA_PORT_MAX, - tb[IFLA_PORT_SELF], ifla_port_policy, - NULL); + err = nla_parse_nested_deprecated(port, IFLA_PORT_MAX, + tb[IFLA_PORT_SELF], + ifla_port_policy, NULL); if (err < 0) goto errout; @@ -2661,8 +2667,9 @@ static int do_setlink(const struct sk_buff *skb, struct nlattr *xdp[IFLA_XDP_MAX + 1]; u32 xdp_flags = 0; - err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP], - ifla_xdp_policy, NULL); + err = nla_parse_nested_deprecated(xdp, IFLA_XDP_MAX, + tb[IFLA_XDP], + ifla_xdp_policy, NULL); if (err < 0) goto errout; @@ -2716,8 +2723,8 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *tb[IFLA_MAX+1]; char ifname[IFNAMSIZ]; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, + ifla_policy, extack); if (err < 0) goto errout; @@ -2813,7 +2820,8 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, int err; int netnsid = -1; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, + ifla_policy, extack); if (err < 0) return err; @@ -2990,7 +2998,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, #ifdef CONFIG_MODULES replay: #endif - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, + ifla_policy, extack); if (err < 0) return err; @@ -3024,9 +3033,9 @@ replay: return err; if (tb[IFLA_LINKINFO]) { - err = nla_parse_nested(linkinfo, IFLA_INFO_MAX, - tb[IFLA_LINKINFO], ifla_info_policy, - NULL); + err = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX, + tb[IFLA_LINKINFO], + ifla_info_policy, NULL); if (err < 0) return err; } else @@ -3046,9 +3055,9 @@ replay: return -EINVAL; if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { - err = nla_parse_nested(attr, ops->maxtype, - linkinfo[IFLA_INFO_DATA], - ops->policy, extack); + err = nla_parse_nested_deprecated(attr, ops->maxtype, + linkinfo[IFLA_INFO_DATA], + ops->policy, extack); if (err < 0) return err; data = attr; @@ -3067,9 +3076,11 @@ replay: if (m_ops->slave_maxtype && linkinfo[IFLA_INFO_SLAVE_DATA]) { - err = nla_parse_nested(slave_attr, m_ops->slave_maxtype, - linkinfo[IFLA_INFO_SLAVE_DATA], - m_ops->slave_policy, extack); + err = nla_parse_nested_deprecated(slave_attr, + m_ops->slave_maxtype, + linkinfo[IFLA_INFO_SLAVE_DATA], + m_ops->slave_policy, + extack); if (err < 0) return err; slave_data = slave_attr; @@ -3250,8 +3261,8 @@ static int rtnl_valid_getlink_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, - extack); + return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX, + ifla_policy, extack); ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || @@ -3260,8 +3271,8 @@ static int rtnl_valid_getlink_req(struct sk_buff *skb, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, - extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFLA_MAX, + ifla_policy, extack); if (err) return err; @@ -3366,7 +3377,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ? sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); - if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) { + if (nlmsg_parse_deprecated(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); } @@ -3639,7 +3650,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, u16 vid; int err; - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, + extack); if (err < 0) return err; @@ -3749,7 +3761,8 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, + extack); if (err < 0) return err; @@ -3898,8 +3911,8 @@ static int valid_fdb_dump_strict(const struct nlmsghdr *nlh, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, - NULL, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, + NDA_MAX, NULL, extack); if (err < 0) return err; @@ -3951,8 +3964,9 @@ static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh, nla_attr_size(sizeof(u32)))) { struct ifinfomsg *ifm; - err = nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, - ifla_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), + tb, IFLA_MAX, ifla_policy, + extack); if (err < 0) { return -EINVAL; } else if (err == 0) { @@ -4091,8 +4105,8 @@ static int valid_fdb_get_strict(const struct nlmsghdr *nlh, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, - nda_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, + NDA_MAX, nda_policy, extack); if (err < 0) return err; @@ -4354,11 +4368,14 @@ static int valid_bridge_getlink_req(const struct nlmsghdr *nlh, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(struct ifinfomsg), tb, - IFLA_MAX, ifla_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, + sizeof(struct ifinfomsg), + tb, IFLA_MAX, ifla_policy, + extack); } else { - err = nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, - IFLA_MAX, ifla_policy, extack); + err = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), + tb, IFLA_MAX, ifla_policy, + extack); } if (err < 0) return err; diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 3fd3aa7348bd..ceff9d22deea 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -241,8 +241,9 @@ static int dcbnl_getpfccfg(struct net_device *netdev, struct nlmsghdr *nlh, if (!netdev->dcbnl_ops->getpfccfg) return -EOPNOTSUPP; - ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX, - tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_PFC_UP_ATTR_MAX, + tb[DCB_ATTR_PFC_CFG], + dcbnl_pfc_up_nest, NULL); if (ret) return ret; @@ -299,8 +300,9 @@ static int dcbnl_getcap(struct net_device *netdev, struct nlmsghdr *nlh, if (!netdev->dcbnl_ops->getcap) return -EOPNOTSUPP; - ret = nla_parse_nested(data, DCB_CAP_ATTR_MAX, tb[DCB_ATTR_CAP], - dcbnl_cap_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_CAP_ATTR_MAX, + tb[DCB_ATTR_CAP], dcbnl_cap_nest, + NULL); if (ret) return ret; @@ -343,8 +345,9 @@ static int dcbnl_getnumtcs(struct net_device *netdev, struct nlmsghdr *nlh, if (!netdev->dcbnl_ops->getnumtcs) return -EOPNOTSUPP; - ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS], - dcbnl_numtcs_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_NUMTCS_ATTR_MAX, + tb[DCB_ATTR_NUMTCS], + dcbnl_numtcs_nest, NULL); if (ret) return ret; @@ -388,8 +391,9 @@ static int dcbnl_setnumtcs(struct net_device *netdev, struct nlmsghdr *nlh, if (!netdev->dcbnl_ops->setnumtcs) return -EOPNOTSUPP; - ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS], - dcbnl_numtcs_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_NUMTCS_ATTR_MAX, + tb[DCB_ATTR_NUMTCS], + dcbnl_numtcs_nest, NULL); if (ret) return ret; @@ -447,8 +451,9 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_APP]) return -EINVAL; - ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP], - dcbnl_app_nest, NULL); + ret = nla_parse_nested_deprecated(app_tb, DCB_APP_ATTR_MAX, + tb[DCB_ATTR_APP], dcbnl_app_nest, + NULL); if (ret) return ret; @@ -515,8 +520,9 @@ static int dcbnl_setapp(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_APP]) return -EINVAL; - ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP], - dcbnl_app_nest, NULL); + ret = nla_parse_nested_deprecated(app_tb, DCB_APP_ATTR_MAX, + tb[DCB_ATTR_APP], dcbnl_app_nest, + NULL); if (ret) return ret; @@ -573,8 +579,9 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->getpgbwgcfgrx) return -EOPNOTSUPP; - ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG], - dcbnl_pg_nest, NULL); + ret = nla_parse_nested_deprecated(pg_tb, DCB_PG_ATTR_MAX, + tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest, + NULL); if (ret) return ret; @@ -593,8 +600,9 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, data = pg_tb[DCB_PG_ATTR_TC_ALL]; else data = pg_tb[i]; - ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, data, - dcbnl_tc_param_nest, NULL); + ret = nla_parse_nested_deprecated(param_tb, + DCB_TC_ATTR_PARAM_MAX, data, + dcbnl_tc_param_nest, NULL); if (ret) goto err_pg; @@ -730,8 +738,9 @@ static int dcbnl_setpfccfg(struct net_device *netdev, struct nlmsghdr *nlh, if (!netdev->dcbnl_ops->setpfccfg) return -EOPNOTSUPP; - ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX, - tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_PFC_UP_ATTR_MAX, + tb[DCB_ATTR_PFC_CFG], + dcbnl_pfc_up_nest, NULL); if (ret) return ret; @@ -786,8 +795,9 @@ static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->setpgbwgcfgrx) return -EOPNOTSUPP; - ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG], - dcbnl_pg_nest, NULL); + ret = nla_parse_nested_deprecated(pg_tb, DCB_PG_ATTR_MAX, + tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest, + NULL); if (ret) return ret; @@ -795,8 +805,10 @@ static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, if (!pg_tb[i]) continue; - ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, - pg_tb[i], dcbnl_tc_param_nest, NULL); + ret = nla_parse_nested_deprecated(param_tb, + DCB_TC_ATTR_PARAM_MAX, + pg_tb[i], + dcbnl_tc_param_nest, NULL); if (ret) return ret; @@ -884,8 +896,9 @@ static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->getbcncfg) return -EOPNOTSUPP; - ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN], - dcbnl_bcn_nest, NULL); + ret = nla_parse_nested_deprecated(bcn_tb, DCB_BCN_ATTR_MAX, + tb[DCB_ATTR_BCN], dcbnl_bcn_nest, + NULL); if (ret) return ret; @@ -943,8 +956,9 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, !netdev->dcbnl_ops->setbcnrp) return -EOPNOTSUPP; - ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN], - dcbnl_pfc_up_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_BCN_ATTR_MAX, + tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest, + NULL); if (ret) return ret; @@ -1431,8 +1445,9 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_IEEE]) return -EINVAL; - err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, tb[DCB_ATTR_IEEE], - dcbnl_ieee_policy, NULL); + err = nla_parse_nested_deprecated(ieee, DCB_ATTR_IEEE_MAX, + tb[DCB_ATTR_IEEE], + dcbnl_ieee_policy, NULL); if (err) return err; @@ -1531,8 +1546,9 @@ static int dcbnl_ieee_del(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_IEEE]) return -EINVAL; - err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, tb[DCB_ATTR_IEEE], - dcbnl_ieee_policy, NULL); + err = nla_parse_nested_deprecated(ieee, DCB_ATTR_IEEE_MAX, + tb[DCB_ATTR_IEEE], + dcbnl_ieee_policy, NULL); if (err) return err; @@ -1604,8 +1620,9 @@ static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_FEATCFG]) return -EINVAL; - ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, - tb[DCB_ATTR_FEATCFG], dcbnl_featcfg_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_FEATCFG_ATTR_MAX, + tb[DCB_ATTR_FEATCFG], + dcbnl_featcfg_nest, NULL); if (ret) return ret; @@ -1648,8 +1665,9 @@ static int dcbnl_setfeatcfg(struct net_device *netdev, struct nlmsghdr *nlh, if (!tb[DCB_ATTR_FEATCFG]) return -EINVAL; - ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, - tb[DCB_ATTR_FEATCFG], dcbnl_featcfg_nest, NULL); + ret = nla_parse_nested_deprecated(data, DCB_FEATCFG_ATTR_MAX, + tb[DCB_ATTR_FEATCFG], + dcbnl_featcfg_nest, NULL); if (ret) goto err; @@ -1738,8 +1756,8 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if ((nlh->nlmsg_type == RTM_SETDCB) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX, - dcbnl_rtnl_policy, extack); + ret = nlmsg_parse_deprecated(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX, + dcbnl_rtnl_policy, extack); if (ret < 0) return ret; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 0962f9201baa..cca7ae712995 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -583,8 +583,8 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, if (!net_eq(net, &init_net)) goto errout; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + dn_ifa_policy, extack); if (err < 0) goto errout; @@ -629,8 +629,8 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (!net_eq(net, &init_net)) return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + dn_ifa_policy, extack); if (err < 0) return err; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 7e47ffdd1412..77fbf8e9df4b 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -517,8 +517,8 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (!net_eq(net, &init_net)) return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*r), attrs, RTA_MAX, + rtm_dn_policy, extack); if (err < 0) return err; @@ -544,8 +544,8 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (!net_eq(net, &init_net)) return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*r), attrs, RTA_MAX, + rtm_dn_policy, extack); if (err < 0) return err; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 950613ee7881..664584763c36 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1651,8 +1651,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (!net_eq(net, &init_net)) return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_dn_policy, extack); if (err < 0) return err; diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 1a002eb85096..4218304cb201 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -247,9 +247,11 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb, rtnl_lock(); if (!cb->args[0]) { - err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, - genl_family_attrbuf(&nl802154_fam), - nl802154_fam.maxattr, nl802154_policy, NULL); + err = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nl802154_fam.hdrsize, + genl_family_attrbuf(&nl802154_fam), + nl802154_fam.maxattr, + nl802154_policy, NULL); if (err) goto out_unlock; @@ -562,8 +564,10 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb, struct nl802154_dump_wpan_phy_state *state) { struct nlattr **tb = genl_family_attrbuf(&nl802154_fam); - int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, tb, - nl802154_fam.maxattr, nl802154_policy, NULL); + int ret = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nl802154_fam.hdrsize, + tb, nl802154_fam.maxattr, + nl802154_policy, NULL); /* TODO check if we can handle error here, * we have no backward compatibility @@ -1308,8 +1312,7 @@ ieee802154_llsec_parse_dev_addr(struct nlattr *nla, { struct nlattr *attrs[NL802154_DEV_ADDR_ATTR_MAX + 1]; - if (!nla || nla_parse_nested(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla, - nl802154_dev_addr_policy, NULL)) + if (!nla || nla_parse_nested_deprecated(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla, nl802154_dev_addr_policy, NULL)) return -EINVAL; if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] || @@ -1348,8 +1351,7 @@ ieee802154_llsec_parse_key_id(struct nlattr *nla, { struct nlattr *attrs[NL802154_KEY_ID_ATTR_MAX + 1]; - if (!nla || nla_parse_nested(attrs, NL802154_KEY_ID_ATTR_MAX, nla, - nl802154_key_id_policy, NULL)) + if (!nla || nla_parse_nested_deprecated(attrs, NL802154_KEY_ID_ATTR_MAX, nla, nl802154_key_id_policy, NULL)) return -EINVAL; if (!attrs[NL802154_KEY_ID_ATTR_MODE]) @@ -1564,9 +1566,7 @@ static int nl802154_add_llsec_key(struct sk_buff *skb, struct genl_info *info) struct ieee802154_llsec_key_id id = { }; u32 commands[NL802154_CMD_FRAME_NR_IDS / 32] = { }; - if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX, - info->attrs[NL802154_ATTR_SEC_KEY], - nl802154_key_policy, info->extack)) + if (nla_parse_nested_deprecated(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], nl802154_key_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_KEY_ATTR_USAGE_FRAMES] || @@ -1614,9 +1614,7 @@ static int nl802154_del_llsec_key(struct sk_buff *skb, struct genl_info *info) struct nlattr *attrs[NL802154_KEY_ATTR_MAX + 1]; struct ieee802154_llsec_key_id id; - if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX, - info->attrs[NL802154_ATTR_SEC_KEY], - nl802154_key_policy, info->extack)) + if (nla_parse_nested_deprecated(attrs, NL802154_KEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_KEY], nl802154_key_policy, info->extack)) return -EINVAL; if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0) @@ -1730,8 +1728,7 @@ ieee802154_llsec_parse_device(struct nlattr *nla, { struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1]; - if (!nla || nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, - nla, nl802154_dev_policy, NULL)) + if (!nla || nla_parse_nested_deprecated(attrs, NL802154_DEV_ATTR_MAX, nla, nl802154_dev_policy, NULL)) return -EINVAL; memset(dev, 0, sizeof(*dev)); @@ -1782,9 +1779,7 @@ static int nl802154_del_llsec_dev(struct sk_buff *skb, struct genl_info *info) struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1]; __le64 extended_addr; - if (nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, - info->attrs[NL802154_ATTR_SEC_DEVICE], - nl802154_dev_policy, info->extack)) + if (nla_parse_nested_deprecated(attrs, NL802154_DEV_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVICE], nl802154_dev_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_DEV_ATTR_EXTENDED_ADDR]) @@ -1910,9 +1905,7 @@ static int nl802154_add_llsec_devkey(struct sk_buff *skb, struct genl_info *info __le64 extended_addr; if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] || - nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX, - info->attrs[NL802154_ATTR_SEC_DEVKEY], - nl802154_devkey_policy, info->extack) < 0) + nla_parse_nested_deprecated(attrs, NL802154_DEVKEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVKEY], nl802154_devkey_policy, info->extack) < 0) return -EINVAL; if (!attrs[NL802154_DEVKEY_ATTR_FRAME_COUNTER] || @@ -1942,9 +1935,7 @@ static int nl802154_del_llsec_devkey(struct sk_buff *skb, struct genl_info *info struct ieee802154_llsec_device_key key; __le64 extended_addr; - if (nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX, - info->attrs[NL802154_ATTR_SEC_DEVKEY], - nl802154_devkey_policy, info->extack)) + if (nla_parse_nested_deprecated(attrs, NL802154_DEVKEY_ATTR_MAX, info->attrs[NL802154_ATTR_SEC_DEVKEY], nl802154_devkey_policy, info->extack)) return -EINVAL; if (!attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]) @@ -2064,8 +2055,7 @@ llsec_parse_seclevel(struct nlattr *nla, struct ieee802154_llsec_seclevel *sl) { struct nlattr *attrs[NL802154_SECLEVEL_ATTR_MAX + 1]; - if (!nla || nla_parse_nested(attrs, NL802154_SECLEVEL_ATTR_MAX, - nla, nl802154_seclevel_policy, NULL)) + if (!nla || nla_parse_nested_deprecated(attrs, NL802154_SECLEVEL_ATTR_MAX, nla, nl802154_seclevel_policy, NULL)) return -EINVAL; memset(sl, 0, sizeof(*sl)); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index eb514f312e6f..701c5d113a34 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -621,8 +621,8 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv4_policy, extack); if (err < 0) goto errout; @@ -793,8 +793,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, struct in_device *in_dev; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv4_policy, extack); if (err < 0) goto errout; @@ -1689,8 +1689,8 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, fillargs->flags |= NLM_F_DUMP_FILTERED; } - err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX, - ifa_ipv4_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv4_policy, extack); if (err < 0) return err; @@ -1906,7 +1906,8 @@ static int inet_validate_link_af(const struct net_device *dev, if (dev && !__in_dev_get_rcu(dev)) return -EAFNOSUPPORT; - err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL); + err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, + inet_af_policy, NULL); if (err < 0) return err; @@ -1934,7 +1935,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) if (!in_dev) return -EAFNOSUPPORT; - if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) + if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) BUG(); if (tb[IFLA_INET_CONF]) { @@ -2076,11 +2077,13 @@ static int inet_netconf_valid_get_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, - NETCONFA_MAX, devconf_ipv4_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg), + tb, NETCONFA_MAX, + devconf_ipv4_policy, extack); - err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, - NETCONFA_MAX, devconf_ipv4_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg), + tb, NETCONFA_MAX, + devconf_ipv4_policy, extack); if (err) return err; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d4b63f94f7be..b298255f6fdb 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -718,8 +718,8 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, int err, remaining; struct rtmsg *rtm; - err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy, - extack); + err = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX, + rtm_ipv4_policy, extack); if (err < 0) goto errout; @@ -896,8 +896,8 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, filter->rt_type = rtm->rtm_type; filter->table_id = rtm->rtm_table; - err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); if (err < 0) return err; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index c3f3d28d1087..30c1c264bdfc 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -239,8 +239,8 @@ static int ip_tun_build_state(struct nlattr *attr, struct nlattr *tb[LWTUNNEL_IP_MAX + 1]; int err; - err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy, - extack); + err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP_MAX, attr, + ip_tun_policy, extack); if (err < 0) return err; @@ -356,8 +356,8 @@ static int ip6_tun_build_state(struct nlattr *attr, struct nlattr *tb[LWTUNNEL_IP6_MAX + 1]; int err; - err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy, - extack); + err = nla_parse_nested_deprecated(tb, LWTUNNEL_IP6_MAX, attr, + ip6_tun_policy, extack); if (err < 0) return err; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 1322573b8228..2c61e10a60e3 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2498,8 +2498,8 @@ static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || @@ -2510,8 +2510,8 @@ static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); if (err) return err; @@ -2674,8 +2674,8 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, struct rtmsg *rtm; int ret, rem; - ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy, - extack); + ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX, + rtm_ipmr_policy, extack); if (ret < 0) goto out; rtm = nlmsg_data(nlh); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4950adeb05c0..795aed6e4720 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2877,8 +2877,8 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || @@ -2896,8 +2896,8 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); if (err) return err; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 01f081aa718c..f96d1de79509 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -611,11 +611,13 @@ static int inet6_netconf_valid_get_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, - NETCONFA_MAX, devconf_ipv6_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg), + tb, NETCONFA_MAX, + devconf_ipv6_policy, extack); - err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, - NETCONFA_MAX, devconf_ipv6_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg), + tb, NETCONFA_MAX, + devconf_ipv6_policy, extack); if (err) return err; @@ -4565,8 +4567,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, u32 ifa_flags; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); if (err < 0) return err; @@ -4729,8 +4731,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, struct ifa6_config cfg; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); if (err < 0) return err; @@ -5086,8 +5088,8 @@ static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, fillargs->flags |= NLM_F_DUMP_FILTERED; } - err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX, - ifa_ipv6_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); if (err < 0) return err; @@ -5237,11 +5239,11 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, - ifa_ipv6_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); - err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX, - ifa_ipv6_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); if (err) return err; @@ -5667,8 +5669,8 @@ static int inet6_validate_link_af(const struct net_device *dev, if (dev && !__in6_dev_get(dev)) return -EAFNOSUPPORT; - return nla_parse_nested(tb, IFLA_INET6_MAX, nla, inet6_af_policy, - NULL); + return nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, + inet6_af_policy, NULL); } static int check_addr_gen_mode(int mode) @@ -5700,7 +5702,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) if (!idev) return -EAFNOSUPPORT; - if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) + if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) BUG(); if (tb[IFLA_INET6_TOKEN]) { diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 1766325423b5..642fc6ac13d2 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -383,8 +383,8 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, u32 label; int err = 0; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifal), tb, IFAL_MAX, + ifal_policy, extack); if (err < 0) return err; @@ -537,8 +537,8 @@ static int ip6addrlbl_valid_get_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, - ifal_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(*ifal), tb, + IFAL_MAX, ifal_policy, extack); ifal = nlmsg_data(nlh); if (ifal->__ifal_reserved || ifal->ifal_flags || ifal->ifal_seq) { @@ -546,8 +546,8 @@ static int ip6addrlbl_valid_get_req(struct sk_buff *skb, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(*ifal), tb, IFAL_MAX, - ifal_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifal), tb, IFAL_MAX, + ifal_policy, extack); if (err) return err; diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 3d56a2fb6f86..422dcc691f71 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -146,7 +146,8 @@ static int ila_build_state(struct nlattr *nla, if (family != AF_INET6) return -EINVAL; - ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack); + ret = nla_parse_nested_deprecated(tb, ILA_ATTR_MAX, nla, + ila_nl_policy, extack); if (ret < 0) return ret; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e2b47f47de92..b18e85cd7587 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4239,8 +4239,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int pref; int err; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); if (err < 0) goto errout; @@ -4886,8 +4886,8 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv6_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || @@ -4903,8 +4903,8 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv6_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); if (err) return err; diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index ee5403cbe655..7a525fda8978 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -396,8 +396,8 @@ static int seg6_build_state(struct nlattr *nla, if (family != AF_INET && family != AF_INET6) return -EINVAL; - err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla, - seg6_iptunnel_policy, extack); + err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla, + seg6_iptunnel_policy, extack); if (err < 0) return err; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 67005ac71341..78155fdb8c36 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -823,8 +823,9 @@ static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt) int ret; u32 fd; - ret = nla_parse_nested(tb, SEG6_LOCAL_BPF_PROG_MAX, - attrs[SEG6_LOCAL_BPF], bpf_prog_policy, NULL); + ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_BPF_PROG_MAX, + attrs[SEG6_LOCAL_BPF], + bpf_prog_policy, NULL); if (ret < 0) return ret; @@ -959,8 +960,8 @@ static int seg6_local_build_state(struct nlattr *nla, unsigned int family, if (family != AF_INET6) return -EINVAL; - err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy, - extack); + err = nla_parse_nested_deprecated(tb, SEG6_LOCAL_MAX, nla, + seg6_local_policy, extack); if (err < 0) return err; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 01f8a4f97872..baa098291fb0 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1223,11 +1223,13 @@ static int mpls_netconf_valid_get_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, - NETCONFA_MAX, devconf_mpls_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg), + tb, NETCONFA_MAX, + devconf_mpls_policy, extack); - err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, - NETCONFA_MAX, devconf_mpls_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg), + tb, NETCONFA_MAX, + devconf_mpls_policy, extack); if (err) return err; @@ -1788,8 +1790,8 @@ static int rtm_to_route_config(struct sk_buff *skb, int index; int err; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_mpls_policy, extack); if (err < 0) goto errout; @@ -2106,8 +2108,8 @@ static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, cb->answer_flags = NLM_F_DUMP_FILTERED; } - err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_mpls_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_mpls_policy, extack); if (err < 0) return err; @@ -2290,8 +2292,8 @@ static int mpls_valid_getroute_req(struct sk_buff *skb, } if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_mpls_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_mpls_policy, extack); rtm = nlmsg_data(nlh); if ((rtm->rtm_dst_len && rtm->rtm_dst_len != 20) || @@ -2306,8 +2308,8 @@ static int mpls_valid_getroute_req(struct sk_buff *skb, return -EINVAL; } - err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_mpls_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_mpls_policy, extack); if (err) return err; diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 2619c2fbea93..951b52d5835b 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -178,8 +178,8 @@ static int mpls_build_state(struct nlattr *nla, u8 n_labels; int ret; - ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla, - mpls_iptunnel_policy, extack); + ret = nla_parse_nested_deprecated(tb, MPLS_IPTUNNEL_MAX, nla, + mpls_iptunnel_policy, extack); if (ret < 0) return ret; diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index 672ed56b5ef0..37759c88ef02 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -220,8 +220,8 @@ static int ncsi_pkg_info_all_nl(struct sk_buff *skb, void *hdr; int rc; - rc = genlmsg_parse(cb->nlh, &ncsi_genl_family, attrs, NCSI_ATTR_MAX, - ncsi_genl_policy, NULL); + rc = genlmsg_parse_deprecated(cb->nlh, &ncsi_genl_family, attrs, NCSI_ATTR_MAX, + ncsi_genl_policy, NULL); if (rc) return rc; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 45a257695bef..3f4a4936f63c 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -299,8 +299,7 @@ ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, - ipaddr_policy, NULL)) + if (nla_parse_nested_deprecated(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4))) return -IPSET_ERR_PROTOCOL; @@ -318,8 +317,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, - ipaddr_policy, NULL)) + if (nla_parse_nested_deprecated(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6))) return -IPSET_ERR_PROTOCOL; @@ -939,8 +937,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl, /* Without holding any locks, create private part. */ if (attr[IPSET_ATTR_DATA] && - nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], - set->type->create_policy, NULL)) { + nla_parse_nested_deprecated(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], set->type->create_policy, NULL)) { ret = -IPSET_ERR_PROTOCOL; goto put_out; } @@ -1298,8 +1295,9 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) ip_set_id_t index; /* Second pass, so parser can't fail */ - nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, - ip_set_setname_policy, NULL); + nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, attr, + nlh->nlmsg_len - min_len, ip_set_setname_policy, + NULL); cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]); if (cda[IPSET_ATTR_SETNAME]) { @@ -1546,8 +1544,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, memcpy(&errmsg->msg, nlh, nlh->nlmsg_len); cmdattr = (void *)&errmsg->msg + min_len; - nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr, - nlh->nlmsg_len - min_len, ip_set_adt_policy, NULL); + nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, cmdattr, + nlh->nlmsg_len - min_len, + ip_set_adt_policy, NULL); errline = nla_data(cda[IPSET_ATTR_LINENO]); @@ -1592,9 +1591,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, use_lineno = !!attr[IPSET_ATTR_LINENO]; if (attr[IPSET_ATTR_DATA]) { - if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, - attr[IPSET_ATTR_DATA], - set->type->adt_policy, NULL)) + if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags, use_lineno); @@ -1605,8 +1602,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb, memset(tb, 0, sizeof(tb)); if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || - nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, - set->type->adt_policy, NULL)) + nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags, use_lineno); @@ -1647,9 +1643,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, use_lineno = !!attr[IPSET_ATTR_LINENO]; if (attr[IPSET_ATTR_DATA]) { - if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, - attr[IPSET_ATTR_DATA], - set->type->adt_policy, NULL)) + if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags, use_lineno); @@ -1660,8 +1654,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb, memset(tb, 0, sizeof(*tb)); if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || - nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, - set->type->adt_policy, NULL)) + nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags, use_lineno); @@ -1692,8 +1685,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, if (!set) return -ENOENT; - if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], - set->type->adt_policy, NULL)) + if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; rcu_read_lock_bh(); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 39892e5d38a2..24bb1a7b590c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3116,8 +3116,7 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, /* Parse mandatory identifying service fields first */ if (nla == NULL || - nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, - ip_vs_svc_policy, NULL)) + nla_parse_nested_deprecated(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy, NULL)) return -EINVAL; nla_af = attrs[IPVS_SVC_ATTR_AF]; @@ -3279,8 +3278,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, mutex_lock(&__ip_vs_mutex); /* Try to find the service for which to dump destinations */ - if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, - ip_vs_cmd_policy, cb->extack)) + if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack)) goto out_err; @@ -3316,8 +3314,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, /* Parse mandatory identifying destination fields first */ if (nla == NULL || - nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, - ip_vs_dest_policy, NULL)) + nla_parse_nested_deprecated(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy, NULL)) return -EINVAL; nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; @@ -3561,9 +3558,7 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || - nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, - info->attrs[IPVS_CMD_ATTR_DAEMON], - ip_vs_daemon_policy, info->extack)) + nla_parse_nested_deprecated(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], ip_vs_daemon_policy, info->extack)) goto out; if (cmd == IPVS_CMD_NEW_DAEMON) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 148b99a15b21..8dcc064d518d 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1020,12 +1020,12 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nlattr *tb[CTA_IP_MAX+1]; int ret = 0; - ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL, NULL); + ret = nla_parse_nested_deprecated(tb, CTA_IP_MAX, attr, NULL, NULL); if (ret < 0) return ret; - ret = nla_validate_nested(attr, CTA_IP_MAX, - cta_ip_nla_policy, NULL); + ret = nla_validate_nested_deprecated(attr, CTA_IP_MAX, + cta_ip_nla_policy, NULL); if (ret) return ret; @@ -1052,8 +1052,8 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr, struct nlattr *tb[CTA_PROTO_MAX+1]; int ret = 0; - ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy, - NULL); + ret = nla_parse_nested_deprecated(tb, CTA_PROTO_MAX, attr, + proto_nla_policy, NULL); if (ret < 0) return ret; @@ -1065,8 +1065,9 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr, l4proto = nf_ct_l4proto_find(tuple->dst.protonum); if (likely(l4proto->nlattr_to_tuple)) { - ret = nla_validate_nested(attr, CTA_PROTO_MAX, - l4proto->nla_policy, NULL); + ret = nla_validate_nested_deprecated(attr, CTA_PROTO_MAX, + l4proto->nla_policy, + NULL); if (ret == 0) ret = l4proto->nlattr_to_tuple(tb, tuple); } @@ -1129,8 +1130,8 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], memset(tuple, 0, sizeof(*tuple)); - err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy, - NULL); + err = nla_parse_nested_deprecated(tb, CTA_TUPLE_MAX, cda[type], + tuple_nla_policy, NULL); if (err < 0) return err; @@ -1180,7 +1181,8 @@ static int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, int err; struct nlattr *tb[CTA_HELP_MAX+1]; - err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_HELP_MAX, attr, + help_nla_policy, NULL); if (err < 0) return err; @@ -1721,8 +1723,8 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *tb[CTA_PROTOINFO_MAX+1]; int err = 0; - err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy, - NULL); + err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_MAX, attr, + protoinfo_policy, NULL); if (err < 0) return err; @@ -1745,7 +1747,8 @@ static int change_seq_adj(struct nf_ct_seqadj *seq, int err; struct nlattr *cda[CTA_SEQADJ_MAX+1]; - err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy, NULL); + err = nla_parse_nested_deprecated(cda, CTA_SEQADJ_MAX, attr, + seqadj_policy, NULL); if (err < 0) return err; @@ -1822,8 +1825,9 @@ static int ctnetlink_change_synproxy(struct nf_conn *ct, if (!synproxy) return 0; - err = nla_parse_nested(tb, CTA_SYNPROXY_MAX, cda[CTA_SYNPROXY], - synproxy_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_SYNPROXY_MAX, + cda[CTA_SYNPROXY], synproxy_policy, + NULL); if (err < 0) return err; @@ -2553,7 +2557,8 @@ ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct) struct nlattr *cda[CTA_MAX+1]; int ret; - ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy, NULL); + ret = nla_parse_nested_deprecated(cda, CTA_MAX, attr, ct_nla_policy, + NULL); if (ret < 0) return ret; @@ -2586,8 +2591,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, struct nf_conntrack_expect *exp; int err; - err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy, - NULL); + err = nla_parse_nested_deprecated(cda, CTA_EXPECT_MAX, attr, + exp_nla_policy, NULL); if (err < 0) return err; @@ -3209,8 +3214,8 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, struct nf_conntrack_tuple nat_tuple = {}; int err; - err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, - exp_nat_nla_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_EXPECT_NAT_MAX, attr, + exp_nat_nla_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index a4deddebec0a..7491aa4c3566 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -639,8 +639,8 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) if (!attr) return 0; - err = nla_parse_nested(tb, CTA_PROTOINFO_DCCP_MAX, attr, - dccp_nla_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_DCCP_MAX, attr, + dccp_nla_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 8cf36b684400..5b8dde266412 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -563,8 +563,8 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) if (!attr) return 0; - err = nla_parse_nested(tb, CTA_PROTOINFO_SCTP_MAX, attr, - sctp_nla_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_SCTP_MAX, attr, + sctp_nla_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index ec6c3618333d..7ba01d8ee165 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1248,8 +1248,8 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) if (!pattr) return 0; - err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, - tcp_nla_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_TCP_MAX, pattr, + tcp_nla_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 715e3d4d761b..cd94481e6c07 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -890,8 +890,8 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr, struct nlattr *tb[CTA_PROTONAT_MAX+1]; int err; - err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, - protonat_nla_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_PROTONAT_MAX, attr, + protonat_nla_policy, NULL); if (err < 0) return err; @@ -949,7 +949,8 @@ nfnetlink_parse_nat(const struct nlattr *nat, memset(range, 0, sizeof(*range)); - err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy, NULL); + err = nla_parse_nested_deprecated(tb, CTA_NAT_MAX, nat, + nat_nla_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2b79c250ecb4..d98416e83d4e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1420,8 +1420,8 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) struct nft_stats *stats; int err; - err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy, - NULL); + err = nla_parse_nested_deprecated(tb, NFTA_COUNTER_MAX, attr, + nft_counter_policy, NULL); if (err < 0) return ERR_PTR(err); @@ -1525,8 +1525,9 @@ static int nft_chain_parse_hook(struct net *net, lockdep_assert_held(&net->nft.commit_mutex); lockdep_nfnl_nft_mutex_not_held(); - err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], - nft_hook_policy, NULL); + err = nla_parse_nested_deprecated(ha, NFTA_HOOK_MAX, + nla[NFTA_CHAIN_HOOK], + nft_hook_policy, NULL); if (err < 0) return err; @@ -2105,7 +2106,8 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, struct nlattr *tb[NFTA_EXPR_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_EXPR_MAX, nla, + nft_expr_policy, NULL); if (err < 0) return err; @@ -2114,8 +2116,9 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, return PTR_ERR(type); if (tb[NFTA_EXPR_DATA]) { - err = nla_parse_nested(info->tb, type->maxattr, - tb[NFTA_EXPR_DATA], type->policy, NULL); + err = nla_parse_nested_deprecated(info->tb, type->maxattr, + tb[NFTA_EXPR_DATA], + type->policy, NULL); if (err < 0) goto err1; } else @@ -3443,8 +3446,8 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc, struct nlattr *da[NFTA_SET_DESC_MAX + 1]; int err; - err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, - nft_set_desc_policy, NULL); + err = nla_parse_nested_deprecated(da, NFTA_SET_DESC_MAX, nla, + nft_set_desc_policy, NULL); if (err < 0) return err; @@ -4170,8 +4173,8 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, void *priv; int err; - err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, - nft_set_elem_policy, NULL); + err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy, NULL); if (err < 0) return err; @@ -4402,8 +4405,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, u8 ulen; int err; - err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, - nft_set_elem_policy, NULL); + err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy, NULL); if (err < 0) return err; @@ -4696,8 +4699,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, void *priv; int err; - err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, - nft_set_elem_policy, NULL); + err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy, NULL); if (err < 0) goto err1; @@ -4971,8 +4974,8 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx, goto err1; if (attr) { - err = nla_parse_nested(tb, type->maxattr, attr, type->policy, - NULL); + err = nla_parse_nested_deprecated(tb, type->maxattr, attr, + type->policy, NULL); if (err < 0) goto err2; } else { @@ -5548,8 +5551,8 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx, int hooknum, priority; int err, n = 0, i; - err = nla_parse_nested(tb, NFTA_FLOWTABLE_HOOK_MAX, attr, - nft_flowtable_hook_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, attr, + nft_flowtable_hook_policy, NULL); if (err < 0) return err; @@ -7206,8 +7209,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, struct nft_chain *chain; int err; - err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy, - NULL); + err = nla_parse_nested_deprecated(tb, NFTA_VERDICT_MAX, nla, + nft_verdict_policy, NULL); if (err < 0) return err; @@ -7337,7 +7340,8 @@ int nft_data_init(const struct nft_ctx *ctx, struct nlattr *tb[NFTA_DATA_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_DATA_MAX, nla, + nft_data_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 916913454624..92077d459109 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -206,8 +206,9 @@ replay: return -ENOMEM; } - err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen, - ss->cb[cb_id].policy, extack); + err = nla_parse_deprecated(cda, ss->cb[cb_id].attr_count, + attr, attrlen, + ss->cb[cb_id].policy, extack); if (err < 0) { rcu_read_unlock(); return err; @@ -421,8 +422,10 @@ replay: goto ack; } - err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, - attrlen, ss->cb[cb_id].policy, NULL); + err = nla_parse_deprecated(cda, + ss->cb[cb_id].attr_count, + attr, attrlen, + ss->cb[cb_id].policy, NULL); if (err < 0) goto ack; @@ -520,8 +523,8 @@ static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh) if (skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) return; - err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy, - NULL); + err = nla_parse_deprecated(cda, NFNL_BATCH_MAX, attr, attrlen, + nfnl_batch_policy, NULL); if (err < 0) { netlink_ack(skb, nlh, err, NULL); return; diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 8fa8bf7c48e6..02c877432d71 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -248,8 +248,8 @@ static int nfnl_acct_start(struct netlink_callback *cb) if (!attr) return 0; - err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy, - NULL); + err = nla_parse_nested_deprecated(tb, NFACCT_FILTER_MAX, attr, + filter_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 74c9794d28d6..17eb473a626b 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -78,8 +78,8 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, int err; struct nlattr *tb[NFCTH_TUPLE_MAX+1]; - err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, - nfnl_cthelper_tuple_pol, NULL); + err = nla_parse_nested_deprecated(tb, NFCTH_TUPLE_MAX, attr, + nfnl_cthelper_tuple_pol, NULL); if (err < 0) return err; @@ -139,8 +139,8 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, int err; struct nlattr *tb[NFCTH_POLICY_MAX+1]; - err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, - nfnl_cthelper_expect_pol, NULL); + err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_MAX, attr, + nfnl_cthelper_expect_pol, NULL); if (err < 0) return err; @@ -176,8 +176,9 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; unsigned int class_max; - ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, - nfnl_cthelper_expect_policy_set, NULL); + ret = nla_parse_nested_deprecated(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set, + NULL); if (ret < 0) return ret; @@ -289,8 +290,8 @@ nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy, struct nlattr *tb[NFCTH_POLICY_MAX + 1]; int err; - err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, - nfnl_cthelper_expect_pol, NULL); + err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_MAX, attr, + nfnl_cthelper_expect_pol, NULL); if (err < 0) return err; @@ -361,8 +362,9 @@ static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper, unsigned int class_max; int err; - err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, - nfnl_cthelper_expect_policy_set, NULL); + err = nla_parse_nested_deprecated(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set, + NULL); if (err < 0) return err; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 572cb42e1ee1..427b411c5739 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -59,8 +59,11 @@ ctnl_timeout_parse_policy(void *timeout, if (!tb) return -ENOMEM; - ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, attr, - l4proto->ctnl_timeout.nla_policy, NULL); + ret = nla_parse_nested_deprecated(tb, + l4proto->ctnl_timeout.nlattr_max, + attr, + l4proto->ctnl_timeout.nla_policy, + NULL); if (ret < 0) goto err; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index be7d53943e2d..27dac47b29c2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1139,8 +1139,9 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry, struct nlattr *tb[NFQA_VLAN_MAX + 1]; int err; - err = nla_parse_nested(tb, NFQA_VLAN_MAX, nfqa[NFQA_VLAN], - nfqa_vlan_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFQA_VLAN_MAX, + nfqa[NFQA_VLAN], + nfqa_vlan_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 469f9da5073b..276f1f2d6de1 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -198,8 +198,8 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) u32 flags; int err; - err = nla_parse_nested(tb, NFTA_RULE_COMPAT_MAX, attr, - nft_rule_compat_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_RULE_COMPAT_MAX, attr, + nft_rule_compat_policy, NULL); if (err < 0) return err; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 1738ef6dcb56..b422b74bfe08 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -797,9 +797,11 @@ nft_ct_timeout_parse_policy(void *timeouts, if (!tb) return -ENOMEM; - ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, - attr, l4proto->ctnl_timeout.nla_policy, - NULL); + ret = nla_parse_nested_deprecated(tb, + l4proto->ctnl_timeout.nlattr_max, + attr, + l4proto->ctnl_timeout.nla_policy, + NULL); if (ret < 0) goto err; diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 66b52d015763..3d4c2ae605a8 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -166,8 +166,8 @@ static int nft_tunnel_obj_ip_init(const struct nft_ctx *ctx, struct nlattr *tb[NFTA_TUNNEL_KEY_IP_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_IP_MAX, attr, - nft_tunnel_ip_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_IP_MAX, attr, + nft_tunnel_ip_policy, NULL); if (err < 0) return err; @@ -195,8 +195,8 @@ static int nft_tunnel_obj_ip6_init(const struct nft_ctx *ctx, struct nlattr *tb[NFTA_TUNNEL_KEY_IP6_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_IP6_MAX, attr, - nft_tunnel_ip6_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_IP6_MAX, attr, + nft_tunnel_ip6_policy, NULL); if (err < 0) return err; @@ -231,8 +231,8 @@ static int nft_tunnel_obj_vxlan_init(const struct nlattr *attr, struct nlattr *tb[NFTA_TUNNEL_KEY_VXLAN_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_VXLAN_MAX, attr, - nft_tunnel_opts_vxlan_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_VXLAN_MAX, attr, + nft_tunnel_opts_vxlan_policy, NULL); if (err < 0) return err; @@ -260,8 +260,9 @@ static int nft_tunnel_obj_erspan_init(const struct nlattr *attr, uint8_t hwid, dir; int err, version; - err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_ERSPAN_MAX, attr, - nft_tunnel_opts_erspan_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_ERSPAN_MAX, + attr, nft_tunnel_opts_erspan_policy, + NULL); if (err < 0) return err; @@ -309,8 +310,8 @@ static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx, struct nlattr *tb[NFTA_TUNNEL_KEY_OPTS_MAX + 1]; int err; - err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_OPTS_MAX, attr, - nft_tunnel_opts_policy, NULL); + err = nla_parse_nested_deprecated(tb, NFTA_TUNNEL_KEY_OPTS_MAX, attr, + nft_tunnel_opts_policy, NULL); if (err < 0) return err; diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index c9775658fb98..8d401df65928 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -99,9 +99,10 @@ static int netlbl_cipsov4_add_common(struct genl_info *info, doi_def->doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); - if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_TAGLST], - NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy, NULL) != 0) + if (nla_validate_nested_deprecated(info->attrs[NLBL_CIPSOV4_A_TAGLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy, + NULL) != 0) return -EINVAL; nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem) @@ -146,9 +147,10 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, !info->attrs[NLBL_CIPSOV4_A_MLSLVLLST]) return -EINVAL; - if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], - NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy, NULL) != 0) + if (nla_validate_nested_deprecated(info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy, + NULL) != 0) return -EINVAL; doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); @@ -170,9 +172,10 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, info->attrs[NLBL_CIPSOV4_A_MLSLVLLST], nla_a_rem) if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSLVL) { - if (nla_validate_nested(nla_a, NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy, - NULL) != 0) + if (nla_validate_nested_deprecated(nla_a, + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy, + NULL) != 0) goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) switch (nla_type(nla_b)) { @@ -234,19 +237,20 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, } if (info->attrs[NLBL_CIPSOV4_A_MLSCATLST]) { - if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_MLSCATLST], - NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy, NULL) != 0) + if (nla_validate_nested_deprecated(info->attrs[NLBL_CIPSOV4_A_MLSCATLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy, + NULL) != 0) goto add_std_failure; nla_for_each_nested(nla_a, info->attrs[NLBL_CIPSOV4_A_MLSCATLST], nla_a_rem) if (nla_type(nla_a) == NLBL_CIPSOV4_A_MLSCAT) { - if (nla_validate_nested(nla_a, - NLBL_CIPSOV4_A_MAX, - netlbl_cipsov4_genl_policy, - NULL) != 0) + if (nla_validate_nested_deprecated(nla_a, + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy, + NULL) != 0) goto add_std_failure; nla_for_each_nested(nla_b, nla_a, nla_b_rem) switch (nla_type(nla_b)) { diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 83e876591f6c..994d9aff2093 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -577,8 +577,9 @@ static int genl_family_rcv_msg(const struct genl_family *family, attrbuf = family->attrbuf; if (attrbuf) { - err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, - family->policy, extack); + err = nlmsg_parse_deprecated(nlh, hdrlen, attrbuf, + family->maxattr, family->policy, + extack); if (err < 0) goto out; } diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index f91ce7c82746..c6ba308cede7 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -119,9 +119,10 @@ static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb) int rc; u32 idx; - rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + nfc_genl_family.hdrsize, - attrbuf, nfc_genl_family.maxattr, nfc_genl_policy, - NULL); + rc = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nfc_genl_family.hdrsize, + attrbuf, nfc_genl_family.maxattr, + nfc_genl_policy, NULL); if (rc < 0) return ERR_PTR(rc); @@ -1177,8 +1178,9 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) tlvs_len = 0; nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) { - rc = nla_parse_nested(sdp_attrs, NFC_SDP_ATTR_MAX, attr, - nfc_sdp_genl_policy, info->extack); + rc = nla_parse_nested_deprecated(sdp_attrs, NFC_SDP_ATTR_MAX, + attr, nfc_sdp_genl_policy, + info->extack); if (rc != 0) { rc = -EINVAL; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 356677c3a0c2..3b99fc3de9ac 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1375,8 +1375,8 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) u32 ufid_flags; int err; - err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a, - OVS_FLOW_ATTR_MAX, flow_policy, NULL); + err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a, + OVS_FLOW_ATTR_MAX, flow_policy, NULL); if (err) return err; ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 2427b672107a..54eb80dd2dc6 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2854,8 +2854,8 @@ static int validate_userspace(const struct nlattr *attr) struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; - error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr, - userspace_policy, NULL); + error = nla_parse_nested_deprecated(a, OVS_USERSPACE_ATTR_MAX, attr, + userspace_policy, NULL); if (error) return error; @@ -2885,8 +2885,9 @@ static int validate_and_copy_check_pkt_len(struct net *net, int nested_acts_start; int start, err; - err = nla_parse_strict(a, OVS_CHECK_PKT_LEN_ATTR_MAX, nla_data(attr), - nla_len(attr), cpl_policy, NULL); + err = nla_parse_deprecated_strict(a, OVS_CHECK_PKT_LEN_ATTR_MAX, + nla_data(attr), nla_len(attr), + cpl_policy, NULL); if (err) return err; diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index fdc8be7fd8f3..9c89e8539a5a 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -227,9 +227,9 @@ static struct dp_meter *dp_meter_create(struct nlattr **a) struct nlattr *attr[OVS_BAND_ATTR_MAX + 1]; u32 band_max_delta_t; - err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX, - nla_data(nla), nla_len(nla), band_policy, - NULL); + err = nla_parse_deprecated((struct nlattr **)&attr, + OVS_BAND_ATTR_MAX, nla_data(nla), + nla_len(nla), band_policy, NULL); if (err) goto exit_free_meter; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 54965ff8cc66..f3c54871f9e1 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -70,8 +70,8 @@ static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr, if (nla_len(attr) < sizeof(struct nlattr)) return -EINVAL; - err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy, - NULL); + err = nla_parse_nested_deprecated(exts, OVS_VXLAN_EXT_MAX, attr, + exts_policy, NULL); if (err < 0) return err; diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 871eaf2cb85e..be92d936b5d5 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -79,8 +79,8 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_phonet_policy, extack); if (err < 0) return err; @@ -246,8 +246,8 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_phonet_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_phonet_policy, extack); if (err < 0) return err; diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 7c5e8292cc0a..dd0e97f4f6c0 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1091,7 +1091,8 @@ static int qrtr_addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, ASSERT_RTNL(); - rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, qrtr_policy, extack); + rc = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX, + qrtr_policy, extack); if (rc < 0) return rc; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 641ad7575f24..683fcc00da49 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -849,7 +849,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, int err; if (name == NULL) { - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack); + err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, NULL, + extack); if (err < 0) goto err_out; err = -EINVAL; @@ -964,7 +965,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, int err; int i; - err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); + err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL, + extack); if (err < 0) return err; @@ -1099,7 +1101,7 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla, int index; int err; - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack); + err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, NULL, extack); if (err < 0) goto err_out; @@ -1153,7 +1155,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, b = skb_tail_pointer(skb); - err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack); + err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, NULL, extack); if (err < 0) goto err_out; @@ -1282,7 +1284,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, size_t attr_size = 0; struct tc_action *actions[TCA_ACT_MAX_PRIO] = {}; - ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); + ret = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL, + extack); if (ret < 0) return ret; @@ -1384,8 +1387,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL, - extack); + ret = nlmsg_parse_deprecated(n, sizeof(struct tcamsg), tca, + TCA_ROOT_MAX, NULL, extack); if (ret < 0) return ret; @@ -1436,13 +1439,12 @@ static struct nlattr *find_dump_kind(struct nlattr **nla) if (tb1 == NULL) return NULL; - if (nla_parse(tb, TCA_ACT_MAX_PRIO, nla_data(tb1), - NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0) + if (nla_parse_deprecated(tb, TCA_ACT_MAX_PRIO, nla_data(tb1), NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0) return NULL; if (tb[1] == NULL) return NULL; - if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL, NULL) < 0) + if (nla_parse_nested_deprecated(tb2, TCA_ACT_MAX, tb[1], NULL, NULL) < 0) return NULL; kind = tb2[TCA_ACT_KIND]; @@ -1466,8 +1468,8 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) u32 msecs_since = 0; u32 act_count = 0; - ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX, - tcaa_policy, cb->extack); + ret = nlmsg_parse_deprecated(cb->nlh, sizeof(struct tcamsg), tb, + TCA_ROOT_MAX, tcaa_policy, cb->extack); if (ret < 0) return ret; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 3841156aa09f..a0c77faca04b 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -293,7 +293,8 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy, NULL); + ret = nla_parse_nested_deprecated(tb, TCA_ACT_BPF_MAX, nla, + act_bpf_policy, NULL); if (ret < 0) return ret; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 32ae0cd6e31c..8838575cd536 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -111,8 +111,8 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy, - NULL); + ret = nla_parse_nested_deprecated(tb, TCA_CONNMARK_MAX, nla, + connmark_policy, NULL); if (ret < 0) return ret; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 0c77e7bdf6d5..14bb525e355e 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -61,7 +61,8 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_CSUM_MAX, nla, csum_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e540e31069d7..75492b07f324 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -74,7 +74,8 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_GACT_MAX, nla, gact_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 7a87ce2e5a76..12489f60a979 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -486,7 +486,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, int ret = 0; int err; - err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_IFE_MAX, nla, ife_policy, + NULL); if (err < 0) return err; @@ -567,8 +568,9 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, INIT_LIST_HEAD(&ife->metalist); if (tb[TCA_IFE_METALST]) { - err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST], - NULL, NULL); + err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, + tb[TCA_IFE_METALST], NULL, + NULL); if (err) goto metadata_parse_err; err = populate_metalist(ife, tb2, exists, rtnl_held); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 04a0b5c61194..ae6e28ab1cd7 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -113,7 +113,8 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_IPT_MAX, nla, ipt_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 17cc6bd4c57c..c329390342f4 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -111,7 +111,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed"); return -EINVAL; } - ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack); + ret = nla_parse_nested_deprecated(tb, TCA_MIRRED_MAX, nla, + mirred_policy, extack); if (ret < 0) return ret; if (!tb[TCA_MIRRED_PARMS]) { diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index e91bb8eb81ec..51bd1ba02380 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -52,7 +52,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_NAT_MAX, nla, nat_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index ce4b54fa7834..d790c02b9c6c 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -70,8 +70,9 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, goto err_out; } - err = nla_parse_nested(tb, TCA_PEDIT_KEY_EX_MAX, ka, - pedit_key_ex_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_PEDIT_KEY_EX_MAX, + ka, pedit_key_ex_policy, + NULL); if (err) goto err_out; @@ -158,7 +159,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, return -EINVAL; } - err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_PEDIT_MAX, nla, + pedit_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 2b8581f6ab51..b48e40c69ad0 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -100,7 +100,8 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_POLICE_MAX, nla, + police_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 0f82d50ea232..b2faa43c1ac7 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -53,7 +53,8 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy, NULL); + ret = nla_parse_nested_deprecated(tb, TCA_SAMPLE_MAX, nla, + sample_policy, NULL); if (ret < 0) return ret; if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] || diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 23c8ca5615e5..ead480e6014c 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -104,7 +104,8 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_DEF_MAX, nla, simple_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 7e1d261a31d2..7ec159b95364 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -114,7 +114,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_SKBEDIT_MAX, nla, + skbedit_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 1d4c324d0a42..186ef98c828f 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -102,7 +102,8 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_SKBMOD_MAX, nla, + skbmod_policy, NULL); if (err < 0) return err; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 45c0c253c7e8..6a9070511ee8 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -76,8 +76,9 @@ tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len, int err, data_len, opt_len; u8 *data; - err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX, - nla, geneve_opt_policy, extack); + err = nla_parse_nested_deprecated(tb, + TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX, + nla, geneve_opt_policy, extack); if (err < 0) return err; @@ -125,8 +126,8 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst, int err, rem, opt_len, len = nla_len(nla), opts_len = 0; const struct nlattr *attr, *head = nla_data(nla); - err = nla_validate(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX, - enc_opts_policy, extack); + err = nla_validate_deprecated(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX, + enc_opts_policy, extack); if (err) return err; @@ -235,8 +236,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, return -EINVAL; } - err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy, - extack); + err = nla_parse_nested_deprecated(tb, TCA_TUNNEL_KEY_MAX, nla, + tunnel_key_policy, extack); if (err < 0) { NL_SET_ERR_MSG(extack, "Failed to parse nested tunnel key attributes"); return err; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 0f40d0a74423..39bd9fa3e455 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -124,7 +124,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (!nla) return -EINVAL; - err = nla_parse_nested(tb, TCA_VLAN_MAX, nla, vlan_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_VLAN_MAX, nla, vlan_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 78de717afddf..263c2ec082c9 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2006,7 +2006,8 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, replay: tp_created = 0; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); + err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -2217,7 +2218,8 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); + err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -2366,7 +2368,8 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, int err; bool rtnl_held = false; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); + err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -2558,8 +2561,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; - err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, - cb->extack); + err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX, + NULL, cb->extack); if (err) return err; @@ -2806,7 +2809,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, return -EPERM; replay: - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); + err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -2937,8 +2941,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; - err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, - cb->extack); + err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, cb->extack); if (err) return err; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index dd5fdb62c6df..923863f3b0d8 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -185,8 +185,8 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (tca[TCA_OPTIONS] == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS], - basic_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS], + basic_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 6fd569c5a036..9bcf499cce0c 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -468,8 +468,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (tca[TCA_OPTIONS] == NULL) return -EINVAL; - ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy, - NULL); + ret = nla_parse_nested_deprecated(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], + bpf_policy, NULL); if (ret < 0) return ret; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index b680dd684282..037d128c2851 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -104,8 +104,9 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, goto errout; new->handle = handle; new->tp = tp; - err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], - cgroup_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_CGROUP_MAX, + tca[TCA_OPTIONS], cgroup_policy, + NULL); if (err < 0) goto errout; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index cb29fe7d5ed3..7bb79ec5b176 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -408,7 +408,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_FLOW_MAX, opt, flow_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 8d4f7a672f14..f6685fc53119 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -884,8 +884,9 @@ static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key, return -EINVAL; } - err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, - nla, geneve_opt_policy, extack); + err = nla_parse_nested_deprecated(tb, + TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, + nla, geneve_opt_policy, extack); if (err < 0) return err; @@ -947,18 +948,18 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL; int err, option_len, key_depth, msk_depth = 0; - err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS], - TCA_FLOWER_KEY_ENC_OPTS_MAX, - enc_opts_policy, extack); + err = nla_validate_nested_deprecated(tb[TCA_FLOWER_KEY_ENC_OPTS], + TCA_FLOWER_KEY_ENC_OPTS_MAX, + enc_opts_policy, extack); if (err) return err; nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]); if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) { - err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK], - TCA_FLOWER_KEY_ENC_OPTS_MAX, - enc_opts_policy, extack); + err = nla_validate_nested_deprecated(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK], + TCA_FLOWER_KEY_ENC_OPTS_MAX, + enc_opts_policy, extack); if (err) return err; @@ -1513,8 +1514,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, goto errout_mask_alloc; } - err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], - fl_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_FLOWER_MAX, + tca[TCA_OPTIONS], fl_policy, NULL); if (err < 0) goto errout_tb; @@ -1852,8 +1853,8 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain, tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); if (!tb) return ERR_PTR(-ENOBUFS); - err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], - fl_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_FLOWER_MAX, + tca[TCA_OPTIONS], fl_policy, NULL); if (err) goto errout_tb; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 3fcc1d51b9d7..1d0b39c3932f 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -263,7 +263,8 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (!opt) return handle ? -EINVAL : 0; /* Succeed if it is old method. */ - err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_FW_MAX, opt, fw_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index d54fa8e11b9e..46982b4ea70a 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -181,8 +181,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (head) return -EEXIST; - err = nla_parse_nested(tb, TCA_MATCHALL_MAX, tca[TCA_OPTIONS], - mall_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_MATCHALL_MAX, + tca[TCA_OPTIONS], mall_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index b3b9b151a61d..eeff5bbfb912 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -484,7 +484,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return handle ? -EINVAL : 0; - err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt, + route4_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index fa059cf934a6..a4688bb92f43 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -497,7 +497,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (opt == NULL) return handle ? -EINVAL : 0; - err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_RSVP_MAX, opt, rsvp_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 1a2e7d5a8776..9f4f4203c388 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -510,7 +510,8 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, if (!opt) return 0; - err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_TCINDEX_MAX, opt, + tcindex_policy, NULL); if (err < 0) return err; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 499477058b2d..04e9ef088535 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -884,7 +884,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } } - err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_U32_MAX, opt, u32_policy, + extack); if (err < 0) return err; diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c index a5f34e930eff..60c26b8294b5 100644 --- a/net/sched/em_ipt.c +++ b/net/sched/em_ipt.c @@ -120,8 +120,8 @@ static int em_ipt_change(struct net *net, void *data, int data_len, struct xt_match *match; int mdata_len, ret; - ret = nla_parse(tb, TCA_EM_IPT_MAX, data, data_len, em_ipt_policy, - NULL); + ret = nla_parse_deprecated(tb, TCA_EM_IPT_MAX, data, data_len, + em_ipt_policy, NULL); if (ret < 0) return ret; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index d6e97115500b..28dfa8f2a4ea 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -912,7 +912,8 @@ static int em_meta_change(struct net *net, void *data, int len, struct tcf_meta_hdr *hdr; struct meta_match *meta = NULL; - err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy, NULL); + err = nla_parse_deprecated(tb, TCA_EM_META_MAX, data, len, + meta_policy, NULL); if (err < 0) goto errout; diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 6f2d6a761dbe..7b86c2a44746 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -314,7 +314,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla, if (!nla) return 0; - err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_EMATCH_TREE_MAX, nla, + em_policy, NULL); if (err < 0) goto errout; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6c81b22d214f..607e84d67c33 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -479,7 +479,8 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, u16 *tab = NULL; int err; - err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy, + extack); if (err < 0) return ERR_PTR(err); if (!tb[TCA_STAB_BASE]) { @@ -1423,8 +1424,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, - extack); + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -1508,8 +1509,8 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, replay: /* Reinit, just in case something touches this. */ - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, - extack); + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; @@ -1743,8 +1744,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, - rtm_tca_policy, cb->extack); + err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX, + rtm_tca_policy, cb->extack); if (err < 0) return err; @@ -1972,8 +1973,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, - extack); + err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, + rtm_tca_policy, extack); if (err < 0) return err; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index c36aa57eb4af..ae506c7906cd 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -223,7 +223,8 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, if (opt == NULL) return -EINVAL; - error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy, NULL); + error = nla_parse_nested_deprecated(tb, TCA_ATM_MAX, opt, atm_policy, + NULL); if (error < 0) return error; diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 50db72fe44de..53a80bc6b13a 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2531,7 +2531,8 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_CAKE_MAX, opt, cake_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy, + extack); if (err < 0) return err; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 243bce4b888b..ba4b33b74dd8 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1149,7 +1149,8 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt, return -EINVAL; } - err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, cbq_policy, + extack); if (err < 0) return err; @@ -1473,7 +1474,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t return -EINVAL; } - err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, cbq_policy, + extack); if (err < 0) return err; diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index adffc6d68c06..8077c846f5bf 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -358,7 +358,8 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt, struct tc_cbs_qopt *qopt; int err; - err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_CBS_MAX, opt, cbs_policy, + extack); if (err < 0) return err; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index eda21dc94bde..370dbcf49e8b 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -358,7 +358,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt, + choke_policy, NULL); if (err < 0) return err; diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 60ac4e61ce3a..25ef172c23df 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -141,7 +141,8 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_CODEL_MAX, opt, + codel_policy, NULL); if (err < 0) return err; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 022db73fd5a9..ffcd6654c39d 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -70,7 +70,8 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -EINVAL; } - err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_DRR_MAX, opt, drr_policy, + extack); if (err < 0) return err; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index cdf744e710f1..3deeb06eaecf 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -132,7 +132,8 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, if (!opt) goto errout; - err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, + dsmark_policy, NULL); if (err < 0) goto errout; @@ -353,7 +354,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt, if (err) return err; - err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, + dsmark_policy, NULL); if (err < 0) goto errout; diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index 67107caa287c..db0c2ba1d156 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -351,7 +351,8 @@ static int etf_init(struct Qdisc *sch, struct nlattr *opt, return -EINVAL; } - err = nla_parse_nested(tb, TCA_ETF_MAX, opt, etf_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy, + extack); if (err < 0) return err; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 5ca370e78d3a..d107c74767cd 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -684,7 +684,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_FQ_MAX, opt, fq_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 825a933b019a..08d85370b97c 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -387,8 +387,8 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy, - NULL); + err = nla_parse_nested_deprecated(tb, TCA_FQ_CODEL_MAX, opt, + fq_codel_policy, NULL); if (err < 0) return err; if (tb[TCA_FQ_CODEL_FLOWS]) { diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 9bfa15e12d23..dfa657da100f 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -538,7 +538,8 @@ static void gred_vq_apply(struct gred_sched *table, const struct nlattr *entry) struct nlattr *tb[TCA_GRED_VQ_MAX + 1]; u32 dp; - nla_parse_nested(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, NULL); + nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry, + gred_vq_policy, NULL); dp = nla_get_u32(tb[TCA_GRED_VQ_DP]); @@ -568,8 +569,8 @@ static int gred_vq_validate(struct gred_sched *table, u32 cdp, int err; u32 dp; - err = nla_parse_nested(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, - extack); + err = nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry, + gred_vq_policy, extack); if (err < 0) return err; @@ -610,8 +611,8 @@ static int gred_vqs_validate(struct gred_sched *table, u32 cdp, const struct nlattr *attr; int rem, err; - err = nla_validate_nested(vqs, TCA_GRED_VQ_ENTRY_MAX, - gred_vqe_policy, extack); + err = nla_validate_nested_deprecated(vqs, TCA_GRED_VQ_ENTRY_MAX, + gred_vqe_policy, extack); if (err < 0) return err; @@ -650,7 +651,8 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy, + extack); if (err < 0) return err; @@ -737,7 +739,8 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy, + extack); if (err < 0) return err; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 97d2fb91c39f..433f2190960f 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -926,7 +926,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_HFSC_MAX, opt, hfsc_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 43bc159c4f7c..a28e09b1609c 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -518,7 +518,8 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_HHF_MAX, opt, hhf_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 64010aec5437..d27d9bc9d010 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1012,7 +1012,8 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, if (err) return err; - err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy, + NULL); if (err < 0) return err; @@ -1310,7 +1311,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!opt) goto failure; - err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy, + NULL); if (err < 0) goto failure; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 7afefed72d35..d05086dc3866 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -125,8 +125,9 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, int nested_len = nla_len(nla) - NLA_ALIGN(len); if (nested_len >= nla_attr_size(0)) - return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), - nested_len, policy, NULL); + return nla_parse_deprecated(tb, maxtype, + nla_data(nla) + NLA_ALIGN(len), + nested_len, policy, NULL); memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); return 0; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 0242c0d4a2d0..78aa76b0da2e 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -935,8 +935,9 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, } if (nested_len >= nla_attr_size(0)) - return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), - nested_len, policy, NULL); + return nla_parse_deprecated(tb, maxtype, + nla_data(nla) + NLA_ALIGN(len), + nested_len, policy, NULL); memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); return 0; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 9bf41f4a2312..8fa129d3943e 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -216,7 +216,8 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index bab2d4026e8b..3f9e8b425ac6 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -410,8 +410,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -EINVAL; } - err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy, - NULL); + err = nla_parse_nested_deprecated(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], + qfq_policy, NULL); if (err < 0) return err; diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index b9f34e057e87..1e68a13bb66b 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -205,7 +205,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, if (opt == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy, + NULL); if (err < 0) return err; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index f54b00a431a3..b245d6a2068d 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -499,7 +499,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt, int err; if (opt) { - err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_SFB_MAX, opt, + sfb_policy, NULL); if (err < 0) return -EINVAL; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index e016ee07dd1f..09563c245473 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -310,8 +310,8 @@ static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; int err; - err = nla_parse_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n, - entry_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n, + entry_policy, NULL); if (err < 0) { NL_SET_ERR_MSG(extack, "Could not parse nested entry"); return -EINVAL; @@ -334,8 +334,8 @@ static int parse_sched_single_entry(struct nlattr *n, u32 index; int err; - err = nla_parse_nested(tb_list, TCA_TAPRIO_SCHED_MAX, - n, entry_list_policy, NULL); + err = nla_parse_nested_deprecated(tb_list, TCA_TAPRIO_SCHED_MAX, n, + entry_list_policy, NULL); if (err < 0) { NL_SET_ERR_MSG(extack, "Could not parse nested entry"); return -EINVAL; @@ -346,9 +346,10 @@ static int parse_sched_single_entry(struct nlattr *n, return -EINVAL; } - err = nla_parse_nested(tb_entry, TCA_TAPRIO_SCHED_ENTRY_MAX, - tb_list[TCA_TAPRIO_SCHED_ENTRY], - entry_policy, NULL); + err = nla_parse_nested_deprecated(tb_entry, + TCA_TAPRIO_SCHED_ENTRY_MAX, + tb_list[TCA_TAPRIO_SCHED_ENTRY], + entry_policy, NULL); if (err < 0) { NL_SET_ERR_MSG(extack, "Could not parse nested entry"); return -EINVAL; @@ -644,8 +645,8 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, int i, err, size; ktime_t start; - err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt, - taprio_policy, extack); + err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt, + taprio_policy, extack); if (err < 0) return err; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 3ae5a29eeab3..c09c0d855846 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -308,7 +308,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, s64 buffer, mtu; u64 rate64 = 0, prate64 = 0; - err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy, NULL); + err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy, + NULL); if (err < 0) return err; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index fd8e4e83f5e0..2bed6589f41e 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -776,9 +776,9 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, - info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -825,9 +825,9 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, - info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -870,9 +870,9 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, - info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -921,9 +921,9 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, - info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -964,9 +964,9 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, - info->attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); if (err) return err; @@ -1107,9 +1107,9 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, - info->attrs[TIPC_NLA_MEDIA], - tipc_nl_media_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX, + info->attrs[TIPC_NLA_MEDIA], + tipc_nl_media_policy, info->extack); if (err) return err; @@ -1155,9 +1155,9 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, - info->attrs[TIPC_NLA_MEDIA], - tipc_nl_media_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MEDIA_MAX, + info->attrs[TIPC_NLA_MEDIA], + tipc_nl_media_policy, info->extack); if (!attrs[TIPC_NLA_MEDIA_NAME]) return -EINVAL; diff --git a/net/tipc/link.c b/net/tipc/link.c index 0327c8ff8d48..1c514b64a0a9 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2148,8 +2148,8 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) { int err; - err = nla_parse_nested(props, TIPC_NLA_PROP_MAX, prop, - tipc_nl_prop_policy, NULL); + err = nla_parse_nested_deprecated(props, TIPC_NLA_PROP_MAX, prop, + tipc_nl_prop_policy, NULL); if (err) return err; diff --git a/net/tipc/net.c b/net/tipc/net.c index 0bba4e6b005c..85707c185360 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -245,9 +245,9 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_NET]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, - info->attrs[TIPC_NLA_NET], tipc_nl_net_policy, - info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_NET_MAX, + info->attrs[TIPC_NLA_NET], + tipc_nl_net_policy, info->extack); if (err) return err; diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 2d178df0a89f..3d5d0fb5b37c 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -255,8 +255,8 @@ int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) if (!*attr) return -EOPNOTSUPP; - return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy, - NULL); + return nlmsg_parse_deprecated(nlh, GENL_HDRLEN, *attr, maxattr, + tipc_nl_policy, NULL); } int __init tipc_netlink_start(void) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 36fe2dbb6d87..f7269ce934b5 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -328,9 +328,9 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, if (err) goto doit_out; - err = nla_parse(attrbuf, tipc_genl_family.maxattr, - (const struct nlattr *)trans_buf->data, - trans_buf->len, NULL, NULL); + err = nla_parse_deprecated(attrbuf, tipc_genl_family.maxattr, + (const struct nlattr *)trans_buf->data, + trans_buf->len, NULL, NULL); if (err) goto doit_out; @@ -378,8 +378,8 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, - attrs[TIPC_NLA_BEARER], NULL, NULL); + err = nla_parse_nested_deprecated(bearer, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], NULL, NULL); if (err) return err; @@ -514,24 +514,26 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_LINK]) return -EINVAL; - err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], - NULL, NULL); + err = nla_parse_nested_deprecated(link, TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], NULL, NULL); if (err) return err; if (!link[TIPC_NLA_LINK_PROP]) return -EINVAL; - err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX, - link[TIPC_NLA_LINK_PROP], NULL, NULL); + err = nla_parse_nested_deprecated(prop, TIPC_NLA_PROP_MAX, + link[TIPC_NLA_LINK_PROP], NULL, + NULL); if (err) return err; if (!link[TIPC_NLA_LINK_STATS]) return -EINVAL; - err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX, - link[TIPC_NLA_LINK_STATS], NULL, NULL); + err = nla_parse_nested_deprecated(stats, TIPC_NLA_STATS_MAX, + link[TIPC_NLA_LINK_STATS], NULL, + NULL); if (err) return err; @@ -645,8 +647,8 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_LINK]) return -EINVAL; - err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], - NULL, NULL); + err = nla_parse_nested_deprecated(link, TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], NULL, NULL); if (err) return err; @@ -869,16 +871,18 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_NAME_TABLE]) return -EINVAL; - err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, - attrs[TIPC_NLA_NAME_TABLE], NULL, NULL); + err = nla_parse_nested_deprecated(nt, TIPC_NLA_NAME_TABLE_MAX, + attrs[TIPC_NLA_NAME_TABLE], NULL, + NULL); if (err) return err; if (!nt[TIPC_NLA_NAME_TABLE_PUBL]) return -EINVAL; - err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, - nt[TIPC_NLA_NAME_TABLE_PUBL], NULL, NULL); + err = nla_parse_nested_deprecated(publ, TIPC_NLA_PUBL_MAX, + nt[TIPC_NLA_NAME_TABLE_PUBL], NULL, + NULL); if (err) return err; @@ -937,8 +941,8 @@ static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_PUBL]) return -EINVAL; - err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], - NULL, NULL); + err = nla_parse_nested_deprecated(publ, TIPC_NLA_PUBL_MAX, + attrs[TIPC_NLA_PUBL], NULL, NULL); if (err) return err; @@ -1007,8 +1011,8 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_SOCK]) return -EINVAL; - err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], - NULL, NULL); + err = nla_parse_nested_deprecated(sock, TIPC_NLA_SOCK_MAX, + attrs[TIPC_NLA_SOCK], NULL, NULL); if (err) return err; @@ -1019,8 +1023,9 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, u32 node; struct nlattr *con[TIPC_NLA_CON_MAX + 1]; - err = nla_parse_nested(con, TIPC_NLA_CON_MAX, - sock[TIPC_NLA_SOCK_CON], NULL, NULL); + err = nla_parse_nested_deprecated(con, TIPC_NLA_CON_MAX, + sock[TIPC_NLA_SOCK_CON], + NULL, NULL); if (err) return err; @@ -1059,8 +1064,8 @@ static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_MEDIA]) return -EINVAL; - err = nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, - attrs[TIPC_NLA_MEDIA], NULL, NULL); + err = nla_parse_nested_deprecated(media, TIPC_NLA_MEDIA_MAX, + attrs[TIPC_NLA_MEDIA], NULL, NULL); if (err) return err; @@ -1079,8 +1084,8 @@ static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_NODE]) return -EINVAL; - err = nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE], - NULL, NULL); + err = nla_parse_nested_deprecated(node, TIPC_NLA_NODE_MAX, + attrs[TIPC_NLA_NODE], NULL, NULL); if (err) return err; @@ -1126,8 +1131,8 @@ static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg, if (!attrs[TIPC_NLA_NET]) return -EINVAL; - err = nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET], - NULL, NULL); + err = nla_parse_nested_deprecated(net, TIPC_NLA_NET_MAX, + attrs[TIPC_NLA_NET], NULL, NULL); if (err) return err; diff --git a/net/tipc/node.c b/net/tipc/node.c index 3777254a508f..0eb1bf850219 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1885,9 +1885,9 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_NET]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, - info->attrs[TIPC_NLA_NET], tipc_nl_net_policy, - info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_NET_MAX, + info->attrs[TIPC_NLA_NET], + tipc_nl_net_policy, info->extack); if (err) return err; @@ -2043,9 +2043,9 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, info->extack); if (err) return err; @@ -2119,9 +2119,9 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, info->extack); if (err) return err; @@ -2184,9 +2184,9 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, - info->attrs[TIPC_NLA_LINK], - tipc_nl_link_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, info->extack); if (err) return err; @@ -2324,9 +2324,10 @@ int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[TIPC_NLA_MON]) return -EINVAL; - err = nla_parse_nested(attrs, TIPC_NLA_MON_MAX, - info->attrs[TIPC_NLA_MON], - tipc_nl_monitor_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, TIPC_NLA_MON_MAX, + info->attrs[TIPC_NLA_MON], + tipc_nl_monitor_policy, + info->extack); if (err) return err; @@ -2444,9 +2445,10 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, if (!attrs[TIPC_NLA_MON]) return -EINVAL; - err = nla_parse_nested(mon, TIPC_NLA_MON_MAX, - attrs[TIPC_NLA_MON], - tipc_nl_monitor_policy, NULL); + err = nla_parse_nested_deprecated(mon, TIPC_NLA_MON_MAX, + attrs[TIPC_NLA_MON], + tipc_nl_monitor_policy, + NULL); if (err) return err; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7918f4763fdc..145e4decb0c9 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3599,9 +3599,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!attrs[TIPC_NLA_SOCK]) return -EINVAL; - err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, - attrs[TIPC_NLA_SOCK], - tipc_nl_sock_policy, NULL); + err = nla_parse_nested_deprecated(sock, TIPC_NLA_SOCK_MAX, + attrs[TIPC_NLA_SOCK], + tipc_nl_sock_policy, NULL); if (err) return err; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 24d7c79598bb..7fc02d84c4f1 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -447,9 +447,9 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) if (!attrs[TIPC_NLA_BEARER]) return -EINVAL; - err = nla_parse_nested(battrs, TIPC_NLA_BEARER_MAX, - attrs[TIPC_NLA_BEARER], - tipc_nl_bearer_policy, NULL); + err = nla_parse_nested_deprecated(battrs, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, NULL); if (err) return err; @@ -601,8 +601,7 @@ int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr) struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; struct udp_media_addr *dst; - if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, - tipc_nl_udp_policy, NULL)) + if (nla_parse_nested_deprecated(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy, NULL)) return -EINVAL; if (!opts[TIPC_NLA_UDP_REMOTE]) @@ -655,9 +654,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) goto err; - if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, - attrs[TIPC_NLA_BEARER_UDP_OPTS], - tipc_nl_udp_policy, NULL)) + if (nla_parse_nested_deprecated(opts, TIPC_NLA_UDP_MAX, attrs[TIPC_NLA_BEARER_UDP_OPTS], tipc_nl_udp_policy, NULL)) goto err; if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0bcd5ea4b4f2..782c8225a90a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -703,9 +703,11 @@ int nl80211_prepare_wdev_dump(struct netlink_callback *cb, int err; if (!cb->args[0]) { - err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - genl_family_attrbuf(&nl80211_fam), - nl80211_fam.maxattr, nl80211_policy, NULL); + err = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nl80211_fam.hdrsize, + genl_family_attrbuf(&nl80211_fam), + nl80211_fam.maxattr, + nl80211_policy, NULL); if (err) return err; @@ -925,8 +927,9 @@ static int nl80211_parse_key_new(struct genl_info *info, struct nlattr *key, struct key_parse *k) { struct nlattr *tb[NL80211_KEY_MAX + 1]; - int err = nla_parse_nested(tb, NL80211_KEY_MAX, key, - nl80211_key_policy, info->extack); + int err = nla_parse_nested_deprecated(tb, NL80211_KEY_MAX, key, + nl80211_key_policy, + info->extack); if (err) return err; @@ -962,10 +965,11 @@ static int nl80211_parse_key_new(struct genl_info *info, struct nlattr *key, if (tb[NL80211_KEY_DEFAULT_TYPES]) { struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES]; - err = nla_parse_nested(kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1, - tb[NL80211_KEY_DEFAULT_TYPES], - nl80211_key_default_policy, - info->extack); + err = nla_parse_nested_deprecated(kdt, + NUM_NL80211_KEY_DEFAULT_TYPES - 1, + tb[NL80211_KEY_DEFAULT_TYPES], + nl80211_key_default_policy, + info->extack); if (err) return err; @@ -1012,11 +1016,11 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k) if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) { struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES]; - int err = nla_parse_nested(kdt, - NUM_NL80211_KEY_DEFAULT_TYPES - 1, - info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES], - nl80211_key_default_policy, - info->extack); + int err = nla_parse_nested_deprecated(kdt, + NUM_NL80211_KEY_DEFAULT_TYPES - 1, + info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES], + nl80211_key_default_policy, + info->extack); if (err) return err; @@ -2317,8 +2321,10 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, struct nl80211_dump_wiphy_state *state) { struct nlattr **tb = genl_family_attrbuf(&nl80211_fam); - int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, tb, - nl80211_fam.maxattr, nl80211_policy, NULL); + int ret = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nl80211_fam.hdrsize, + tb, nl80211_fam.maxattr, + nl80211_policy, NULL); /* ignore parse errors for backward compatibility */ if (ret) return 0; @@ -2761,10 +2767,11 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(nl_txq_params, info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], rem_txq_params) { - result = nla_parse_nested(tb, NL80211_TXQ_ATTR_MAX, - nl_txq_params, - txq_params_policy, - info->extack); + result = nla_parse_nested_deprecated(tb, + NL80211_TXQ_ATTR_MAX, + nl_txq_params, + txq_params_policy, + info->extack); if (result) return result; result = parse_txq_params(tb, &txq_params); @@ -3221,8 +3228,7 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) if (!nla) return -EINVAL; - if (nla_parse_nested(flags, NL80211_MNTR_FLAG_MAX, nla, - mntr_flags_policy, NULL)) + if (nla_parse_nested_deprecated(flags, NL80211_MNTR_FLAG_MAX, nla, mntr_flags_policy, NULL)) return -EINVAL; for (flag = 1; flag <= NL80211_MNTR_FLAG_MAX; flag++) @@ -4101,8 +4107,10 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, sband = rdev->wiphy.bands[band]; if (sband == NULL) return -EINVAL; - err = nla_parse_nested(tb, NL80211_TXRATE_MAX, tx_rates, - nl80211_txattr_policy, info->extack); + err = nla_parse_nested_deprecated(tb, NL80211_TXRATE_MAX, + tx_rates, + nl80211_txattr_policy, + info->extack); if (err) return err; if (tb[NL80211_TXRATE_LEGACY]) { @@ -4270,9 +4278,10 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, if (attrs[NL80211_ATTR_FTM_RESPONDER]) { struct nlattr *tb[NL80211_FTM_RESP_ATTR_MAX + 1]; - err = nla_parse_nested(tb, NL80211_FTM_RESP_ATTR_MAX, - attrs[NL80211_ATTR_FTM_RESPONDER], - NULL, NULL); + err = nla_parse_nested_deprecated(tb, + NL80211_FTM_RESP_ATTR_MAX, + attrs[NL80211_ATTR_FTM_RESPONDER], + NULL, NULL); if (err) return err; @@ -4680,8 +4689,7 @@ static int parse_station_flags(struct genl_info *info, if (!nla) return 0; - if (nla_parse_nested(flags, NL80211_STA_FLAG_MAX, nla, - sta_flags_policy, info->extack)) + if (nla_parse_nested_deprecated(flags, NL80211_STA_FLAG_MAX, nla, sta_flags_policy, info->extack)) return -EINVAL; /* @@ -5350,8 +5358,9 @@ static int nl80211_parse_sta_wme(struct genl_info *info, return 0; nla = info->attrs[NL80211_ATTR_STA_WME]; - err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla, - nl80211_sta_wme_policy, info->extack); + err = nla_parse_nested_deprecated(tb, NL80211_STA_WME_MAX, nla, + nl80211_sta_wme_policy, + info->extack); if (err) return err; @@ -6491,9 +6500,7 @@ do { \ if (!info->attrs[NL80211_ATTR_MESH_CONFIG]) return -EINVAL; - if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX, - info->attrs[NL80211_ATTR_MESH_CONFIG], - nl80211_meshconf_params_policy, info->extack)) + if (nla_parse_nested_deprecated(tb, NL80211_MESHCONF_ATTR_MAX, info->attrs[NL80211_ATTR_MESH_CONFIG], nl80211_meshconf_params_policy, info->extack)) return -EINVAL; /* This makes sure that there aren't more than 32 mesh config @@ -6626,9 +6633,7 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, if (!info->attrs[NL80211_ATTR_MESH_SETUP]) return -EINVAL; - if (nla_parse_nested(tb, NL80211_MESH_SETUP_ATTR_MAX, - info->attrs[NL80211_ATTR_MESH_SETUP], - nl80211_mesh_setup_params_policy, info->extack)) + if (nla_parse_nested_deprecated(tb, NL80211_MESH_SETUP_ATTR_MAX, info->attrs[NL80211_ATTR_MESH_SETUP], nl80211_mesh_setup_params_policy, info->extack)) return -EINVAL; if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC]) @@ -7012,9 +7017,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], rem_reg_rules) { - r = nla_parse_nested(tb, NL80211_REG_RULE_ATTR_MAX, - nl_reg_rule, reg_rule_policy, - info->extack); + r = nla_parse_nested_deprecated(tb, NL80211_REG_RULE_ATTR_MAX, + nl_reg_rule, reg_rule_policy, + info->extack); if (r) goto bad_reg; r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]); @@ -7085,8 +7090,9 @@ static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy, if (!nla_ok(nest, nla_len(nest))) return -EINVAL; - err = nla_parse_nested(attr, NL80211_BSS_SELECT_ATTR_MAX, nest, - nl80211_bss_select_policy, NULL); + err = nla_parse_nested_deprecated(attr, NL80211_BSS_SELECT_ATTR_MAX, + nest, nl80211_bss_select_policy, + NULL); if (err) return err; @@ -7579,8 +7585,10 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans, if (WARN_ON(i >= n_plans)) return -EINVAL; - err = nla_parse_nested(plan, NL80211_SCHED_SCAN_PLAN_MAX, - attr, nl80211_plan_policy, NULL); + err = nla_parse_nested_deprecated(plan, + NL80211_SCHED_SCAN_PLAN_MAX, + attr, nl80211_plan_policy, + NULL); if (err) return err; @@ -7701,10 +7709,11 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, tmp) { struct nlattr *rssi; - err = nla_parse_nested(tb, - NL80211_SCHED_SCAN_MATCH_ATTR_MAX, - attr, nl80211_match_policy, - NULL); + err = nla_parse_nested_deprecated(tb, + NL80211_SCHED_SCAN_MATCH_ATTR_MAX, + attr, + nl80211_match_policy, + NULL); if (err) return ERR_PTR(err); @@ -7888,10 +7897,11 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, tmp) { struct nlattr *ssid, *bssid, *rssi; - err = nla_parse_nested(tb, - NL80211_SCHED_SCAN_MATCH_ATTR_MAX, - attr, nl80211_match_policy, - NULL); + err = nla_parse_nested_deprecated(tb, + NL80211_SCHED_SCAN_MATCH_ATTR_MAX, + attr, + nl80211_match_policy, + NULL); if (err) goto out_free; ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID]; @@ -8275,9 +8285,9 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) if (err) return err; - err = nla_parse_nested(csa_attrs, NL80211_ATTR_MAX, - info->attrs[NL80211_ATTR_CSA_IES], - nl80211_policy, info->extack); + err = nla_parse_nested_deprecated(csa_attrs, NL80211_ATTR_MAX, + info->attrs[NL80211_ATTR_CSA_IES], + nl80211_policy, info->extack); if (err) return err; @@ -9552,9 +9562,10 @@ static int nl80211_testmode_dump(struct sk_buff *skb, } else { struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); - err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - attrbuf, nl80211_fam.maxattr, - nl80211_policy, NULL); + err = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nl80211_fam.hdrsize, + attrbuf, nl80211_fam.maxattr, + nl80211_policy, NULL); if (err) goto out_err; @@ -10678,8 +10689,9 @@ static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info) if (!cqm) return -EINVAL; - err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm, - nl80211_attr_cqm_policy, info->extack); + err = nla_parse_nested_deprecated(attrs, NL80211_ATTR_CQM_MAX, cqm, + nl80211_attr_cqm_policy, + info->extack); if (err) return err; @@ -11117,8 +11129,8 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, if (!rdev->wiphy.wowlan->tcp) return -EINVAL; - err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TCP, attr, - nl80211_wowlan_tcp_policy, NULL); + err = nla_parse_nested_deprecated(tb, MAX_NL80211_WOWLAN_TCP, attr, + nl80211_wowlan_tcp_policy, NULL); if (err) return err; @@ -11263,8 +11275,8 @@ static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev, goto out; } - err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy, - NULL); + err = nla_parse_nested_deprecated(tb, NL80211_ATTR_MAX, attr, + nl80211_policy, NULL); if (err) goto out; @@ -11299,9 +11311,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) goto set_wakeup; } - err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TRIG, - info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS], - nl80211_wowlan_policy, info->extack); + err = nla_parse_nested_deprecated(tb, MAX_NL80211_WOWLAN_TRIG, + info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS], + nl80211_wowlan_policy, info->extack); if (err) return err; @@ -11383,9 +11395,11 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) rem) { u8 *mask_pat; - err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, - nl80211_packet_pattern_policy, - info->extack); + err = nla_parse_nested_deprecated(pat_tb, + MAX_NL80211_PKTPAT, + pat, + nl80211_packet_pattern_policy, + info->extack); if (err) goto error; @@ -11598,8 +11612,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, int rem, pat_len, mask_len, pkt_offset, n_patterns = 0; struct nlattr *pat_tb[NUM_NL80211_PKTPAT]; - err = nla_parse_nested(tb, NL80211_ATTR_COALESCE_RULE_MAX, rule, - nl80211_coalesce_policy, NULL); + err = nla_parse_nested_deprecated(tb, NL80211_ATTR_COALESCE_RULE_MAX, + rule, nl80211_coalesce_policy, NULL); if (err) return err; @@ -11634,8 +11648,10 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, rem) { u8 *mask_pat; - err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, - nl80211_packet_pattern_policy, NULL); + err = nla_parse_nested_deprecated(pat_tb, MAX_NL80211_PKTPAT, + pat, + nl80211_packet_pattern_policy, + NULL); if (err) return err; @@ -11757,9 +11773,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_REKEY_DATA]) return -EINVAL; - err = nla_parse_nested(tb, MAX_NL80211_REKEY_DATA, - info->attrs[NL80211_ATTR_REKEY_DATA], - nl80211_rekey_policy, info->extack); + err = nla_parse_nested_deprecated(tb, MAX_NL80211_REKEY_DATA, + info->attrs[NL80211_ATTR_REKEY_DATA], + nl80211_rekey_policy, info->extack); if (err) return err; @@ -12071,9 +12087,10 @@ static int nl80211_nan_add_func(struct sk_buff *skb, if (!info->attrs[NL80211_ATTR_NAN_FUNC]) return -EINVAL; - err = nla_parse_nested(tb, NL80211_NAN_FUNC_ATTR_MAX, - info->attrs[NL80211_ATTR_NAN_FUNC], - nl80211_nan_func_policy, info->extack); + err = nla_parse_nested_deprecated(tb, NL80211_NAN_FUNC_ATTR_MAX, + info->attrs[NL80211_ATTR_NAN_FUNC], + nl80211_nan_func_policy, + info->extack); if (err) return err; @@ -12169,9 +12186,11 @@ static int nl80211_nan_add_func(struct sk_buff *skb, if (tb[NL80211_NAN_FUNC_SRF]) { struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR]; - err = nla_parse_nested(srf_tb, NL80211_NAN_SRF_ATTR_MAX, - tb[NL80211_NAN_FUNC_SRF], - nl80211_nan_srf_policy, info->extack); + err = nla_parse_nested_deprecated(srf_tb, + NL80211_NAN_SRF_ATTR_MAX, + tb[NL80211_NAN_FUNC_SRF], + nl80211_nan_srf_policy, + info->extack); if (err) goto out; @@ -12704,8 +12723,10 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, return 0; } - err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, attrbuf, - nl80211_fam.maxattr, nl80211_policy, NULL); + err = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nl80211_fam.hdrsize, + attrbuf, nl80211_fam.maxattr, + nl80211_policy, NULL); if (err) return err; diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 5c80bccc8b3c..1b190475359a 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -25,7 +25,8 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, } /* no validation needed - was already done via nested policy */ - nla_parse_nested(tb, NL80211_PMSR_FTM_REQ_ATTR_MAX, ftmreq, NULL, NULL); + nla_parse_nested_deprecated(tb, NL80211_PMSR_FTM_REQ_ATTR_MAX, ftmreq, + NULL, NULL); if (tb[NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE]) preamble = nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE]); @@ -139,7 +140,8 @@ static int pmsr_parse_peer(struct cfg80211_registered_device *rdev, int err, rem; /* no validation needed - was already done via nested policy */ - nla_parse_nested(tb, NL80211_PMSR_PEER_ATTR_MAX, peer, NULL, NULL); + nla_parse_nested_deprecated(tb, NL80211_PMSR_PEER_ATTR_MAX, peer, + NULL, NULL); if (!tb[NL80211_PMSR_PEER_ATTR_ADDR] || !tb[NL80211_PMSR_PEER_ATTR_CHAN] || @@ -154,9 +156,9 @@ static int pmsr_parse_peer(struct cfg80211_registered_device *rdev, /* reuse info->attrs */ memset(info->attrs, 0, sizeof(*info->attrs) * (NL80211_ATTR_MAX + 1)); /* need to validate here, we don't want to have validation recursion */ - err = nla_parse_nested(info->attrs, NL80211_ATTR_MAX, - tb[NL80211_PMSR_PEER_ATTR_CHAN], - nl80211_policy, info->extack); + err = nla_parse_nested_deprecated(info->attrs, NL80211_ATTR_MAX, + tb[NL80211_PMSR_PEER_ATTR_CHAN], + nl80211_policy, info->extack); if (err) return err; @@ -165,9 +167,9 @@ static int pmsr_parse_peer(struct cfg80211_registered_device *rdev, return err; /* no validation needed - was already done via nested policy */ - nla_parse_nested(req, NL80211_PMSR_REQ_ATTR_MAX, - tb[NL80211_PMSR_PEER_ATTR_REQ], - NULL, NULL); + nla_parse_nested_deprecated(req, NL80211_PMSR_REQ_ATTR_MAX, + tb[NL80211_PMSR_PEER_ATTR_REQ], NULL, + NULL); if (!req[NL80211_PMSR_REQ_ATTR_DATA]) { NL_SET_ERR_MSG_ATTR(info->extack, diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a131f9ff979e..d7cb16f0df5b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1006,8 +1006,8 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) u8 proto = 0; int err; - err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, xfrma_policy, - cb->extack); + err = nlmsg_parse_deprecated(cb->nlh, 0, attrs, XFRMA_MAX, + xfrma_policy, cb->extack); if (err < 0) return err; @@ -2656,9 +2656,9 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, } } - err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, - link->nla_max ? : XFRMA_MAX, - link->nla_pol ? : xfrma_policy, extack); + err = nlmsg_parse_deprecated(nlh, xfrm_msg_min[type], attrs, + link->nla_max ? : XFRMA_MAX, + link->nla_pol ? : xfrma_policy, extack); if (err < 0) return err; -- cgit From ef6243acb4782df587a4d7d6c310fa5b5d82684b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Apr 2019 14:07:31 +0200 Subject: genetlink: optionally validate strictly/dumps Add options to strictly validate messages and dump messages, sometimes perhaps validating dump messages non-strictly may be required, so add an option for that as well. Since none of this can really be applied to existing commands, set the options everwhere using the following spatch: @@ identifier ops; expression X; @@ struct genl_ops ops[] = { ..., { .cmd = X, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, ... }, ... }; For new commands one should just not copy the .validate 'opt-out' flags and thus get strict validation. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- drivers/block/nbd.c | 4 ++ drivers/net/gtp.c | 3 + drivers/net/ieee802154/mac802154_hwsim.c | 6 ++ drivers/net/macsec.c | 10 +++ drivers/net/team/team.c | 4 ++ drivers/net/wireless/mac80211_hwsim.c | 6 ++ drivers/target/target_core_user.c | 4 ++ fs/dlm/netlink.c | 1 + include/net/genetlink.h | 7 +++ kernel/taskstats.c | 2 + net/batman-adv/netlink.c | 18 ++++++ net/core/devlink.c | 38 +++++++++++ net/core/drop_monitor.c | 3 + net/hsr/hsr_netlink.c | 2 + net/ieee802154/nl802154.c | 29 +++++++++ net/ipv4/fou.c | 3 + net/ipv4/tcp_metrics.c | 2 + net/ipv6/ila/ila_main.c | 4 ++ net/ipv6/seg6.c | 4 ++ net/l2tp/l2tp_netlink.c | 9 +++ net/ncsi/ncsi-netlink.c | 6 ++ net/netfilter/ipvs/ip_vs_ctl.c | 16 +++++ net/netlabel/netlabel_calipso.c | 4 ++ net/netlabel/netlabel_cipso_v4.c | 4 ++ net/netlabel/netlabel_mgmt.c | 8 +++ net/netlabel/netlabel_unlabeled.c | 8 +++ net/netlink/genetlink.c | 29 ++++++++- net/nfc/netlink.c | 19 ++++++ net/openvswitch/conntrack.c | 3 + net/openvswitch/datapath.c | 13 ++++ net/openvswitch/meter.c | 4 ++ net/psample/psample.c | 1 + net/smc/smc_pnet.c | 4 ++ net/tipc/netlink.c | 21 +++++++ net/tipc/netlink_compat.c | 1 + net/wimax/stack.c | 4 ++ net/wireless/nl80211.c | 104 +++++++++++++++++++++++++++++++ 37 files changed, 405 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 69dc11f907a3..6c2dd268e603 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2003,18 +2003,22 @@ out: static const struct genl_ops nbd_connect_genl_ops[] = { { .cmd = NBD_CMD_CONNECT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nbd_genl_connect, }, { .cmd = NBD_CMD_DISCONNECT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nbd_genl_disconnect, }, { .cmd = NBD_CMD_RECONFIGURE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nbd_genl_reconfigure, }, { .cmd = NBD_CMD_STATUS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nbd_genl_status, }, }; diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index c06e31747288..eaf4311b4004 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1270,16 +1270,19 @@ static const struct nla_policy gtp_genl_policy[GTPA_MAX + 1] = { static const struct genl_ops gtp_genl_ops[] = { { .cmd = GTP_CMD_NEWPDP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = gtp_genl_new_pdp, .flags = GENL_ADMIN_PERM, }, { .cmd = GTP_CMD_DELPDP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = gtp_genl_del_pdp, .flags = GENL_ADMIN_PERM, }, { .cmd = GTP_CMD_GETPDP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = gtp_genl_get_pdp, .dumpit = gtp_genl_dump_pdp, .flags = GENL_ADMIN_PERM, diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index 486a3a3bf35b..b187ae1a6bd6 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -594,31 +594,37 @@ static const struct nla_policy hwsim_genl_policy[MAC802154_HWSIM_ATTR_MAX + 1] = static const struct genl_ops hwsim_nl_ops[] = { { .cmd = MAC802154_HWSIM_CMD_NEW_RADIO, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_new_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_DEL_RADIO, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_del_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_GET_RADIO, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_get_radio_nl, .dumpit = hwsim_dump_radio_nl, }, { .cmd = MAC802154_HWSIM_CMD_NEW_EDGE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_new_edge_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_DEL_EDGE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_del_edge_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_SET_EDGE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_set_edge_lqi, .flags = GENL_UNS_ADMIN_PERM, }, diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index c3fa3d8da8f3..009b2902c9d3 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2637,50 +2637,60 @@ done: static const struct genl_ops macsec_genl_ops[] = { { .cmd = MACSEC_CMD_GET_TXSC, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = macsec_dump_txsc, }, { .cmd = MACSEC_CMD_ADD_RXSC, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_add_rxsc, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_DEL_RXSC, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_del_rxsc, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_UPD_RXSC, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_upd_rxsc, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_ADD_TXSA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_add_txsa, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_DEL_TXSA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_del_txsa, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_UPD_TXSA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_upd_txsa, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_ADD_RXSA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_add_rxsa, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_DEL_RXSA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_del_rxsa, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_UPD_RXSA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = macsec_upd_rxsa, .flags = GENL_ADMIN_PERM, }, diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index be58445afbbc..2106045b3e16 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2757,20 +2757,24 @@ static int team_nl_cmd_port_list_get(struct sk_buff *skb, static const struct genl_ops team_nl_ops[] = { { .cmd = TEAM_CMD_NOOP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = team_nl_cmd_noop, }, { .cmd = TEAM_CMD_OPTIONS_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = team_nl_cmd_options_set, .flags = GENL_ADMIN_PERM, }, { .cmd = TEAM_CMD_OPTIONS_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = team_nl_cmd_options_get, .flags = GENL_ADMIN_PERM, }, { .cmd = TEAM_CMD_PORT_LIST_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = team_nl_cmd_port_list_get, .flags = GENL_ADMIN_PERM, }, diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 2a1aa2f6e7dc..0dcb511f44e2 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3637,29 +3637,35 @@ done: static const struct genl_ops hwsim_ops[] = { { .cmd = HWSIM_CMD_REGISTER, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_register_received_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = HWSIM_CMD_FRAME, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_cloned_frame_received_nl, }, { .cmd = HWSIM_CMD_TX_INFO_FRAME, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_tx_info_frame_received_nl, }, { .cmd = HWSIM_CMD_NEW_RADIO, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_new_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = HWSIM_CMD_DEL_RADIO, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_del_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = HWSIM_CMD_GET_RADIO, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_get_radio_nl, .dumpit = hwsim_dump_radio_nl, }, diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 481d371c4b01..40b29ca5a98d 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -441,21 +441,25 @@ static int tcmu_genl_set_features(struct sk_buff *skb, struct genl_info *info) static const struct genl_ops tcmu_genl_ops[] = { { .cmd = TCMU_CMD_SET_FEATURES, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = tcmu_genl_set_features, }, { .cmd = TCMU_CMD_ADDED_DEVICE_DONE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = tcmu_genl_add_dev_done, }, { .cmd = TCMU_CMD_REMOVED_DEVICE_DONE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = tcmu_genl_rm_dev_done, }, { .cmd = TCMU_CMD_RECONFIG_DEVICE_DONE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = tcmu_genl_reconfig_dev_done, }, diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 43a96c330570..d8e27defa89f 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -68,6 +68,7 @@ static int user_cmd(struct sk_buff *skb, struct genl_info *info) static const struct genl_ops dlm_nl_ops[] = { { .cmd = DLM_CMD_HELLO, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = user_cmd, }, }; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 68de579cfe5e..9292f1c588b7 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -121,6 +121,12 @@ static inline int genl_err_attr(struct genl_info *info, int err, return err; } +enum genl_validate_flags { + GENL_DONT_VALIDATE_STRICT = BIT(0), + GENL_DONT_VALIDATE_DUMP = BIT(1), + GENL_DONT_VALIDATE_DUMP_STRICT = BIT(2), +}; + /** * struct genl_ops - generic netlink operations * @cmd: command identifier @@ -141,6 +147,7 @@ struct genl_ops { u8 cmd; u8 internal_flags; u8 flags; + u8 validate; }; int genl_register_family(struct genl_family *family); diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 0e347f1c7800..5f852b8f59f7 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -649,12 +649,14 @@ err: static const struct genl_ops taskstats_ops[] = { { .cmd = TASKSTATS_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = taskstats_user_cmd, /* policy enforced later */ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_HASPOL, }, { .cmd = CGROUPSTATS_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = cgroupstats_user_cmd, /* policy enforced later */ .flags = GENL_CMD_CAP_HASPOL, diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index e7907308b331..a67720fad46c 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -1343,29 +1343,34 @@ static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb, static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_MESH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* can be retrieved by unprivileged users */ .doit = batadv_netlink_get_mesh, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = batadv_netlink_tp_meter_start, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER_CANCEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = batadv_netlink_tp_meter_cancel, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_GET_ROUTING_ALGOS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .dumpit = batadv_algo_dump, }, { .cmd = BATADV_CMD_GET_HARDIF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* can be retrieved by unprivileged users */ .dumpit = batadv_netlink_dump_hardif, .doit = batadv_netlink_get_hardif, @@ -1374,57 +1379,68 @@ static const struct genl_ops batadv_netlink_ops[] = { }, { .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .dumpit = batadv_tt_local_dump, }, { .cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .dumpit = batadv_tt_global_dump, }, { .cmd = BATADV_CMD_GET_ORIGINATORS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .dumpit = batadv_orig_dump, }, { .cmd = BATADV_CMD_GET_NEIGHBORS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .dumpit = batadv_hardif_neigh_dump, }, { .cmd = BATADV_CMD_GET_GATEWAYS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .dumpit = batadv_gw_dump, }, { .cmd = BATADV_CMD_GET_BLA_CLAIM, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .dumpit = batadv_bla_claim_dump, }, { .cmd = BATADV_CMD_GET_BLA_BACKBONE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .dumpit = batadv_bla_backbone_dump, }, { .cmd = BATADV_CMD_GET_DAT_CACHE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .dumpit = batadv_dat_cache_dump, }, { .cmd = BATADV_CMD_GET_MCAST_FLAGS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .dumpit = batadv_mcast_flags_dump, }, { .cmd = BATADV_CMD_SET_MESH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = batadv_netlink_set_mesh, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_SET_HARDIF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = batadv_netlink_set_hardif, .internal_flags = BATADV_FLAG_NEED_MESH | @@ -1432,6 +1448,7 @@ static const struct genl_ops batadv_netlink_ops[] = { }, { .cmd = BATADV_CMD_GET_VLAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* can be retrieved by unprivileged users */ .doit = batadv_netlink_get_vlan, .internal_flags = BATADV_FLAG_NEED_MESH | @@ -1439,6 +1456,7 @@ static const struct genl_ops batadv_netlink_ops[] = { }, { .cmd = BATADV_CMD_SET_VLAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = batadv_netlink_set_vlan, .internal_flags = BATADV_FLAG_NEED_MESH | diff --git a/net/core/devlink.c b/net/core/devlink.c index b020d182c9fc..4e28d04c0165 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4948,6 +4948,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_get_doit, .dumpit = devlink_nl_cmd_get_dumpit, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, @@ -4955,6 +4956,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_PORT_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_get_doit, .dumpit = devlink_nl_cmd_port_get_dumpit, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, @@ -4962,12 +4964,14 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_PORT_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_set_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_PORT_SPLIT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_split_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | @@ -4975,6 +4979,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_PORT_UNSPLIT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_unsplit_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | @@ -4982,6 +4987,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_SB_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_get_doit, .dumpit = devlink_nl_cmd_sb_get_dumpit, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | @@ -4990,6 +4996,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_SB_POOL_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_pool_get_doit, .dumpit = devlink_nl_cmd_sb_pool_get_dumpit, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | @@ -4998,6 +5005,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_SB_POOL_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_pool_set_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | @@ -5005,6 +5013,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_SB_PORT_POOL_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_port_pool_get_doit, .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | @@ -5013,6 +5022,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_SB_PORT_POOL_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_port_pool_set_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | @@ -5020,6 +5030,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit, .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | @@ -5028,6 +5039,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | @@ -5035,6 +5047,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_occ_snapshot_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | @@ -5042,6 +5055,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_occ_max_clear_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | @@ -5049,12 +5063,14 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_ESWITCH_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_eswitch_get_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_ESWITCH_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_eswitch_set_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | @@ -5062,42 +5078,49 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_table_get, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_entries_get, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_headers_get, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_table_counters_set, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_RESOURCE_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_resource_set, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_RESOURCE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_resource_dump, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_RELOAD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_reload, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | @@ -5105,6 +5128,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_PARAM_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_param_get_doit, .dumpit = devlink_nl_cmd_param_get_dumpit, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, @@ -5112,12 +5136,14 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_PARAM_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_param_set_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_PORT_PARAM_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_param_get_doit, .dumpit = devlink_nl_cmd_port_param_get_dumpit, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, @@ -5125,12 +5151,14 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_PORT_PARAM_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_param_set_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_REGION_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_region_get_doit, .dumpit = devlink_nl_cmd_region_get_dumpit, .flags = GENL_ADMIN_PERM, @@ -5138,18 +5166,21 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_REGION_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_region_del, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_REGION_READ, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = devlink_nl_cmd_region_read_dumpit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_INFO_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_info_get_doit, .dumpit = devlink_nl_cmd_info_get_dumpit, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, @@ -5157,6 +5188,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_get_doit, .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, @@ -5164,24 +5196,28 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_set_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_recover_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_diagnose_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_dump_get_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | @@ -5189,6 +5225,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | @@ -5196,6 +5233,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, { .cmd = DEVLINK_CMD_FLASH_UPDATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_flash_update, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index c7785efeea57..d4ce0542acfa 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -355,14 +355,17 @@ out: static const struct genl_ops dropmon_ops[] = { { .cmd = NET_DM_CMD_CONFIG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_config, }, { .cmd = NET_DM_CMD_START, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, }, { .cmd = NET_DM_CMD_STOP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, }, }; diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index c2d5a368d6d8..8f8337f893ba 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -437,12 +437,14 @@ fail: static const struct genl_ops hsr_ops[] = { { .cmd = HSR_C_GET_NODE_STATUS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = hsr_get_node_status, .dumpit = NULL, }, { .cmd = HSR_C_GET_NODE_LIST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = hsr_get_node_list, .dumpit = NULL, diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 4218304cb201..e4c4174f9efb 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -2209,6 +2209,7 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb, static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_GET_WPAN_PHY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_get_wpan_phy, .dumpit = nl802154_dump_wpan_phy, .done = nl802154_dump_wpan_phy_done, @@ -2218,6 +2219,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_GET_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_get_interface, .dumpit = nl802154_dump_interface, /* can be retrieved by unprivileged users */ @@ -2226,6 +2228,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_NEW_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_new_interface, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | @@ -2233,6 +2236,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_DEL_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_del_interface, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_DEV | @@ -2240,6 +2244,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_SET_CHANNEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_channel, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | @@ -2247,6 +2252,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_SET_CCA_MODE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_cca_mode, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | @@ -2254,6 +2260,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_SET_CCA_ED_LEVEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_cca_ed_level, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | @@ -2261,6 +2268,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_SET_TX_POWER, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_tx_power, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | @@ -2268,6 +2276,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_SET_WPAN_PHY_NETNS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_wpan_phy_netns, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | @@ -2275,6 +2284,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_SET_PAN_ID, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_pan_id, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | @@ -2282,6 +2292,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_SET_SHORT_ADDR, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_short_addr, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | @@ -2289,6 +2300,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_SET_BACKOFF_EXPONENT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_backoff_exponent, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | @@ -2296,6 +2308,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_SET_MAX_CSMA_BACKOFFS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_max_csma_backoffs, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | @@ -2303,6 +2316,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_SET_MAX_FRAME_RETRIES, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_max_frame_retries, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | @@ -2310,6 +2324,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_SET_LBT_MODE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_lbt_mode, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | @@ -2317,6 +2332,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_SET_ACKREQ_DEFAULT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_ackreq_default, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | @@ -2325,6 +2341,7 @@ static const struct genl_ops nl802154_ops[] = { #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL { .cmd = NL802154_CMD_SET_SEC_PARAMS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_set_llsec_params, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | @@ -2332,6 +2349,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_GET_SEC_KEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* TODO .doit by matching key id? */ .dumpit = nl802154_dump_llsec_key, .flags = GENL_ADMIN_PERM, @@ -2340,6 +2358,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_NEW_SEC_KEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_add_llsec_key, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | @@ -2347,6 +2366,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_DEL_SEC_KEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_del_llsec_key, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | @@ -2355,6 +2375,7 @@ static const struct genl_ops nl802154_ops[] = { /* TODO unique identifier must short+pan OR extended_addr */ { .cmd = NL802154_CMD_GET_SEC_DEV, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* TODO .doit by matching extended_addr? */ .dumpit = nl802154_dump_llsec_dev, .flags = GENL_ADMIN_PERM, @@ -2363,6 +2384,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_NEW_SEC_DEV, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_add_llsec_dev, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | @@ -2370,6 +2392,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_DEL_SEC_DEV, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_del_llsec_dev, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | @@ -2378,6 +2401,7 @@ static const struct genl_ops nl802154_ops[] = { /* TODO remove complete devkey, put it as nested? */ { .cmd = NL802154_CMD_GET_SEC_DEVKEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* TODO doit by matching ??? */ .dumpit = nl802154_dump_llsec_devkey, .flags = GENL_ADMIN_PERM, @@ -2386,6 +2410,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_NEW_SEC_DEVKEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_add_llsec_devkey, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | @@ -2393,6 +2418,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_DEL_SEC_DEVKEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_del_llsec_devkey, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | @@ -2400,6 +2426,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_GET_SEC_LEVEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* TODO .doit by matching frame_type? */ .dumpit = nl802154_dump_llsec_seclevel, .flags = GENL_ADMIN_PERM, @@ -2408,6 +2435,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_NEW_SEC_LEVEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl802154_add_llsec_seclevel, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | @@ -2415,6 +2443,7 @@ static const struct genl_ops nl802154_ops[] = { }, { .cmd = NL802154_CMD_DEL_SEC_LEVEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, /* TODO match frame_type only? */ .doit = nl802154_del_llsec_seclevel, .flags = GENL_ADMIN_PERM, diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 1ca1586a7e46..ca95051317ed 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -913,16 +913,19 @@ static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) static const struct genl_ops fou_nl_ops[] = { { .cmd = FOU_CMD_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = fou_nl_cmd_add_port, .flags = GENL_ADMIN_PERM, }, { .cmd = FOU_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = fou_nl_cmd_rm_port, .flags = GENL_ADMIN_PERM, }, { .cmd = FOU_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = fou_nl_cmd_get_port, .dumpit = fou_nl_dump, }, diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 9a08bfb0672c..f262f2cace29 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -951,11 +951,13 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info) static const struct genl_ops tcp_metrics_nl_ops[] = { { .cmd = TCP_METRICS_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tcp_metrics_nl_cmd_get, .dumpit = tcp_metrics_nl_dump, }, { .cmd = TCP_METRICS_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tcp_metrics_nl_cmd_del, .flags = GENL_ADMIN_PERM, }, diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c index 8d31a5066d0c..257d2b681246 100644 --- a/net/ipv6/ila/ila_main.c +++ b/net/ipv6/ila/ila_main.c @@ -16,21 +16,25 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { static const struct genl_ops ila_nl_ops[] = { { .cmd = ILA_CMD_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ila_xlat_nl_cmd_add_mapping, .flags = GENL_ADMIN_PERM, }, { .cmd = ILA_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ila_xlat_nl_cmd_del_mapping, .flags = GENL_ADMIN_PERM, }, { .cmd = ILA_CMD_FLUSH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ila_xlat_nl_cmd_flush, .flags = GENL_ADMIN_PERM, }, { .cmd = ILA_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ila_xlat_nl_cmd_get_mapping, .start = ila_xlat_nl_dump_start, .dumpit = ila_xlat_nl_dump, diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index ceff773471e7..0c5479ef9b38 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -398,11 +398,13 @@ static struct pernet_operations ip6_segments_ops = { static const struct genl_ops seg6_genl_ops[] = { { .cmd = SEG6_CMD_SETHMAC, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = seg6_genl_sethmac, .flags = GENL_ADMIN_PERM, }, { .cmd = SEG6_CMD_DUMPHMAC, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .start = seg6_genl_dumphmac_start, .dumpit = seg6_genl_dumphmac, .done = seg6_genl_dumphmac_done, @@ -410,11 +412,13 @@ static const struct genl_ops seg6_genl_ops[] = { }, { .cmd = SEG6_CMD_SET_TUNSRC, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = seg6_genl_set_tunsrc, .flags = GENL_ADMIN_PERM, }, { .cmd = SEG6_CMD_GET_TUNSRC, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = seg6_genl_get_tunsrc, .flags = GENL_ADMIN_PERM, }, diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index c31b50cc48d9..6acc7f869b0c 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -915,47 +915,56 @@ static const struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = { static const struct genl_ops l2tp_nl_ops[] = { { .cmd = L2TP_CMD_NOOP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_noop, /* can be retrieved by unprivileged users */ }, { .cmd = L2TP_CMD_TUNNEL_CREATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_create, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_DELETE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_delete, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_MODIFY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_modify, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_tunnel_get, .dumpit = l2tp_nl_cmd_tunnel_dump, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_CREATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_create, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_DELETE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_delete, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_MODIFY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_modify, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = l2tp_nl_cmd_session_get, .dumpit = l2tp_nl_cmd_session_dump, .flags = GENL_ADMIN_PERM, diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index 37759c88ef02..7fc4feddafa3 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -723,32 +723,38 @@ static int ncsi_set_channel_mask_nl(struct sk_buff *msg, static const struct genl_ops ncsi_ops[] = { { .cmd = NCSI_CMD_PKG_INFO, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_pkg_info_nl, .dumpit = ncsi_pkg_info_all_nl, .flags = 0, }, { .cmd = NCSI_CMD_SET_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_set_interface_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_CLEAR_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_clear_interface_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_SEND_CMD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_send_cmd_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_SET_PACKAGE_MASK, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_set_package_mask_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_SET_CHANNEL_MASK, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ncsi_set_channel_mask_nl, .flags = GENL_ADMIN_PERM, }, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 24bb1a7b590c..0e887159425c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3802,82 +3802,98 @@ out: static const struct genl_ops ip_vs_genl_ops[] = { { .cmd = IPVS_CMD_NEW_SERVICE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_SET_SERVICE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_DEL_SERVICE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_GET_SERVICE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_get_cmd, .dumpit = ip_vs_genl_dump_services, }, { .cmd = IPVS_CMD_NEW_DEST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_SET_DEST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_DEL_DEST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_GET_DEST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .dumpit = ip_vs_genl_dump_dests, }, { .cmd = IPVS_CMD_NEW_DAEMON, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_set_daemon, }, { .cmd = IPVS_CMD_DEL_DAEMON, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_set_daemon, }, { .cmd = IPVS_CMD_GET_DAEMON, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .dumpit = ip_vs_genl_dump_daemons, }, { .cmd = IPVS_CMD_SET_CONFIG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_GET_CONFIG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_get_cmd, }, { .cmd = IPVS_CMD_GET_INFO, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_get_cmd, }, { .cmd = IPVS_CMD_ZERO, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_FLUSH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_set_cmd, }, diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index 80184513b2b2..1de87172885d 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -321,24 +321,28 @@ static int netlbl_calipso_remove(struct sk_buff *skb, struct genl_info *info) static const struct genl_ops netlbl_calipso_ops[] = { { .cmd = NLBL_CALIPSO_C_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_calipso_add, .dumpit = NULL, }, { .cmd = NLBL_CALIPSO_C_REMOVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_calipso_remove, .dumpit = NULL, }, { .cmd = NLBL_CALIPSO_C_LIST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = netlbl_calipso_list, .dumpit = NULL, }, { .cmd = NLBL_CALIPSO_C_LISTALL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = NULL, .dumpit = netlbl_calipso_listall, diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 8d401df65928..5d1121981d0b 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -741,24 +741,28 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) static const struct genl_ops netlbl_cipsov4_ops[] = { { .cmd = NLBL_CIPSOV4_C_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_cipsov4_add, .dumpit = NULL, }, { .cmd = NLBL_CIPSOV4_C_REMOVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_cipsov4_remove, .dumpit = NULL, }, { .cmd = NLBL_CIPSOV4_C_LIST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = netlbl_cipsov4_list, .dumpit = NULL, }, { .cmd = NLBL_CIPSOV4_C_LISTALL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = NULL, .dumpit = netlbl_cipsov4_listall, diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index c6c8a101f2ff..cae04f207782 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -774,48 +774,56 @@ version_failure: static const struct genl_ops netlbl_mgmt_genl_ops[] = { { .cmd = NLBL_MGMT_C_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_mgmt_add, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_REMOVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_mgmt_remove, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_LISTALL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = NULL, .dumpit = netlbl_mgmt_listall, }, { .cmd = NLBL_MGMT_C_ADDDEF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_mgmt_adddef, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_REMOVEDEF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_mgmt_removedef, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_LISTDEF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = netlbl_mgmt_listdef, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_PROTOCOLS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = NULL, .dumpit = netlbl_mgmt_protocols, }, { .cmd = NLBL_MGMT_C_VERSION, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = netlbl_mgmt_version, .dumpit = NULL, diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 6b1b6c2b5141..b87dd34e1835 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1317,48 +1317,56 @@ unlabel_staticlistdef_return: static const struct genl_ops netlbl_unlabel_genl_ops[] = { { .cmd = NLBL_UNLABEL_C_STATICADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticadd, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICREMOVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticremove, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICLIST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = NULL, .dumpit = netlbl_unlabel_staticlist, }, { .cmd = NLBL_UNLABEL_C_STATICADDDEF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticadddef, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICREMOVEDEF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_staticremovedef, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICLISTDEF, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = NULL, .dumpit = netlbl_unlabel_staticlistdef, }, { .cmd = NLBL_UNLABEL_C_ACCEPT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_unlabel_accept, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_LIST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = netlbl_unlabel_list, .dumpit = NULL, diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 994d9aff2093..72668759cd2b 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -536,6 +536,24 @@ static int genl_family_rcv_msg(const struct genl_family *family, if (ops->dumpit == NULL) return -EOPNOTSUPP; + if (!(ops->validate & GENL_DONT_VALIDATE_DUMP)) { + unsigned int validate = NL_VALIDATE_STRICT; + int hdrlen = GENL_HDRLEN + family->hdrsize; + + if (ops->validate & GENL_DONT_VALIDATE_DUMP_STRICT) + validate = NL_VALIDATE_LIBERAL; + + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return -EINVAL; + + rc = __nla_validate(nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), + family->maxattr, family->policy, + validate, extack); + if (rc) + return rc; + } + if (!family->parallel_ops) { struct netlink_dump_control c = { .module = family->module, @@ -577,9 +595,13 @@ static int genl_family_rcv_msg(const struct genl_family *family, attrbuf = family->attrbuf; if (attrbuf) { - err = nlmsg_parse_deprecated(nlh, hdrlen, attrbuf, - family->maxattr, family->policy, - extack); + enum netlink_validation validate = NL_VALIDATE_STRICT; + + if (ops->validate & GENL_DONT_VALIDATE_STRICT) + validate = NL_VALIDATE_LIBERAL; + + err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, + family->policy, validate, extack); if (err < 0) goto out; } @@ -939,6 +961,7 @@ static int genl_ctrl_event(int event, const struct genl_family *family, static const struct genl_ops genl_ctrl_ops[] = { { .cmd = CTRL_CMD_GETFAMILY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ctrl_getfamily, .dumpit = ctrl_dumpfamily, }, diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index c6ba308cede7..04a8e47674ec 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1669,82 +1669,101 @@ EXPORT_SYMBOL(nfc_vendor_cmd_reply); static const struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_get_device, .dumpit = nfc_genl_dump_devices, .done = nfc_genl_dump_devices_done, }, { .cmd = NFC_CMD_DEV_UP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dev_up, }, { .cmd = NFC_CMD_DEV_DOWN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dev_down, }, { .cmd = NFC_CMD_START_POLL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_start_poll, }, { .cmd = NFC_CMD_STOP_POLL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_stop_poll, }, { .cmd = NFC_CMD_DEP_LINK_UP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dep_link_up, }, { .cmd = NFC_CMD_DEP_LINK_DOWN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_dep_link_down, }, { .cmd = NFC_CMD_GET_TARGET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = nfc_genl_dump_targets, .done = nfc_genl_dump_targets_done, }, { .cmd = NFC_CMD_LLC_GET_PARAMS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_get_params, }, { .cmd = NFC_CMD_LLC_SET_PARAMS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_set_params, }, { .cmd = NFC_CMD_LLC_SDREQ, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_llc_sdreq, }, { .cmd = NFC_CMD_FW_DOWNLOAD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_fw_download, }, { .cmd = NFC_CMD_ENABLE_SE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_enable_se, }, { .cmd = NFC_CMD_DISABLE_SE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_disable_se, }, { .cmd = NFC_CMD_GET_SE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = nfc_genl_dump_ses, .done = nfc_genl_dump_ses_done, }, { .cmd = NFC_CMD_SE_IO, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_se_io, }, { .cmd = NFC_CMD_ACTIVATE_TARGET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_activate_target, }, { .cmd = NFC_CMD_VENDOR, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_vendor_cmd, }, { .cmd = NFC_CMD_DEACTIVATE_TARGET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nfc_genl_deactivate_target, }, }; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index ff8baf810bb3..bded32144619 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -2186,16 +2186,19 @@ exit_err: static struct genl_ops ct_limit_genl_ops[] = { { .cmd = OVS_CT_LIMIT_CMD_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ .doit = ovs_ct_limit_cmd_set, }, { .cmd = OVS_CT_LIMIT_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ .doit = ovs_ct_limit_cmd_del, }, { .cmd = OVS_CT_LIMIT_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ .doit = ovs_ct_limit_cmd_get, }, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 3b99fc3de9ac..b95015c7e999 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -639,6 +639,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { static const struct genl_ops dp_packet_genl_ops[] = { { .cmd = OVS_PACKET_CMD_EXECUTE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_packet_cmd_execute } @@ -1424,19 +1425,23 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { static const struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_NEW, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_flow_cmd_new }, { .cmd = OVS_FLOW_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_flow_cmd_del }, { .cmd = OVS_FLOW_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ .doit = ovs_flow_cmd_get, .dumpit = ovs_flow_cmd_dump }, { .cmd = OVS_FLOW_CMD_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_flow_cmd_set, }, @@ -1814,19 +1819,23 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { static const struct genl_ops dp_datapath_genl_ops[] = { { .cmd = OVS_DP_CMD_NEW, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_dp_cmd_new }, { .cmd = OVS_DP_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_dp_cmd_del }, { .cmd = OVS_DP_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ .doit = ovs_dp_cmd_get, .dumpit = ovs_dp_cmd_dump }, { .cmd = OVS_DP_CMD_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_dp_cmd_set, }, @@ -2254,19 +2263,23 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { static const struct genl_ops dp_vport_genl_ops[] = { { .cmd = OVS_VPORT_CMD_NEW, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_vport_cmd_new }, { .cmd = OVS_VPORT_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_vport_cmd_del }, { .cmd = OVS_VPORT_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ .doit = ovs_vport_cmd_get, .dumpit = ovs_vport_cmd_dump }, { .cmd = OVS_VPORT_CMD_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .doit = ovs_vport_cmd_set, }, diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 9c89e8539a5a..bb67238f0340 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -526,20 +526,24 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, static struct genl_ops dp_meter_genl_ops[] = { { .cmd = OVS_METER_CMD_FEATURES, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ .doit = ovs_meter_cmd_features }, { .cmd = OVS_METER_CMD_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ .doit = ovs_meter_cmd_set, }, { .cmd = OVS_METER_CMD_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, /* OK for unprivileged users. */ .doit = ovs_meter_cmd_get, }, { .cmd = OVS_METER_CMD_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ diff --git a/net/psample/psample.c b/net/psample/psample.c index 64f95624f219..a107b2405668 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -100,6 +100,7 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg, static const struct genl_ops psample_nl_ops[] = { { .cmd = PSAMPLE_CMD_GET_GROUP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = psample_nl_cmd_get_group_dumpit, /* can be retrieved by unprivileged users */ } diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 9f5d8f36f2d7..bab2da8cf17a 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -612,6 +612,7 @@ static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) static const struct genl_ops smc_pnet_ops[] = { { .cmd = SMC_PNETID_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = smc_pnet_get, .dumpit = smc_pnet_dump, @@ -619,16 +620,19 @@ static const struct genl_ops smc_pnet_ops[] = { }, { .cmd = SMC_PNETID_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = smc_pnet_add }, { .cmd = SMC_PNETID_DEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = smc_pnet_del }, { .cmd = SMC_PNETID_FLUSH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = smc_pnet_flush } diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 3d5d0fb5b37c..99bd166bccec 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -143,93 +143,114 @@ const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { static const struct genl_ops tipc_genl_v2_ops[] = { { .cmd = TIPC_NL_BEARER_DISABLE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_bearer_disable, }, { .cmd = TIPC_NL_BEARER_ENABLE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_bearer_enable, }, { .cmd = TIPC_NL_BEARER_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_bearer_get, .dumpit = tipc_nl_bearer_dump, }, { .cmd = TIPC_NL_BEARER_ADD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_bearer_add, }, { .cmd = TIPC_NL_BEARER_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_bearer_set, }, { .cmd = TIPC_NL_SOCK_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .start = tipc_dump_start, .dumpit = tipc_nl_sk_dump, .done = tipc_dump_done, }, { .cmd = TIPC_NL_PUBL_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = tipc_nl_publ_dump, }, { .cmd = TIPC_NL_LINK_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_node_get_link, .dumpit = tipc_nl_node_dump_link, }, { .cmd = TIPC_NL_LINK_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_node_set_link, }, { .cmd = TIPC_NL_LINK_RESET_STATS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_node_reset_link_stats, }, { .cmd = TIPC_NL_MEDIA_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_media_get, .dumpit = tipc_nl_media_dump, }, { .cmd = TIPC_NL_MEDIA_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_media_set, }, { .cmd = TIPC_NL_NODE_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = tipc_nl_node_dump, }, { .cmd = TIPC_NL_NET_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = tipc_nl_net_dump, }, { .cmd = TIPC_NL_NET_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_net_set, }, { .cmd = TIPC_NL_NAME_TABLE_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = tipc_nl_name_table_dump, }, { .cmd = TIPC_NL_MON_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_node_set_monitor, }, { .cmd = TIPC_NL_MON_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_node_get_monitor, .dumpit = tipc_nl_node_dump_monitor, }, { .cmd = TIPC_NL_MON_PEER_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = tipc_nl_node_dump_monitor_peer, }, { .cmd = TIPC_NL_PEER_REMOVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_peer_rm, }, #ifdef CONFIG_TIPC_MEDIA_UDP { .cmd = TIPC_NL_UDP_GET_REMOTEIP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = tipc_udp_nl_dump_remoteip, }, #endif diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index f7269ce934b5..c6a04c09d075 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1305,6 +1305,7 @@ send: static const struct genl_ops tipc_genl_compat_ops[] = { { .cmd = TIPC_GENL_CMD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = tipc_nl_compat_recv, }, }; diff --git a/net/wimax/stack.c b/net/wimax/stack.c index b7f571e55448..4969de672886 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -419,21 +419,25 @@ static const struct nla_policy wimax_gnl_policy[WIMAX_GNL_ATTR_MAX + 1] = { static const struct genl_ops wimax_gnl_ops[] = { { .cmd = WIMAX_GNL_OP_MSG_FROM_USER, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = wimax_gnl_doit_msg_from_user, }, { .cmd = WIMAX_GNL_OP_RESET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = wimax_gnl_doit_reset, }, { .cmd = WIMAX_GNL_OP_RFKILL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = wimax_gnl_doit_rfkill, }, { .cmd = WIMAX_GNL_OP_STATE_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = wimax_gnl_doit_state_get, }, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 782c8225a90a..fffe4b371e23 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13591,6 +13591,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_wiphy, .dumpit = nl80211_dump_wiphy, .done = nl80211_dump_wiphy_done, @@ -13600,12 +13601,14 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_WIPHY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_wiphy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_interface, .dumpit = nl80211_dump_interface, /* can be retrieved by unprivileged users */ @@ -13614,6 +13617,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_interface, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | @@ -13621,6 +13625,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_NEW_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_interface, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | @@ -13628,6 +13633,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_DEL_INTERFACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_interface, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | @@ -13635,6 +13641,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_KEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_key, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13642,6 +13649,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_KEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_key, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13650,6 +13658,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_NEW_KEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_key, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13658,6 +13667,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_DEL_KEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_key, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13665,6 +13675,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_BEACON, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_set_beacon, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13672,6 +13683,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_START_AP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_start_ap, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13679,6 +13691,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_STOP_AP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_stop_ap, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13686,6 +13699,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_STATION, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_station, .dumpit = nl80211_dump_station, .internal_flags = NL80211_FLAG_NEED_NETDEV | @@ -13693,6 +13707,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_STATION, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_station, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13700,6 +13715,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_NEW_STATION, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_station, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13707,6 +13723,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_DEL_STATION, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_station, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13714,6 +13731,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_MPATH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_mpath, .dumpit = nl80211_dump_mpath, .flags = GENL_UNS_ADMIN_PERM, @@ -13722,6 +13740,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_MPP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_mpp, .dumpit = nl80211_dump_mpp, .flags = GENL_UNS_ADMIN_PERM, @@ -13730,6 +13749,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_MPATH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mpath, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13737,6 +13757,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_NEW_MPATH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_mpath, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13744,6 +13765,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_DEL_MPATH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_mpath, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13751,6 +13773,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_BSS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_bss, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13758,6 +13781,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_REG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_reg_do, .dumpit = nl80211_get_reg_dump, .internal_flags = NL80211_FLAG_NEED_RTNL, @@ -13766,6 +13790,7 @@ static const struct genl_ops nl80211_ops[] = { #ifdef CONFIG_CFG80211_CRDA_SUPPORT { .cmd = NL80211_CMD_SET_REG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_reg, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_RTNL, @@ -13773,16 +13798,19 @@ static const struct genl_ops nl80211_ops[] = { #endif { .cmd = NL80211_CMD_REQ_SET_REG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_req_set_reg, .flags = GENL_ADMIN_PERM, }, { .cmd = NL80211_CMD_RELOAD_REGDB, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_reload_regdb, .flags = GENL_ADMIN_PERM, }, { .cmd = NL80211_CMD_GET_MESH_CONFIG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_mesh_config, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13790,6 +13818,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_MESH_CONFIG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_update_mesh_config, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13797,6 +13826,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_TRIGGER_SCAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_trigger_scan, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | @@ -13804,6 +13834,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_ABORT_SCAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_abort_scan, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | @@ -13811,10 +13842,12 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_SCAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = nl80211_dump_scan, }, { .cmd = NL80211_CMD_START_SCHED_SCAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_sched_scan, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13822,6 +13855,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_STOP_SCHED_SCAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_stop_sched_scan, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13829,6 +13863,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_AUTHENTICATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_authenticate, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13837,6 +13872,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_ASSOCIATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_associate, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13845,6 +13881,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_DEAUTHENTICATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_deauthenticate, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13852,6 +13889,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_DISASSOCIATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_disassociate, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13859,6 +13897,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_JOIN_IBSS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_ibss, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13866,6 +13905,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_LEAVE_IBSS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_leave_ibss, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13874,6 +13914,7 @@ static const struct genl_ops nl80211_ops[] = { #ifdef CONFIG_NL80211_TESTMODE { .cmd = NL80211_CMD_TESTMODE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_testmode_do, .dumpit = nl80211_testmode_dump, .flags = GENL_UNS_ADMIN_PERM, @@ -13883,6 +13924,7 @@ static const struct genl_ops nl80211_ops[] = { #endif { .cmd = NL80211_CMD_CONNECT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_connect, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13891,6 +13933,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_update_connect_params, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13899,6 +13942,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_DISCONNECT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_disconnect, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13906,6 +13950,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_WIPHY_NETNS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_wiphy_netns, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | @@ -13913,10 +13958,12 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_SURVEY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = nl80211_dump_survey, }, { .cmd = NL80211_CMD_SET_PMKSA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_setdel_pmksa, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13925,6 +13972,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_DEL_PMKSA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_setdel_pmksa, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13932,6 +13980,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_FLUSH_PMKSA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_flush_pmksa, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13939,6 +13988,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_REMAIN_ON_CHANNEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_remain_on_channel, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | @@ -13946,6 +13996,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_cancel_remain_on_channel, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | @@ -13953,6 +14004,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_TX_BITRATE_MASK, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_tx_bitrate_mask, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | @@ -13960,6 +14012,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_REGISTER_FRAME, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_register_mgmt, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | @@ -13967,6 +14020,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_FRAME, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tx_mgmt, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | @@ -13974,6 +14028,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_FRAME_WAIT_CANCEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tx_mgmt_cancel_wait, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | @@ -13981,6 +14036,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_POWER_SAVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_power_save, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | @@ -13988,6 +14044,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_POWER_SAVE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_power_save, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_NETDEV | @@ -13995,6 +14052,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_CQM, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_cqm, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | @@ -14002,6 +14060,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_CHANNEL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_channel, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | @@ -14009,6 +14068,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_WDS_PEER, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_wds_peer, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | @@ -14016,6 +14076,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_JOIN_MESH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_mesh, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14023,6 +14084,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_LEAVE_MESH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_leave_mesh, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14030,6 +14092,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_JOIN_OCB, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_ocb, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14037,6 +14100,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_LEAVE_OCB, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_leave_ocb, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14045,6 +14109,7 @@ static const struct genl_ops nl80211_ops[] = { #ifdef CONFIG_PM { .cmd = NL80211_CMD_GET_WOWLAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_wowlan, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_WIPHY | @@ -14052,6 +14117,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_WOWLAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_wowlan, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | @@ -14060,6 +14126,7 @@ static const struct genl_ops nl80211_ops[] = { #endif { .cmd = NL80211_CMD_SET_REKEY_OFFLOAD, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_rekey_data, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14068,6 +14135,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_TDLS_MGMT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_mgmt, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14075,6 +14143,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_TDLS_OPER, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_oper, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14082,6 +14151,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_UNEXPECTED_FRAME, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_register_unexpected_frame, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | @@ -14089,6 +14159,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_PROBE_CLIENT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_probe_client, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14096,6 +14167,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_REGISTER_BEACONS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_register_beacons, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | @@ -14103,6 +14175,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_NOACK_MAP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_noack_map, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | @@ -14110,6 +14183,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_START_P2P_DEVICE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_p2p_device, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | @@ -14117,6 +14191,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_STOP_P2P_DEVICE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_stop_p2p_device, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | @@ -14124,6 +14199,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_START_NAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_nan, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | @@ -14131,6 +14207,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_STOP_NAN, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_stop_nan, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | @@ -14138,6 +14215,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_ADD_NAN_FUNCTION, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_nan_add_func, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | @@ -14145,6 +14223,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_DEL_NAN_FUNCTION, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_nan_del_func, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | @@ -14152,6 +14231,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_CHANGE_NAN_CONFIG, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_nan_change_config, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | @@ -14159,6 +14239,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_MCAST_RATE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mcast_rate, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | @@ -14166,6 +14247,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_MAC_ACL, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mac_acl, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | @@ -14173,6 +14255,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_RADAR_DETECT, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_radar_detection, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14180,10 +14263,12 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_protocol_features, }, { .cmd = NL80211_CMD_UPDATE_FT_IES, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_update_ft_ies, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14191,6 +14276,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_CRIT_PROTOCOL_START, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_crit_protocol_start, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | @@ -14198,6 +14284,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_CRIT_PROTOCOL_STOP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_crit_protocol_stop, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | @@ -14205,12 +14292,14 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_COALESCE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_coalesce, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_COALESCE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_coalesce, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | @@ -14218,6 +14307,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_CHANNEL_SWITCH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_channel_switch, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14225,6 +14315,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_VENDOR, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_vendor_cmd, .dumpit = nl80211_vendor_cmd_dump, .flags = GENL_UNS_ADMIN_PERM, @@ -14234,6 +14325,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_QOS_MAP, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_qos_map, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14241,6 +14333,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_ADD_TX_TS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_add_tx_ts, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14248,6 +14341,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_DEL_TX_TS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_tx_ts, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14255,6 +14349,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_TDLS_CHANNEL_SWITCH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_channel_switch, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14262,6 +14357,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_cancel_channel_switch, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14269,6 +14365,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_multicast_to_unicast, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | @@ -14276,6 +14373,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_PMK, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_pmk, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -14283,12 +14381,14 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_DEL_PMK, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_pmk, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_EXTERNAL_AUTH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_external_auth, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14296,6 +14396,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_CONTROL_PORT_FRAME, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tx_control_port, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -14303,12 +14404,14 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_FTM_RESPONDER_STATS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_ftm_responder_stats, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_PEER_MEASUREMENT_START, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_pmsr_start, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | @@ -14316,6 +14419,7 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_NOTIFY_RADAR, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_notify_radar_detection, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | -- cgit From 795d673af1afae8146ac3070a2d77cfae5287c43 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 26 Apr 2019 14:11:05 -0400 Subject: audit_compare_dname_path(): switch to const struct qstr * Signed-off-by: Al Viro --- kernel/audit.h | 2 +- kernel/audit_fsnotify.c | 2 +- kernel/audit_watch.c | 3 +-- kernel/auditfilter.c | 6 +++--- kernel/auditsc.c | 4 ++-- 5 files changed, 8 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.h b/kernel/audit.h index 958d5b8fc1b3..2071725a999f 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -231,7 +231,7 @@ extern int audit_comparator(const u32 left, const u32 op, const u32 right); extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right); extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right); extern int parent_len(const char *path); -extern int audit_compare_dname_path(const char *dname, const char *path, int plen); +extern int audit_compare_dname_path(const struct qstr *dname, const char *path, int plen); extern struct sk_buff *audit_make_reply(int seq, int type, int done, int multi, const void *payload, int size); extern void audit_panic(const char *message); diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index fb241805569c..b5737b826951 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -188,7 +188,7 @@ static int audit_mark_handle_event(struct fsnotify_group *group, } if (mask & (FS_CREATE|FS_MOVED_TO|FS_DELETE|FS_MOVED_FROM)) { - if (audit_compare_dname_path(dname->name, audit_mark->path, AUDIT_NAME_FULL)) + if (audit_compare_dname_path(dname, audit_mark->path, AUDIT_NAME_FULL)) return 0; audit_update_mark(audit_mark, inode); } else if (mask & (FS_DELETE_SELF|FS_UNMOUNT|FS_MOVE_SELF)) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index d832ce9df065..b50c574223fa 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -261,13 +261,12 @@ static void audit_update_watch(struct audit_parent *parent, struct audit_watch *owatch, *nwatch, *nextw; struct audit_krule *r, *nextr; struct audit_entry *oentry, *nentry; - const unsigned char *name = dname->name; mutex_lock(&audit_filter_mutex); /* Run all of the watches on this parent looking for the one that * matches the given dname */ list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) { - if (audit_compare_dname_path(name, owatch->path, + if (audit_compare_dname_path(dname, owatch->path, AUDIT_NAME_FULL)) continue; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 63f8b3f26fab..f9fff93c3351 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1290,12 +1290,12 @@ int parent_len(const char *path) * @parentlen: length of the parent if known. Passing in AUDIT_NAME_FULL * here indicates that we must compute this value. */ -int audit_compare_dname_path(const char *dname, const char *path, int parentlen) +int audit_compare_dname_path(const struct qstr *dname, const char *path, int parentlen) { int dlen, pathlen; const char *p; - dlen = strlen(dname); + dlen = dname->len; pathlen = strlen(path); if (pathlen < dlen) return 1; @@ -1306,7 +1306,7 @@ int audit_compare_dname_path(const char *dname, const char *path, int parentlen) p = path + parentlen; - return strncmp(p, dname, dlen); + return strncmp(p, dname->name, dlen); } int audit_filter(int msgtype, unsigned int listtype) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index d1eab1d4a930..92d0ae63febd 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2045,7 +2045,7 @@ void __audit_inode_child(struct inode *parent, { struct audit_context *context = audit_context(); struct inode *inode = d_backing_inode(dentry); - const char *dname = dentry->d_name.name; + const struct qstr *dname = &dentry->d_name; struct audit_names *n, *found_parent = NULL, *found_child = NULL; struct audit_entry *e; struct list_head *list = &audit_filter_list[AUDIT_FILTER_FS]; @@ -2099,7 +2099,7 @@ void __audit_inode_child(struct inode *parent, (n->type != type && n->type != AUDIT_TYPE_UNKNOWN)) continue; - if (!strcmp(dname, n->name->name) || + if (!strcmp(dname->name, n->name->name) || !audit_compare_dname_path(dname, n->name->name, found_parent ? found_parent->name_len : -- cgit From 31adf2308f33dcae59009019675224be0978bc70 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 24 Apr 2019 10:55:48 +0200 Subject: livepatch: Convert error about unsupported reliable stacktrace into a warning The commit d0807da78e11d46f ("livepatch: Remove immediate feature") caused that any livepatch was refused when reliable stacktraces were not supported on the given architecture. The limitation is too strong. User space processes are safely migrated even when entering or leaving the kernel. Kthreads transition would need to get forced. But it is safe when: + The livepatch does not change the semantic of the code. + Callbacks do not depend on a safely finished transition. Suggested-by: Josh Poimboeuf Acked-by: Josh Poimboeuf Acked-by: Miroslav Benes Reviewed-by: Kamalesh Babulal Signed-off-by: Petr Mladek --- kernel/livepatch/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index eb0ee10a1981..14f33ab6c583 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -1003,11 +1003,10 @@ int klp_enable_patch(struct klp_patch *patch) return -ENODEV; if (!klp_have_reliable_stack()) { - pr_err("This architecture doesn't have support for the livepatch consistency model.\n"); - return -EOPNOTSUPP; + pr_warn("This architecture doesn't have support for the livepatch consistency model.\n"); + pr_warn("The livepatch transition may never complete.\n"); } - mutex_lock(&klp_mutex); ret = klp_init_patch_early(patch); -- cgit From 08970ecf744e09837bb6620c95406710f4c81ae2 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Thu, 18 Apr 2019 16:54:01 +0100 Subject: irq/irqdomain: Fix typo in the comment on top of __irq_domain_alloc_irqs() The word 'number' has been misspelt in the comment on top of _irq_domain_alloc_irqs(). Signed-off-by: Julien Grall Signed-off-by: Marc Zyngier --- kernel/irq/irqdomain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 9ed29e4a7dbf..a453e229f99c 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1297,7 +1297,7 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, /** * __irq_domain_alloc_irqs - Allocate IRQs from domain * @domain: domain to allocate from - * @irq_base: allocate specified IRQ nubmer if irq_base >= 0 + * @irq_base: allocate specified IRQ number if irq_base >= 0 * @nr_irqs: number of IRQs to allocate * @node: NUMA node id for memory allocation * @arg: domain specific argument -- cgit From 43d8ce9d65a54846d378545770991e65838981e0 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Fri, 26 Apr 2019 15:04:29 -0400 Subject: Provide in-kernel headers to make extending kernel easier Introduce in-kernel headers which are made available as an archive through proc (/proc/kheaders.tar.xz file). This archive makes it possible to run eBPF and other tracing programs that need to extend the kernel for tracing purposes without any dependency on the file system having headers. A github PR is sent for the corresponding BCC patch at: https://github.com/iovisor/bcc/pull/2312 On Android and embedded systems, it is common to switch kernels but not have kernel headers available on the file system. Further once a different kernel is booted, any headers stored on the file system will no longer be useful. This is an issue even well known to distros. By storing the headers as a compressed archive within the kernel, we can avoid these issues that have been a hindrance for a long time. The best way to use this feature is by building it in. Several users have a need for this, when they switch debug kernels, they do not want to update the filesystem or worry about it where to store the headers on it. However, the feature is also buildable as a module in case the user desires it not being part of the kernel image. This makes it possible to load and unload the headers from memory on demand. A tracing program can load the module, do its operations, and then unload the module to save kernel memory. The total memory needed is 3.3MB. By having the archive available at a fixed location independent of filesystem dependencies and conventions, all debugging tools can directly refer to the fixed location for the archive, without concerning with where the headers on a typical filesystem which significantly simplifies tooling that needs kernel headers. The code to read the headers is based on /proc/config.gz code and uses the same technique to embed the headers. Other approaches were discussed such as having an in-memory mountable filesystem, but that has drawbacks such as requiring an in-kernel xz decompressor which we don't have today, and requiring usage of 42 MB of kernel memory to host the decompressed headers at anytime. Also this approach is simpler than such approaches. Reviewed-by: Masahiro Yamada Signed-off-by: Joel Fernandes (Google) Signed-off-by: Greg Kroah-Hartman --- init/Kconfig | 10 ++++++ kernel/.gitignore | 1 + kernel/Makefile | 10 ++++++ kernel/gen_ikh_data.sh | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/kheaders.c | 74 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 184 insertions(+) create mode 100755 kernel/gen_ikh_data.sh create mode 100644 kernel/kheaders.c (limited to 'kernel') diff --git a/init/Kconfig b/init/Kconfig index 4592bf7997c0..47c0db6e63a5 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -580,6 +580,16 @@ config IKCONFIG_PROC This option enables access to the kernel configuration file through /proc/config.gz. +config IKHEADERS_PROC + tristate "Enable kernel header artifacts through /proc/kheaders.tar.xz" + depends on PROC_FS + help + This option enables access to the kernel header and other artifacts that + are generated during the build process. These can be used to build eBPF + tracing programs, or similar programs. If you build the headers as a + module, a module called kheaders.ko is built which can be loaded on-demand + to get access to the headers. + config LOG_BUF_SHIFT int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" range 12 25 diff --git a/kernel/.gitignore b/kernel/.gitignore index 6e699100872f..34d1e77ee9df 100644 --- a/kernel/.gitignore +++ b/kernel/.gitignore @@ -1,5 +1,6 @@ # # Generated files # +kheaders.md5 timeconst.h hz.bc diff --git a/kernel/Makefile b/kernel/Makefile index 6c57e78817da..12399614c350 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_UTS_NS) += utsname.o obj-$(CONFIG_USER_NS) += user_namespace.o obj-$(CONFIG_PID_NS) += pid_namespace.o obj-$(CONFIG_IKCONFIG) += configs.o +obj-$(CONFIG_IKHEADERS_PROC) += kheaders.o obj-$(CONFIG_SMP) += stop_machine.o obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o obj-$(CONFIG_AUDIT) += audit.o auditfilter.o @@ -121,3 +122,12 @@ $(obj)/configs.o: $(obj)/config_data.gz targets += config_data.gz $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE $(call if_changed,gzip) + +$(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz + +quiet_cmd_genikh = CHK $(obj)/kheaders_data.tar.xz +cmd_genikh = $(srctree)/kernel/gen_ikh_data.sh $@ +$(obj)/kheaders_data.tar.xz: FORCE + $(call cmd,genikh) + +clean-files := kheaders_data.tar.xz kheaders.md5 diff --git a/kernel/gen_ikh_data.sh b/kernel/gen_ikh_data.sh new file mode 100755 index 000000000000..591a94f7b387 --- /dev/null +++ b/kernel/gen_ikh_data.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This script generates an archive consisting of kernel headers +# for CONFIG_IKHEADERS_PROC. +set -e +spath="$(dirname "$(readlink -f "$0")")" +kroot="$spath/.." +outdir="$(pwd)" +tarfile=$1 +cpio_dir=$outdir/$tarfile.tmp + +# Script filename relative to the kernel source root +# We add it to the archive because it is small and any changes +# to this script will also cause a rebuild of the archive. +sfile="$(realpath --relative-to $kroot "$(readlink -f "$0")")" + +src_file_list=" +include/ +arch/$SRCARCH/include/ +$sfile +" + +obj_file_list=" +include/ +arch/$SRCARCH/include/ +" + +# Support incremental builds by skipping archive generation +# if timestamps of files being archived are not changed. + +# This block is useful for debugging the incremental builds. +# Uncomment it for debugging. +# iter=1 +# if [ ! -f /tmp/iter ]; then echo 1 > /tmp/iter; +# else; iter=$(($(cat /tmp/iter) + 1)); fi +# find $src_file_list -type f | xargs ls -lR > /tmp/src-ls-$iter +# find $obj_file_list -type f | xargs ls -lR > /tmp/obj-ls-$iter + +# include/generated/compile.h is ignored because it is touched even when none +# of the source files changed. This causes pointless regeneration, so let us +# ignore them for md5 calculation. +pushd $kroot > /dev/null +src_files_md5="$(find $src_file_list -type f | + grep -v "include/generated/compile.h" | + xargs ls -lR | md5sum | cut -d ' ' -f1)" +popd > /dev/null +obj_files_md5="$(find $obj_file_list -type f | + grep -v "include/generated/compile.h" | + xargs ls -lR | md5sum | cut -d ' ' -f1)" + +if [ -f $tarfile ]; then tarfile_md5="$(md5sum $tarfile | cut -d ' ' -f1)"; fi +if [ -f kernel/kheaders.md5 ] && + [ "$(cat kernel/kheaders.md5|head -1)" == "$src_files_md5" ] && + [ "$(cat kernel/kheaders.md5|head -2|tail -1)" == "$obj_files_md5" ] && + [ "$(cat kernel/kheaders.md5|tail -1)" == "$tarfile_md5" ]; then + exit +fi + +if [ "${quiet}" != "silent_" ]; then + echo " GEN $tarfile" +fi + +rm -rf $cpio_dir +mkdir $cpio_dir + +pushd $kroot > /dev/null +for f in $src_file_list; + do find "$f" ! -name "*.cmd" ! -name ".*"; +done | cpio --quiet -pd $cpio_dir +popd > /dev/null + +# The second CPIO can complain if files already exist which can +# happen with out of tree builds. Just silence CPIO for now. +for f in $obj_file_list; + do find "$f" ! -name "*.cmd" ! -name ".*"; +done | cpio --quiet -pd $cpio_dir >/dev/null 2>&1 + +# Remove comments except SDPX lines +find $cpio_dir -type f -print0 | + xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;' + +tar -Jcf $tarfile -C $cpio_dir/ . > /dev/null + +echo "$src_files_md5" > kernel/kheaders.md5 +echo "$obj_files_md5" >> kernel/kheaders.md5 +echo "$(md5sum $tarfile | cut -d ' ' -f1)" >> kernel/kheaders.md5 + +rm -rf $cpio_dir diff --git a/kernel/kheaders.c b/kernel/kheaders.c new file mode 100644 index 000000000000..70ae6052920d --- /dev/null +++ b/kernel/kheaders.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Provide kernel headers useful to build tracing programs + * such as for running eBPF tracing tools. + * + * (Borrowed code from kernel/configs.c) + */ + +#include +#include +#include +#include +#include + +/* + * Define kernel_headers_data and kernel_headers_data_end, within which the + * compressed kernel headers are stored. The file is first compressed with xz. + */ + +asm ( +" .pushsection .rodata, \"a\" \n" +" .global kernel_headers_data \n" +"kernel_headers_data: \n" +" .incbin \"kernel/kheaders_data.tar.xz\" \n" +" .global kernel_headers_data_end \n" +"kernel_headers_data_end: \n" +" .popsection \n" +); + +extern char kernel_headers_data; +extern char kernel_headers_data_end; + +static ssize_t +ikheaders_read_current(struct file *file, char __user *buf, + size_t len, loff_t *offset) +{ + return simple_read_from_buffer(buf, len, offset, + &kernel_headers_data, + &kernel_headers_data_end - + &kernel_headers_data); +} + +static const struct file_operations ikheaders_file_ops = { + .read = ikheaders_read_current, + .llseek = default_llseek, +}; + +static int __init ikheaders_init(void) +{ + struct proc_dir_entry *entry; + + /* create the current headers file */ + entry = proc_create("kheaders.tar.xz", S_IRUGO, NULL, + &ikheaders_file_ops); + if (!entry) + return -ENOMEM; + + proc_set_size(entry, + &kernel_headers_data_end - + &kernel_headers_data); + return 0; +} + +static void __exit ikheaders_cleanup(void) +{ + remove_proc_entry("kheaders.tar.xz", NULL); +} + +module_init(ikheaders_init); +module_exit(ikheaders_cleanup); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Joel Fernandes"); +MODULE_DESCRIPTION("Echo the kernel header artifacts used to build the kernel"); -- cgit From 2bd1298ac17777525a41c8425521f569e412df14 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Tue, 30 Apr 2019 15:42:22 +0530 Subject: genirq: Introduce irq_chip_{request,release}_resource_parent() apis Introduce irq_chip_{request,release}_resource_parent() apis so that these can be used in hierarchical irqchips. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier --- include/linux/irq.h | 2 ++ kernel/irq/chip.c | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'kernel') diff --git a/include/linux/irq.h b/include/linux/irq.h index 7ae8de5ad0f2..fb301cf29148 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -625,6 +625,8 @@ extern int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on); extern int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info); extern int irq_chip_set_type_parent(struct irq_data *data, unsigned int type); +extern int irq_chip_request_resources_parent(struct irq_data *data); +extern void irq_chip_release_resources_parent(struct irq_data *data); #endif /* Handling of unhandled and spurious interrupts: */ diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 51128bea3846..29d6c7d070b4 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1459,6 +1459,33 @@ int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on) return -ENOSYS; } EXPORT_SYMBOL_GPL(irq_chip_set_wake_parent); + +/** + * irq_chip_request_resources_parent - Request resources on the parent interrupt + * @data: Pointer to interrupt specific data + */ +int irq_chip_request_resources_parent(struct irq_data *data) +{ + data = data->parent_data; + + if (data->chip->irq_request_resources) + return data->chip->irq_request_resources(data); + + return -ENOSYS; +} +EXPORT_SYMBOL_GPL(irq_chip_request_resources_parent); + +/** + * irq_chip_release_resources_parent - Release resources on the parent interrupt + * @data: Pointer to interrupt specific data + */ +void irq_chip_release_resources_parent(struct irq_data *data) +{ + data = data->parent_data; + if (data->chip->irq_release_resources) + data->chip->irq_release_resources(data); +} +EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent); #endif /** -- cgit From 524845ff9c473d315468fa3b54054a7e6b2d95cf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 15 Apr 2019 22:31:29 -0400 Subject: bpf: switch to ->free_inode() Acked-by: Alexei Starovoitov Acked-by: Song Liu Signed-off-by: Al Viro --- kernel/bpf/inode.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 4a8f390a2b82..bc53e5b20ddc 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -566,9 +566,8 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) return 0; } -static void bpf_destroy_inode_deferred(struct rcu_head *head) +static void bpf_free_inode(struct inode *inode) { - struct inode *inode = container_of(head, struct inode, i_rcu); enum bpf_type type; if (S_ISLNK(inode->i_mode)) @@ -578,16 +577,11 @@ static void bpf_destroy_inode_deferred(struct rcu_head *head) free_inode_nonrcu(inode); } -static void bpf_destroy_inode(struct inode *inode) -{ - call_rcu(&inode->i_rcu, bpf_destroy_inode_deferred); -} - static const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = bpf_show_options, - .destroy_inode = bpf_destroy_inode, + .free_inode = bpf_free_inode, }; enum { -- cgit From a5d5092c9285f6c8937b56f9c6ff2b22d818fc25 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 26 Feb 2019 13:16:14 -0600 Subject: gdbstub: mark expected switch fall-throughs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warnings: kernel/debug/gdbstub.c: In function ‘gdb_serial_stub’: kernel/debug/gdbstub.c:1031:7: warning: this statement may fall through [-Wimplicit-fallthrough=] if (remcom_in_buffer[1] == '\0') { ^ kernel/debug/gdbstub.c:1036:3: note: here case 'C': /* Exception passing */ ^~~~ kernel/debug/gdbstub.c:1040:7: warning: this statement may fall through [-Wimplicit-fallthrough=] if (tmp == 0) ^ kernel/debug/gdbstub.c:1043:3: note: here case 'c': /* Continue packet */ ^~~~ kernel/debug/gdbstub.c:1050:4: warning: this statement may fall through [-Wimplicit-fallthrough=] dbg_activate_sw_breakpoints(); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/debug/gdbstub.c:1052:3: note: here default: ^~~~~~~ Warning level 3 was used: -Wimplicit-fallthrough=3 Notice that, in this particular case, the code comment is modified in accordance with what GCC is expecting to find. This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Acked-by: Jason Wessel Signed-off-by: Daniel Thompson --- kernel/debug/gdbstub.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 7510dc687c0d..9f267b8905b4 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -1033,13 +1033,14 @@ int gdb_serial_stub(struct kgdb_state *ks) return DBG_PASS_EVENT; } #endif + /* Fall through */ case 'C': /* Exception passing */ tmp = gdb_cmd_exception_pass(ks); if (tmp > 0) goto default_handle; if (tmp == 0) break; - /* Fall through on tmp < 0 */ + /* Fall through - on tmp < 0 */ case 'c': /* Continue packet */ case 's': /* Single step packet */ if (kgdb_contthread && kgdb_contthread != current) { @@ -1048,7 +1049,7 @@ int gdb_serial_stub(struct kgdb_state *ks) break; } dbg_activate_sw_breakpoints(); - /* Fall through to default processing */ + /* Fall through - to default processing */ default: default_handle: error = kgdb_arch_handle_exception(ks->ex_vector, -- cgit From 4cc168eaf3b67d76547fb420c22abe22a3c86003 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 22 Apr 2019 11:33:42 -0500 Subject: gdbstub: Replace strcpy() by strscpy() The strcpy() function is being deprecated. Replace it by the safer strscpy() and fix the following Coverity warning: "You might overrun the 1024-character fixed-size string remcom_in_buffer by copying cmd without checking the length." Addresses-Coverity-ID: 138999 ("Copy into fixed size buffer") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Daniel Thompson --- kernel/debug/gdbstub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 9f267b8905b4..4b280fc7dd67 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -1095,10 +1095,10 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd) return error; case 's': case 'c': - strcpy(remcom_in_buffer, cmd); + strscpy(remcom_in_buffer, cmd, sizeof(remcom_in_buffer)); return 0; case '$': - strcpy(remcom_in_buffer, cmd); + strscpy(remcom_in_buffer, cmd, sizeof(remcom_in_buffer)); gdbstub_use_prev_in_buf = strlen(remcom_in_buffer); gdbstub_prev_in_buf_pos = 0; return 0; -- cgit From 9b555c4d784c468b4167eef9ab621b5203e4f479 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 22 Apr 2019 11:21:06 -0500 Subject: kdb: kdb_support: replace strcpy() by strscpy() The strcpy() function is being deprecated. Replace it by the safer strscpy() and fix the following Coverity warning: "You might overrun the 129-character fixed-size string ks_namebuf by copying name without checking the length." Addresses-Coverity-ID: 138995 ("Copy into fixed size buffer") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Daniel Thompson --- kernel/debug/kdb/kdb_support.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 50bf9b119bad..b8e6306e7e13 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -192,7 +192,7 @@ int kallsyms_symbol_complete(char *prefix_name, int max_len) while ((name = kdb_walk_kallsyms(&pos))) { if (strncmp(name, prefix_name, prefix_len) == 0) { - strcpy(ks_namebuf, name); + strscpy(ks_namebuf, name, sizeof(ks_namebuf)); /* Work out the longest name that matches the prefix */ if (++number == 1) { prev_len = min_t(int, max_len-1, -- cgit From dbfe67334a1767bcb7be8b50bd237b22b272ef23 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 19 Mar 2019 10:12:04 -0700 Subject: tracing: kdb: The skip_lines parameter should have been skip_entries The things skipped by kdb's "ftdump" command when you pass it a parameter has always been entries, not lines. The difference usually doesn't matter but when the trace buffer has multi-line entries (like a stack dump) it can matter. Let's fix this both in the help text for ftdump and also in the local variable names. Link: http://lkml.kernel.org/r/20190319171206.97107-1-dianders@chromium.org Acked-by: Daniel Thompson Signed-off-by: Douglas Anderson Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kdb.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 810d78a8d14c..4b666643d69f 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -17,7 +17,7 @@ #include "trace.h" #include "trace_output.h" -static void ftrace_dump_buf(int skip_lines, long cpu_file) +static void ftrace_dump_buf(int skip_entries, long cpu_file) { /* use static because iter can be a bit big for the stack */ static struct trace_iterator iter; @@ -70,11 +70,11 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file) kdb_printf("---------------------------------\n"); cnt++; - if (!skip_lines) { + if (!skip_entries) { print_trace_line(&iter); trace_printk_seq(&iter.seq); } else { - skip_lines--; + skip_entries--; } if (KDB_FLAG(CMD_INTERRUPT)) @@ -106,7 +106,7 @@ out: */ static int kdb_ftdump(int argc, const char **argv) { - int skip_lines = 0; + int skip_entries = 0; long cpu_file; char *cp; @@ -114,9 +114,9 @@ static int kdb_ftdump(int argc, const char **argv) return KDB_ARGCOUNT; if (argc) { - skip_lines = simple_strtol(argv[1], &cp, 0); + skip_entries = simple_strtol(argv[1], &cp, 0); if (*cp) - skip_lines = 0; + skip_entries = 0; } if (argc == 2) { @@ -129,7 +129,7 @@ static int kdb_ftdump(int argc, const char **argv) } kdb_trap_printk++; - ftrace_dump_buf(skip_lines, cpu_file); + ftrace_dump_buf(skip_entries, cpu_file); kdb_trap_printk--; return 0; @@ -137,7 +137,7 @@ static int kdb_ftdump(int argc, const char **argv) static __init int kdb_ftrace_register(void) { - kdb_register_flags("ftdump", kdb_ftdump, "[skip_#lines] [cpu]", + kdb_register_flags("ftdump", kdb_ftdump, "[skip_#entries] [cpu]", "Dump ftrace log", 0, KDB_ENABLE_ALWAYS_SAFE); return 0; } -- cgit From ecffc8a8c7301f6f3c731ba23e38cd049a046416 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 19 Mar 2019 10:12:05 -0700 Subject: tracing: Add trace_total_entries() / trace_total_entries_cpu() These two new exported functions will be used in a future patch by kdb_ftdump() to quickly skip all but the last few trace entries. Link: http://lkml.kernel.org/r/20190319171206.97107-2-dianders@chromium.org Acked-by: Daniel Thompson Suggested-by: Steven Rostedt Signed-off-by: Douglas Anderson Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 65 ++++++++++++++++++++++++++++++++++++++++------------ kernel/trace/trace.h | 3 +++ 2 files changed, 53 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2bc18de7f0dc..dcb9adb44be9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3492,34 +3492,69 @@ static void s_stop(struct seq_file *m, void *p) trace_event_read_unlock(); } +static void +get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total, + unsigned long *entries, int cpu) +{ + unsigned long count; + + count = ring_buffer_entries_cpu(buf->buffer, cpu); + /* + * If this buffer has skipped entries, then we hold all + * entries for the trace and we need to ignore the + * ones before the time stamp. + */ + if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { + count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; + /* total is the same as the entries */ + *total = count; + } else + *total = count + + ring_buffer_overrun_cpu(buf->buffer, cpu); + *entries = count; +} + static void get_total_entries(struct trace_buffer *buf, unsigned long *total, unsigned long *entries) { - unsigned long count; + unsigned long t, e; int cpu; *total = 0; *entries = 0; for_each_tracing_cpu(cpu) { - count = ring_buffer_entries_cpu(buf->buffer, cpu); - /* - * If this buffer has skipped entries, then we hold all - * entries for the trace and we need to ignore the - * ones before the time stamp. - */ - if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { - count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; - /* total is the same as the entries */ - *total += count; - } else - *total += count + - ring_buffer_overrun_cpu(buf->buffer, cpu); - *entries += count; + get_total_entries_cpu(buf, &t, &e, cpu); + *total += t; + *entries += e; } } +unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu) +{ + unsigned long total, entries; + + if (!tr) + tr = &global_trace; + + get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu); + + return entries; +} + +unsigned long trace_total_entries(struct trace_array *tr) +{ + unsigned long total, entries; + + if (!tr) + tr = &global_trace; + + get_total_entries(&tr->trace_buffer, &total, &entries); + + return entries; +} + static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n" diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index da00a3d508c1..33f14b9e78b7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -721,6 +721,9 @@ void trace_init_global_iter(struct trace_iterator *iter); void tracing_iter_reset(struct trace_iterator *iter, int cpu); +unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu); +unsigned long trace_total_entries(struct trace_array *tr); + void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, -- cgit From 03197fc02b356606355d7ede343b18e3e3737771 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 19 Mar 2019 10:12:06 -0700 Subject: tracing: kdb: Allow ftdump to skip all but the last few entries The 'ftdump' command in kdb is currently a bit of a last resort, at least if you have lots of traces turned on. It's going to print a whole boatload of data out your serial port which is probably running at 115200. This could easily take many, many minutes. Usually you're most interested in what's at the _end_ of the ftrace buffer, AKA what happened most recently. That means you've got to wait the full time for the dump. The 'ftdump' command does attempt to help you a little bit by allowing you to skip a fixed number of entries. Unfortunately it provides no way for you to know how many entries you should skip. Let's do similar to python and allow you to use a negative number to indicate that you want to skip all entries except the last few. This allows you to quickly see what you want. Note that we also change the printout in ftdump to print the (positive) number of entries actually skipped since that could be helpful to know when you've specified a negative skip count. Link: http://lkml.kernel.org/r/20190319171206.97107-3-dianders@chromium.org Signed-off-by: Douglas Anderson Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kdb.c | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 4b666643d69f..6c1ae6b752d1 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -17,29 +17,25 @@ #include "trace.h" #include "trace_output.h" +static struct trace_iterator iter; +static struct ring_buffer_iter *buffer_iter[CONFIG_NR_CPUS]; + static void ftrace_dump_buf(int skip_entries, long cpu_file) { - /* use static because iter can be a bit big for the stack */ - static struct trace_iterator iter; - static struct ring_buffer_iter *buffer_iter[CONFIG_NR_CPUS]; struct trace_array *tr; unsigned int old_userobj; int cnt = 0, cpu; - trace_init_global_iter(&iter); - iter.buffer_iter = buffer_iter; tr = iter.tr; - for_each_tracing_cpu(cpu) { - atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); - } - old_userobj = tr->trace_flags; /* don't look at user memory in panic mode */ tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ; kdb_printf("Dumping ftrace buffer:\n"); + if (skip_entries) + kdb_printf("(skipping %d entries)\n", skip_entries); /* reset all but tr, trace, and overruns */ memset(&iter.seq, 0, @@ -89,10 +85,6 @@ static void ftrace_dump_buf(int skip_entries, long cpu_file) out: tr->trace_flags = old_userobj; - for_each_tracing_cpu(cpu) { - atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); - } - for_each_tracing_cpu(cpu) { if (iter.buffer_iter[cpu]) { ring_buffer_read_finish(iter.buffer_iter[cpu]); @@ -109,6 +101,8 @@ static int kdb_ftdump(int argc, const char **argv) int skip_entries = 0; long cpu_file; char *cp; + int cnt; + int cpu; if (argc > 2) return KDB_ARGCOUNT; @@ -129,7 +123,29 @@ static int kdb_ftdump(int argc, const char **argv) } kdb_trap_printk++; + + trace_init_global_iter(&iter); + iter.buffer_iter = buffer_iter; + + for_each_tracing_cpu(cpu) { + atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + } + + /* A negative skip_entries means skip all but the last entries */ + if (skip_entries < 0) { + if (cpu_file == RING_BUFFER_ALL_CPUS) + cnt = trace_total_entries(NULL); + else + cnt = trace_total_entries_cpu(NULL, cpu_file); + skip_entries = max(cnt + skip_entries, 0); + } + ftrace_dump_buf(skip_entries, cpu_file); + + for_each_tracing_cpu(cpu) { + atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); + } + kdb_trap_printk--; return 0; @@ -138,7 +154,8 @@ static int kdb_ftdump(int argc, const char **argv) static __init int kdb_ftrace_register(void) { kdb_register_flags("ftdump", kdb_ftdump, "[skip_#entries] [cpu]", - "Dump ftrace log", 0, KDB_ENABLE_ALWAYS_SAFE); + "Dump ftrace log; -skip dumps last #entries", 0, + KDB_ENABLE_ALWAYS_SAFE); return 0; } -- cgit From aaebdf8d68479f78d9f72b239684f70fbb0722c6 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Wed, 1 May 2019 14:58:18 +0100 Subject: genirq/msi: Add a new field in msi_desc to store an IOMMU cookie When an MSI doorbell is located downstream of an IOMMU, it is required to swizzle the physical address with an appropriately-mapped IOVA for any device attached to one of our DMA ops domain. At the moment, the allocation of the mapping may be done when composing the message. However, the composing may be done in non-preemtible context while the allocation requires to be called from preemptible context. A follow-up change will split the current logic in two functions requiring to keep an IOMMU cookie per MSI. A new field is introduced in msi_desc to store an IOMMU cookie. As the cookie may not be required in some configuration, the field is protected under a new config CONFIG_IRQ_MSI_IOMMU. A pair of helpers has also been introduced to access the field. Signed-off-by: Julien Grall Reviewed-by: Robin Murphy Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/msi.h | 26 ++++++++++++++++++++++++++ kernel/irq/Kconfig | 3 +++ 2 files changed, 29 insertions(+) (limited to 'kernel') diff --git a/include/linux/msi.h b/include/linux/msi.h index 7c28762851a3..3eb42ede0114 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -86,6 +86,9 @@ struct msi_desc { struct device *dev; struct msi_msg msg; struct irq_affinity_desc *affinity; +#ifdef CONFIG_IRQ_MSI_IOMMU + const void *iommu_cookie; +#endif union { /* PCI MSI/X specific data */ @@ -129,6 +132,29 @@ struct msi_desc { #define for_each_msi_entry_safe(desc, tmp, dev) \ list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list) +#ifdef CONFIG_IRQ_MSI_IOMMU +static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) +{ + return desc->iommu_cookie; +} + +static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, + const void *iommu_cookie) +{ + desc->iommu_cookie = iommu_cookie; +} +#else +static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) +{ + return NULL; +} + +static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, + const void *iommu_cookie) +{ +} +#endif + #ifdef CONFIG_PCI_MSI #define first_pci_msi_entry(pdev) first_msi_entry(&(pdev)->dev) #define for_each_pci_msi_entry(desc, pdev) \ diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 5f3e2baefca9..8fee06625c37 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -91,6 +91,9 @@ config GENERIC_MSI_IRQ_DOMAIN select IRQ_DOMAIN_HIERARCHY select GENERIC_MSI_IRQ +config IRQ_MSI_IOMMU + bool + config HANDLE_DOMAIN_IRQ bool -- cgit From 4d141ab3416d90f87775f5dee725efdf40110a8f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 3 May 2019 15:26:24 +0200 Subject: livepatch: Remove custom kobject state handling kobject_init() always succeeds and sets the reference count to 1. It allows to always free the structures via kobject_put() and the related release callback. Note that the custom kobject state handling was used only because we did not know that kobject_put() can and actually should get called even when kobject_init_and_add() fails. The patch should not change the existing behavior. Suggested-by: "Tobin C. Harding" Signed-off-by: Petr Mladek Reviewed-by: Kamalesh Babulal Acked-by: Joe Lawrence Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 3 --- kernel/livepatch/core.c | 56 ++++++++++++++--------------------------------- 2 files changed, 17 insertions(+), 42 deletions(-) (limited to 'kernel') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 53551f470722..a14bab1a0a3e 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -86,7 +86,6 @@ struct klp_func { struct list_head node; struct list_head stack_node; unsigned long old_size, new_size; - bool kobj_added; bool nop; bool patched; bool transition; @@ -141,7 +140,6 @@ struct klp_object { struct list_head func_list; struct list_head node; struct module *mod; - bool kobj_added; bool dynamic; bool patched; }; @@ -170,7 +168,6 @@ struct klp_patch { struct list_head list; struct kobject kobj; struct list_head obj_list; - bool kobj_added; bool enabled; bool forced; struct work_struct free_work; diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 14f33ab6c583..42385f23252a 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -426,6 +426,9 @@ static void klp_free_object_dynamic(struct klp_object *obj) kfree(obj); } +static struct kobj_type klp_ktype_object; +static struct kobj_type klp_ktype_func; + static struct klp_object *klp_alloc_object_dynamic(const char *name) { struct klp_object *obj; @@ -443,6 +446,7 @@ static struct klp_object *klp_alloc_object_dynamic(const char *name) } INIT_LIST_HEAD(&obj->func_list); + kobject_init(&obj->kobj, &klp_ktype_object); obj->dynamic = true; return obj; @@ -471,6 +475,7 @@ static struct klp_func *klp_alloc_func_nop(struct klp_func *old_func, } } + kobject_init(&func->kobj, &klp_ktype_func); /* * func->new_func is same as func->old_func. These addresses are * set when the object is loaded, see klp_init_object_loaded(). @@ -588,13 +593,7 @@ static void __klp_free_funcs(struct klp_object *obj, bool nops_only) continue; list_del(&func->node); - - /* Might be called from klp_init_patch() error path. */ - if (func->kobj_added) { - kobject_put(&func->kobj); - } else if (func->nop) { - klp_free_func_nop(func); - } + kobject_put(&func->kobj); } } @@ -624,13 +623,7 @@ static void __klp_free_objects(struct klp_patch *patch, bool nops_only) continue; list_del(&obj->node); - - /* Might be called from klp_init_patch() error path. */ - if (obj->kobj_added) { - kobject_put(&obj->kobj); - } else if (obj->dynamic) { - klp_free_object_dynamic(obj); - } + kobject_put(&obj->kobj); } } @@ -675,10 +668,8 @@ static void klp_free_patch_finish(struct klp_patch *patch) * this is called when the patch gets disabled and it * cannot get enabled again. */ - if (patch->kobj_added) { - kobject_put(&patch->kobj); - wait_for_completion(&patch->finish); - } + kobject_put(&patch->kobj); + wait_for_completion(&patch->finish); /* Put the module after the last access to struct klp_patch. */ if (!patch->forced) @@ -700,8 +691,6 @@ static void klp_free_patch_work_fn(struct work_struct *work) static int klp_init_func(struct klp_object *obj, struct klp_func *func) { - int ret; - if (!func->old_name) return -EINVAL; @@ -724,13 +713,9 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func) * object. If the user selects 0 for old_sympos, then 1 will be used * since a unique symbol will be the first occurrence. */ - ret = kobject_init_and_add(&func->kobj, &klp_ktype_func, - &obj->kobj, "%s,%lu", func->old_name, - func->old_sympos ? func->old_sympos : 1); - if (!ret) - func->kobj_added = true; - - return ret; + return kobject_add(&func->kobj, &obj->kobj, "%s,%lu", + func->old_name, + func->old_sympos ? func->old_sympos : 1); } /* Arches may override this to finish any remaining arch-specific tasks */ @@ -801,11 +786,9 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj) klp_find_object_module(obj); name = klp_is_module(obj) ? obj->name : "vmlinux"; - ret = kobject_init_and_add(&obj->kobj, &klp_ktype_object, - &patch->kobj, "%s", name); + ret = kobject_add(&obj->kobj, &patch->kobj, "%s", name); if (ret) return ret; - obj->kobj_added = true; klp_for_each_func(obj, func) { ret = klp_init_func(obj, func); @@ -829,7 +812,7 @@ static int klp_init_patch_early(struct klp_patch *patch) INIT_LIST_HEAD(&patch->list); INIT_LIST_HEAD(&patch->obj_list); - patch->kobj_added = false; + kobject_init(&patch->kobj, &klp_ktype_patch); patch->enabled = false; patch->forced = false; INIT_WORK(&patch->free_work, klp_free_patch_work_fn); @@ -840,11 +823,11 @@ static int klp_init_patch_early(struct klp_patch *patch) return -EINVAL; INIT_LIST_HEAD(&obj->func_list); - obj->kobj_added = false; + kobject_init(&obj->kobj, &klp_ktype_object); list_add_tail(&obj->node, &patch->obj_list); klp_for_each_func_static(obj, func) { - func->kobj_added = false; + kobject_init(&func->kobj, &klp_ktype_func); list_add_tail(&func->node, &obj->func_list); } } @@ -860,11 +843,9 @@ static int klp_init_patch(struct klp_patch *patch) struct klp_object *obj; int ret; - ret = kobject_init_and_add(&patch->kobj, &klp_ktype_patch, - klp_root_kobj, "%s", patch->mod->name); + ret = kobject_add(&patch->kobj, klp_root_kobj, "%s", patch->mod->name); if (ret) return ret; - patch->kobj_added = true; if (patch->replace) { ret = klp_add_nops(patch); @@ -926,9 +907,6 @@ static int __klp_enable_patch(struct klp_patch *patch) if (WARN_ON(patch->enabled)) return -EINVAL; - if (!patch->kobj_added) - return -EINVAL; - pr_notice("enabling patch '%s'\n", patch->mod->name); klp_init_transition(patch, KLP_PATCHED); -- cgit From f68d67cf2f83dc82675969724b59ca7c6da43fa9 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 3 May 2019 15:26:25 +0200 Subject: livepatch: Remove duplicated code for early initialization kobject_init() call added one more operation that has to be done when doing the early initialization of both static and dynamic livepatch structures. It would have been easier when the early initialization code was not duplicated. Let's deduplicate it for future generations of livepatching hackers. The patch does not change the existing behavior. Signed-off-by: Petr Mladek Reviewed-by: Kamalesh Babulal Acked-by: Joe Lawrence Signed-off-by: Jiri Kosina --- kernel/livepatch/core.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 42385f23252a..f12c0eabd843 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -426,10 +426,13 @@ static void klp_free_object_dynamic(struct klp_object *obj) kfree(obj); } -static struct kobj_type klp_ktype_object; -static struct kobj_type klp_ktype_func; +static void klp_init_func_early(struct klp_object *obj, + struct klp_func *func); +static void klp_init_object_early(struct klp_patch *patch, + struct klp_object *obj); -static struct klp_object *klp_alloc_object_dynamic(const char *name) +static struct klp_object *klp_alloc_object_dynamic(const char *name, + struct klp_patch *patch) { struct klp_object *obj; @@ -445,8 +448,7 @@ static struct klp_object *klp_alloc_object_dynamic(const char *name) } } - INIT_LIST_HEAD(&obj->func_list); - kobject_init(&obj->kobj, &klp_ktype_object); + klp_init_object_early(patch, obj); obj->dynamic = true; return obj; @@ -475,7 +477,7 @@ static struct klp_func *klp_alloc_func_nop(struct klp_func *old_func, } } - kobject_init(&func->kobj, &klp_ktype_func); + klp_init_func_early(obj, func); /* * func->new_func is same as func->old_func. These addresses are * set when the object is loaded, see klp_init_object_loaded(). @@ -495,11 +497,9 @@ static int klp_add_object_nops(struct klp_patch *patch, obj = klp_find_object(patch, old_obj); if (!obj) { - obj = klp_alloc_object_dynamic(old_obj->name); + obj = klp_alloc_object_dynamic(old_obj->name, patch); if (!obj) return -ENOMEM; - - list_add_tail(&obj->node, &patch->obj_list); } klp_for_each_func(old_obj, old_func) { @@ -510,8 +510,6 @@ static int klp_add_object_nops(struct klp_patch *patch, func = klp_alloc_func_nop(old_func, obj); if (!func) return -ENOMEM; - - list_add_tail(&func->node, &obj->func_list); } return 0; @@ -802,6 +800,21 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj) return ret; } +static void klp_init_func_early(struct klp_object *obj, + struct klp_func *func) +{ + kobject_init(&func->kobj, &klp_ktype_func); + list_add_tail(&func->node, &obj->func_list); +} + +static void klp_init_object_early(struct klp_patch *patch, + struct klp_object *obj) +{ + INIT_LIST_HEAD(&obj->func_list); + kobject_init(&obj->kobj, &klp_ktype_object); + list_add_tail(&obj->node, &patch->obj_list); +} + static int klp_init_patch_early(struct klp_patch *patch) { struct klp_object *obj; @@ -822,13 +835,10 @@ static int klp_init_patch_early(struct klp_patch *patch) if (!obj->funcs) return -EINVAL; - INIT_LIST_HEAD(&obj->func_list); - kobject_init(&obj->kobj, &klp_ktype_object); - list_add_tail(&obj->node, &patch->obj_list); + klp_init_object_early(patch, obj); klp_for_each_func_static(obj, func) { - kobject_init(&func->kobj, &klp_ktype_func); - list_add_tail(&func->node, &obj->func_list); + klp_init_func_early(obj, func); } } -- cgit From 13bf5ced93775ffccb53527a9d862e023a9daa03 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 25 Mar 2019 15:44:06 +0100 Subject: dma-mapping: add a Kconfig symbol to indicate arch_dma_prep_coherent presence Add a Kconfig symbol that indicates an architecture provides a arch_dma_prep_coherent implementation, and provide a stub otherwise. This will allow the generic dma-iommu code to use it while still allowing to be built for cache coherent architectures. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- arch/arm64/Kconfig | 1 + arch/csky/Kconfig | 1 + include/linux/dma-noncoherent.h | 6 ++++++ kernel/dma/Kconfig | 3 +++ 4 files changed, 11 insertions(+) (limited to 'kernel') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7e34b9eba5de..adda078d6df7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -13,6 +13,7 @@ config ARM64 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_DMA_COHERENT_TO_PFN select ARCH_HAS_DMA_MMAP_PGPROT + select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 725a115759c9..2c3178848b7d 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -1,6 +1,7 @@ config CSKY def_bool y select ARCH_32BIT_OFF_T + select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_USE_BUILTIN_BSWAP diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index 69b36ed31a99..9741767e400f 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -72,6 +72,12 @@ static inline void arch_sync_dma_for_cpu_all(struct device *dev) } #endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */ +#ifdef CONFIG_ARCH_HAS_DMA_PREP_COHERENT void arch_dma_prep_coherent(struct page *page, size_t size); +#else +static inline void arch_dma_prep_coherent(struct page *page, size_t size) +{ +} +#endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */ #endif /* _LINUX_DMA_NONCOHERENT_H */ diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 52b704e2b97a..83d711f8d665 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -38,6 +38,9 @@ config ARCH_HAS_SYNC_DMA_FOR_CPU config ARCH_HAS_SYNC_DMA_FOR_CPU_ALL bool +config ARCH_HAS_DMA_PREP_COHERENT + bool + config ARCH_HAS_DMA_COHERENT_TO_PFN bool -- cgit From 533307dc20a9e84a0687d4ca24aeb669516c0243 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Tue, 30 Apr 2019 17:57:29 +0800 Subject: cgroup: Remove unused cgrp variable The 'cgrp' is set but not used in commit <76f969e8948d8> ("cgroup: cgroup v2 freezer"). Remove it to avoid [-Wunused-but-set-variable] warning. Cc: Tejun Heo Signed-off-by: Shaokun Zhang Acked-by: Roman Gushchin Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 57edcf398d71..4fe9f7f1a3fa 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5864,11 +5864,8 @@ void cgroup_post_fork(struct task_struct *child) * the task into the frozen state. */ if (unlikely(cgroup_task_freeze(child))) { - struct cgroup *cgrp; - spin_lock(&child->sighand->siglock); WARN_ON_ONCE(child->frozen); - cgrp = cset->dfl_cgrp; child->jobctl |= JOBCTL_TRAP_FREEZE; spin_unlock(&child->sighand->siglock); -- cgit From cb2c4cd87874a7975b7b8615866b3a87bae10aab Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 26 Apr 2019 10:59:44 -0700 Subject: cgroup: prevent spurious transition into non-frozen state If freezing of a cgroup races with waking of a task from the frozen state (like waiting in vfork() or in do_signal_stop()), a spurious transition of the cgroup state can happen. The task enters cgroup_leave_frozen(true), the cgroup->nr_frozen_tasks counter decrements, and the cgroup is switched to the unfrozen state. To prevent it, let's reserve cgroup_leave_frozen(true) for terminating processes and use cgroup_leave_frozen(false) otherwise. To avoid busy-looping in the signal handling loop waiting for JOBCTL_TRAP_FREEZE set from the cgroup freezing path, let's do it explicitly in cgroup_leave_frozen(), if the task is going to stay frozen. Suggested-by: Oleg Nesterov Signed-off-by: Roman Gushchin Signed-off-by: Tejun Heo --- kernel/cgroup/freezer.c | 16 +++++----------- kernel/signal.c | 2 +- 2 files changed, 6 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c index 3bfbb3c8baf3..c321e768f8d3 100644 --- a/kernel/cgroup/freezer.c +++ b/kernel/cgroup/freezer.c @@ -139,19 +139,13 @@ void cgroup_leave_frozen(bool always_leave) cgroup_update_frozen(cgrp); WARN_ON_ONCE(!current->frozen); current->frozen = false; + } else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) { + spin_lock(¤t->sighand->siglock); + current->jobctl |= JOBCTL_TRAP_FREEZE; + set_thread_flag(TIF_SIGPENDING); + spin_unlock(¤t->sighand->siglock); } spin_unlock_irq(&css_set_lock); - - if (unlikely(current->frozen)) { - /* - * If the task remained in the frozen state, - * make sure it won't reach userspace without - * entering the signal handling loop. - */ - spin_lock_irq(¤t->sighand->siglock); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - } } /* diff --git a/kernel/signal.c b/kernel/signal.c index 095e0fc57b25..16b72f4f14df 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2514,7 +2514,7 @@ relock: */ if (unlikely(cgroup_task_frozen(current))) { spin_unlock_irq(&sighand->siglock); - cgroup_leave_frozen(true); + cgroup_leave_frozen(false); goto relock; } -- cgit From 96b9c592def5d7203bdad1337d9c92a2183de5cb Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 26 Apr 2019 10:59:45 -0700 Subject: cgroup: get rid of cgroup_freezer_frozen_exit() A task should never enter the exit path with the task->frozen bit set. Any frozen task must enter the signal handling loop and the only way to escape is through cgroup_leave_frozen(true), which unconditionally drops the task->frozen bit. So it means that cgroyp_freezer_frozen_exit() has zero chances to be called and has to be removed. Let's put a WARN_ON_ONCE() instead of the cgroup_freezer_frozen_exit() call to catch any potential leak of the task's frozen bit. Suggested-by: Oleg Nesterov Signed-off-by: Roman Gushchin Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 2 +- kernel/cgroup/cgroup.c | 5 ++--- kernel/cgroup/freezer.c | 10 ---------- 3 files changed, 3 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 3e2efd412dfa..c0077adeea83 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -889,7 +889,7 @@ void cgroup_update_frozen(struct cgroup *cgrp); void cgroup_freeze(struct cgroup *cgrp, bool freeze); void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src, struct cgroup *dst); -void cgroup_freezer_frozen_exit(struct task_struct *task); + static inline bool cgroup_task_freeze(struct task_struct *task) { bool ret; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 4fe9f7f1a3fa..327f37c9fdfa 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5926,9 +5926,8 @@ void cgroup_exit(struct task_struct *tsk) css_set_move_task(tsk, cset, NULL, false); cset->nr_tasks--; - if (unlikely(cgroup_task_frozen(tsk))) - cgroup_freezer_frozen_exit(tsk); - else if (unlikely(cgroup_task_freeze(tsk))) + WARN_ON_ONCE(cgroup_task_frozen(tsk)); + if (unlikely(cgroup_task_freeze(tsk))) cgroup_update_frozen(task_dfl_cgroup(tsk)); spin_unlock_irq(&css_set_lock); diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c index c321e768f8d3..8cf010680678 100644 --- a/kernel/cgroup/freezer.c +++ b/kernel/cgroup/freezer.c @@ -248,16 +248,6 @@ void cgroup_freezer_migrate_task(struct task_struct *task, cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags)); } -void cgroup_freezer_frozen_exit(struct task_struct *task) -{ - struct cgroup *cgrp = task_dfl_cgroup(task); - - lockdep_assert_held(&css_set_lock); - - cgroup_dec_frozen_cnt(cgrp); - cgroup_update_frozen(cgrp); -} - void cgroup_freeze(struct cgroup *cgrp, bool freeze) { struct cgroup_subsys_state *css; -- cgit From 1900da520c9fdc3a7cf00d21638f7c8721d5ac7f Mon Sep 17 00:00:00 2001 From: Fuqian Huang Date: Sun, 21 Apr 2019 19:47:27 +0800 Subject: kernel: cgroup: fix misuse of %x Pointers should be printed with %p or %px rather than cast to unsigned long type and printed with %lx. Change %lx to %p to print the pointers. Signed-off-by: Fuqian Huang Signed-off-by: Tejun Heo --- kernel/cgroup/debug.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c index 5f1b87330bee..80aa3f027ac3 100644 --- a/kernel/cgroup/debug.c +++ b/kernel/cgroup/debug.c @@ -64,8 +64,8 @@ static int current_css_set_read(struct seq_file *seq, void *v) css = cset->subsys[ss->id]; if (!css) continue; - seq_printf(seq, "%2d: %-4s\t- %lx[%d]\n", ss->id, ss->name, - (unsigned long)css, css->id); + seq_printf(seq, "%2d: %-4s\t- %p[%d]\n", ss->id, ss->name, + css, css->id); } rcu_read_unlock(); spin_unlock_irq(&css_set_lock); @@ -224,8 +224,8 @@ static int cgroup_subsys_states_read(struct seq_file *seq, void *v) if (css->parent) snprintf(pbuf, sizeof(pbuf) - 1, " P=%d", css->parent->id); - seq_printf(seq, "%2d: %-4s\t- %lx[%d] %d%s\n", ss->id, ss->name, - (unsigned long)css, css->id, + seq_printf(seq, "%2d: %-4s\t- %p[%d] %d%s\n", ss->id, ss->name, + css, css->id, atomic_read(&css->online_cnt), pbuf); } -- cgit From a9e9bcb45b1525ba7aea26ed9441e8632aeeda58 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 28 Apr 2019 17:25:38 -0400 Subject: locking/rwsem: Prevent decrement of reader count before increment During my rwsem testing, it was found that after a down_read(), the reader count may occasionally become 0 or even negative. Consequently, a writer may steal the lock at that time and execute with the reader in parallel thus breaking the mutual exclusion guarantee of the write lock. In other words, both readers and writer can become rwsem owners simultaneously. The current reader wakeup code does it in one pass to clear waiter->task and put them into wake_q before fully incrementing the reader count. Once waiter->task is cleared, the corresponding reader may see it, finish the critical section and do unlock to decrement the count before the count is incremented. This is not a problem if there is only one reader to wake up as the count has been pre-incremented by 1. It is a problem if there are more than one readers to be woken up and writer can steal the lock. The wakeup was actually done in 2 passes before the following v4.9 commit: 70800c3c0cc5 ("locking/rwsem: Scan the wait_list for readers only once") To fix this problem, the wakeup is now done in two passes again. In the first pass, we collect the readers and count them. The reader count is then fully incremented. In the second pass, the waiter->task is then cleared and they are put into wake_q to be woken up later. Signed-off-by: Waiman Long Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: huang ying Fixes: 70800c3c0cc5 ("locking/rwsem: Scan the wait_list for readers only once") Link: http://lkml.kernel.org/r/20190428212557.13482-2-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 46 ++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 6b3ee9948bf1..0b1f77957240 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -130,6 +130,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, { struct rwsem_waiter *waiter, *tmp; long oldcount, woken = 0, adjustment = 0; + struct list_head wlist; /* * Take a peek at the queue head waiter such that we can determine @@ -188,18 +189,43 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, * of the queue. We know that woken will be at least 1 as we accounted * for above. Note we increment the 'active part' of the count by the * number of readers before waking any processes up. + * + * We have to do wakeup in 2 passes to prevent the possibility that + * the reader count may be decremented before it is incremented. It + * is because the to-be-woken waiter may not have slept yet. So it + * may see waiter->task got cleared, finish its critical section and + * do an unlock before the reader count increment. + * + * 1) Collect the read-waiters in a separate list, count them and + * fully increment the reader count in rwsem. + * 2) For each waiters in the new list, clear waiter->task and + * put them into wake_q to be woken up later. */ - list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) { - struct task_struct *tsk; - + list_for_each_entry(waiter, &sem->wait_list, list) { if (waiter->type == RWSEM_WAITING_FOR_WRITE) break; woken++; - tsk = waiter->task; + } + list_cut_before(&wlist, &sem->wait_list, &waiter->list); + + adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; + lockevent_cond_inc(rwsem_wake_reader, woken); + if (list_empty(&sem->wait_list)) { + /* hit end of list above */ + adjustment -= RWSEM_WAITING_BIAS; + } + + if (adjustment) + atomic_long_add(adjustment, &sem->count); + + /* 2nd pass */ + list_for_each_entry_safe(waiter, tmp, &wlist, list) { + struct task_struct *tsk; + tsk = waiter->task; get_task_struct(tsk); - list_del(&waiter->list); + /* * Ensure calling get_task_struct() before setting the reader * waiter to nil such that rwsem_down_read_failed() cannot @@ -213,16 +239,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, */ wake_q_add_safe(wake_q, tsk); } - - adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; - lockevent_cond_inc(rwsem_wake_reader, woken); - if (list_empty(&sem->wait_list)) { - /* hit end of list above */ - adjustment -= RWSEM_WAITING_BIAS; - } - - if (adjustment) - atomic_long_add(adjustment, &sem->count); } /* -- cgit From b3e5838252665ee4cfa76b82bdf1198dca81e5be Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 27 Mar 2019 13:04:15 +0100 Subject: clone: add CLONE_PIDFD This patchset makes it possible to retrieve pid file descriptors at process creation time by introducing the new flag CLONE_PIDFD to the clone() system call. Linus originally suggested to implement this as a new flag to clone() instead of making it a separate system call. As spotted by Linus, there is exactly one bit for clone() left. CLONE_PIDFD creates file descriptors based on the anonymous inode implementation in the kernel that will also be used to implement the new mount api. They serve as a simple opaque handle on pids. Logically, this makes it possible to interpret a pidfd differently, narrowing or widening the scope of various operations (e.g. signal sending). Thus, a pidfd cannot just refer to a tgid, but also a tid, or in theory - given appropriate flag arguments in relevant syscalls - a process group or session. A pidfd does not represent a privilege. This does not imply it cannot ever be that way but for now this is not the case. A pidfd comes with additional information in fdinfo if the kernel supports procfs. The fdinfo file contains the pid of the process in the callers pid namespace in the same format as the procfs status file, i.e. "Pid:\t%d". As suggested by Oleg, with CLONE_PIDFD the pidfd is returned in the parent_tidptr argument of clone. This has the advantage that we can give back the associated pid and the pidfd at the same time. To remove worries about missing metadata access this patchset comes with a sample program that illustrates how a combination of CLONE_PIDFD, and pidfd_send_signal() can be used to gain race-free access to process metadata through /proc/. The sample program can easily be translated into a helper that would be suitable for inclusion in libc so that users don't have to worry about writing it themselves. Suggested-by: Linus Torvalds Signed-off-by: Christian Brauner Co-developed-by: Jann Horn Signed-off-by: Jann Horn Reviewed-by: Oleg Nesterov Cc: Arnd Bergmann Cc: "Eric W. Biederman" Cc: Kees Cook Cc: Thomas Gleixner Cc: David Howells Cc: "Michael Kerrisk (man-pages)" Cc: Andy Lutomirsky Cc: Andrew Morton Cc: Aleksa Sarai Cc: Linus Torvalds Cc: Al Viro --- include/linux/pid.h | 2 + include/uapi/linux/sched.h | 1 + kernel/fork.c | 107 +++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 106 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/include/linux/pid.h b/include/linux/pid.h index b6f4ba16065a..3c8ef5a199ca 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -66,6 +66,8 @@ struct pid extern struct pid init_struct_pid; +extern const struct file_operations pidfd_fops; + static inline struct pid *get_pid(struct pid *pid) { if (pid) diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index 22627f80063e..ed4ee170bee2 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -10,6 +10,7 @@ #define CLONE_FS 0x00000200 /* set if fs info shared between processes */ #define CLONE_FILES 0x00000400 /* set if open files shared between processes */ #define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ +#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */ #define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ #define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ #define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ diff --git a/kernel/fork.c b/kernel/fork.c index 9dcd18aa210b..e45f0acaf451 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -11,6 +11,7 @@ * management can be a bitch. See 'mm/memory.c': 'copy_page_range()' */ +#include #include #include #include @@ -21,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -1662,6 +1664,58 @@ static inline void rcu_copy_process(struct task_struct *p) #endif /* #ifdef CONFIG_TASKS_RCU */ } +static int pidfd_release(struct inode *inode, struct file *file) +{ + struct pid *pid = file->private_data; + + file->private_data = NULL; + put_pid(pid); + return 0; +} + +#ifdef CONFIG_PROC_FS +static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) +{ + struct pid_namespace *ns = proc_pid_ns(file_inode(m->file)); + struct pid *pid = f->private_data; + + seq_put_decimal_ull(m, "Pid:\t", pid_nr_ns(pid, ns)); + seq_putc(m, '\n'); +} +#endif + +const struct file_operations pidfd_fops = { + .release = pidfd_release, +#ifdef CONFIG_PROC_FS + .show_fdinfo = pidfd_show_fdinfo, +#endif +}; + +/** + * pidfd_create() - Create a new pid file descriptor. + * + * @pid: struct pid that the pidfd will reference + * + * This creates a new pid file descriptor with the O_CLOEXEC flag set. + * + * Note, that this function can only be called after the fd table has + * been unshared to avoid leaking the pidfd to the new process. + * + * Return: On success, a cloexec pidfd is returned. + * On error, a negative errno number will be returned. + */ +static int pidfd_create(struct pid *pid) +{ + int fd; + + fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid), + O_RDWR | O_CLOEXEC); + if (fd < 0) + put_pid(pid); + + return fd; +} + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. @@ -1674,13 +1728,14 @@ static __latent_entropy struct task_struct *copy_process( unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size, + int __user *parent_tidptr, int __user *child_tidptr, struct pid *pid, int trace, unsigned long tls, int node) { - int retval; + int pidfd = -1, retval; struct task_struct *p; struct multiprocess_signals delayed; @@ -1730,6 +1785,31 @@ static __latent_entropy struct task_struct *copy_process( return ERR_PTR(-EINVAL); } + if (clone_flags & CLONE_PIDFD) { + int reserved; + + /* + * - CLONE_PARENT_SETTID is useless for pidfds and also + * parent_tidptr is used to return pidfds. + * - CLONE_DETACHED is blocked so that we can potentially + * reuse it later for CLONE_PIDFD. + * - CLONE_THREAD is blocked until someone really needs it. + */ + if (clone_flags & + (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD)) + return ERR_PTR(-EINVAL); + + /* + * Verify that parent_tidptr is sane so we can potentially + * reuse it later. + */ + if (get_user(reserved, parent_tidptr)) + return ERR_PTR(-EFAULT); + + if (reserved != 0) + return ERR_PTR(-EINVAL); + } + /* * Force any signals received before this point to be delivered * before the fork happens. Collect up signals sent to multiple @@ -1936,6 +2016,22 @@ static __latent_entropy struct task_struct *copy_process( } } + /* + * This has to happen after we've potentially unshared the file + * descriptor table (so that the pidfd doesn't leak into the child + * if the fd table isn't shared). + */ + if (clone_flags & CLONE_PIDFD) { + retval = pidfd_create(pid); + if (retval < 0) + goto bad_fork_free_pid; + + pidfd = retval; + retval = put_user(pidfd, parent_tidptr); + if (retval) + goto bad_fork_put_pidfd; + } + #ifdef CONFIG_BLOCK p->plug = NULL; #endif @@ -1996,7 +2092,7 @@ static __latent_entropy struct task_struct *copy_process( */ retval = cgroup_can_fork(p); if (retval) - goto bad_fork_free_pid; + goto bad_fork_put_pidfd; /* * From this point on we must avoid any synchronous user-space @@ -2111,6 +2207,9 @@ bad_fork_cancel_cgroup: spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); cgroup_cancel_fork(p); +bad_fork_put_pidfd: + if (clone_flags & CLONE_PIDFD) + ksys_close(pidfd); bad_fork_free_pid: cgroup_threadgroup_change_end(current); if (pid != &init_struct_pid) @@ -2176,7 +2275,7 @@ static inline void init_idle_pids(struct task_struct *idle) struct task_struct *fork_idle(int cpu) { struct task_struct *task; - task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0, + task = copy_process(CLONE_VM, 0, 0, NULL, NULL, &init_struct_pid, 0, 0, cpu_to_node(cpu)); if (!IS_ERR(task)) { init_idle_pids(task); @@ -2223,7 +2322,7 @@ long _do_fork(unsigned long clone_flags, trace = 0; } - p = copy_process(clone_flags, stack_start, stack_size, + p = copy_process(clone_flags, stack_start, stack_size, parent_tidptr, child_tidptr, NULL, trace, tls, NUMA_NO_NODE); add_latent_entropy(); -- cgit From 2151ad1b067275730de1b38c7257478cae47d29e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 17 Apr 2019 22:50:25 +0200 Subject: signal: support CLONE_PIDFD with pidfd_send_signal Let pidfd_send_signal() use pidfds retrieved via CLONE_PIDFD. With this patch pidfd_send_signal() becomes independent of procfs. This fullfils the request made when we merged the pidfd_send_signal() patchset. The pidfd_send_signal() syscall is now always available allowing for it to be used by users without procfs mounted or even users without procfs support compiled into the kernel. Signed-off-by: Christian Brauner Co-developed-by: Jann Horn Signed-off-by: Jann Horn Acked-by: Oleg Nesterov Cc: Arnd Bergmann Cc: "Eric W. Biederman" Cc: Kees Cook Cc: Thomas Gleixner Cc: David Howells Cc: "Michael Kerrisk (man-pages)" Cc: Andy Lutomirsky Cc: Andrew Morton Cc: Aleksa Sarai Cc: Linus Torvalds Cc: Al Viro --- kernel/signal.c | 12 +++++++++--- kernel/sys_ni.c | 3 --- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index f98448cf2def..1581140f2d99 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3513,7 +3513,6 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) return kill_something_info(sig, &info, pid); } -#ifdef CONFIG_PROC_FS /* * Verify that the signaler and signalee either are in the same pid namespace * or that the signaler's pid namespace is an ancestor of the signalee's pid @@ -3550,6 +3549,14 @@ static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, siginfo_t *info) return copy_siginfo_from_user(kinfo, info); } +static struct pid *pidfd_to_pid(const struct file *file) +{ + if (file->f_op == &pidfd_fops) + return file->private_data; + + return tgid_pidfd_to_pid(file); +} + /** * sys_pidfd_send_signal - send a signal to a process through a task file * descriptor @@ -3586,7 +3593,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, return -EBADF; /* Is this a pidfd? */ - pid = tgid_pidfd_to_pid(f.file); + pid = pidfd_to_pid(f.file); if (IS_ERR(pid)) { ret = PTR_ERR(pid); goto err; @@ -3620,7 +3627,6 @@ err: fdput(f); return ret; } -#endif /* CONFIG_PROC_FS */ static int do_send_specific(pid_t tgid, pid_t pid, int sig, struct kernel_siginfo *info) diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index d21f4befaea4..4d9ae5ea6caf 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -167,9 +167,6 @@ COND_SYSCALL(syslog); /* kernel/sched/core.c */ -/* kernel/signal.c */ -COND_SYSCALL(pidfd_send_signal); - /* kernel/sys.c */ COND_SYSCALL(setregid); COND_SYSCALL(setgid); -- cgit From 4dd537aca25dd2e0e8aca8b8923930cbe6240003 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 7 May 2019 22:55:41 +0900 Subject: tracing: uprobes: Re-enable $comm support for uprobe events Since commit 533059281ee5 ("tracing: probeevent: Introduce new argument fetching code") dropped the $comm support from uprobe events, this re-enables it. For $comm support, uses strlcpy() instead of strncpy_from_user() to copy current task's comm. Because it is in the kernel space, strncpy_from_user() always fails to copy the comm. This also uses strlen() instead of strnlen_user() to measure the length of the comm. Note that this uses -ECOMM as a token value to fetch the comm string. If the user-space pointer points -ECOMM, it will be translated to task->comm. Link: http://lkml.kernel.org/r/155723734162.9149.4042756162201097965.stgit@devnote2 Fixes: 533059281ee5 ("tracing: probeevent: Introduce new argument fetching code") Reported-by: Andreas Ziegler Acked-by: Andreas Ziegler Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe.h | 1 + kernel/trace/trace_uprobe.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index b7737666c1a8..f9a8c632188b 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -124,6 +124,7 @@ struct fetch_insn { /* fetch + deref*N + store + mod + end <= 16, this allows N=12, enough */ #define FETCH_INSN_MAX 16 +#define FETCH_TOKEN_COMM (-ECOMM) /* Fetch type information table */ struct fetch_type { diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index cd8750a72768..eb7e06b54741 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -156,7 +156,10 @@ fetch_store_string(unsigned long addr, void *dest, void *base) if (unlikely(!maxlen)) return -ENOMEM; - ret = strncpy_from_user(dst, src, maxlen); + if (addr == FETCH_TOKEN_COMM) + ret = strlcpy(dst, current->comm, maxlen); + else + ret = strncpy_from_user(dst, src, maxlen); if (ret >= 0) { if (ret == maxlen) dst[ret - 1] = '\0'; @@ -180,7 +183,10 @@ fetch_store_strlen(unsigned long addr) int len; void __user *vaddr = (void __force __user *) addr; - len = strnlen_user(vaddr, MAX_STRING_SIZE); + if (addr == FETCH_TOKEN_COMM) + len = strlen(current->comm) + 1; + else + len = strnlen_user(vaddr, MAX_STRING_SIZE); return (len > MAX_STRING_SIZE) ? 0 : len; } @@ -220,6 +226,9 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest, case FETCH_OP_IMM: val = code->immediate; break; + case FETCH_OP_COMM: + val = FETCH_TOKEN_COMM; + break; case FETCH_OP_FOFFS: val = translate_user_vaddr(code->immediate); break; -- cgit From 489fe0096b19b664b8f3bed0fd604d617a229b5a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 7 May 2019 22:55:52 +0900 Subject: tracing: probeevent: Do not accumulate on ret variable Do not accumulate strlen result on "ret" local variable, because it is accumulated on "total" local variable for array case. Link: http://lkml.kernel.org/r/155723735237.9149.3192150444705457531.stgit@devnote2 Fixes: 40b53b771806 ("tracing: probeevent: Add array type support") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe_tmpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index 4737bb8c07a3..c30c61f12ddd 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -88,7 +88,7 @@ stage3: /* 3rd stage: store value to buffer */ if (unlikely(!dest)) { if (code->op == FETCH_OP_ST_STRING) { - ret += fetch_store_strlen(val + code->offset); + ret = fetch_store_strlen(val + code->offset); code++; goto array; } else -- cgit From 3dd1f7f24f8ceec00bbbc364c2ac3c893f0fdc4c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 7 May 2019 22:56:02 +0900 Subject: tracing: probeevent: Fix to make the type of $comm string Fix to make the type of $comm "string". If we set the other type to $comm argument, it shows meaningless value or wrong data. Currently probe events allow us to set string array type (e.g. ":string[2]"), or other digit types like x8 on $comm. But since clearly $comm is just a string data, it should not be fetched by other types including array. Link: http://lkml.kernel.org/r/155723736241.9149.14582064184468574539.stgit@devnote2 Cc: Andreas Ziegler Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 533059281ee5 ("tracing: probeevent: Introduce new argument fetching code") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 4cc2d467d34c..e0d1d5353464 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -533,13 +533,14 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, } } } - /* - * The default type of $comm should be "string", and it can't be - * dereferenced. - */ - if (!t && strcmp(arg, "$comm") == 0) + + /* Since $comm can not be dereferred, we can find $comm by strcmp */ + if (strcmp(arg, "$comm") == 0) { + /* The type of $comm must be "string", and not an array. */ + if (parg->count || (t && strcmp(t, "string"))) + return -EINVAL; parg->type = find_fetch_type("string"); - else + } else parg->type = find_fetch_type(t); if (!parg->type) { trace_probe_log_err(offset + (t ? (t - arg) : 0), BAD_TYPE); -- cgit From 5c173bedb24dfcc8e412d3c3f111c504e4408dd5 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 1 Nov 2018 11:46:40 -0400 Subject: ring-buffer: Fix mispelling of Calculate It's not "Caculate". Link: http://lkml.kernel.org/r/20181101154640.23162-1-tiny.windzz@gmail.com Signed-off-by: Yangtao Li Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer_benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index ffba6789c0e2..0564f6db0561 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -362,7 +362,7 @@ static void ring_buffer_producer(void) hit--; /* make it non zero */ } - /* Caculate the average time in nanosecs */ + /* Calculate the average time in nanosecs */ avg = NSEC_PER_MSEC / (hit + missed); trace_printk("%ld ns per entry\n", avg); } -- cgit From 0f5e5a3ab7fa1c09370a4d709ad6157457d5b8b6 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 20 Mar 2019 09:17:57 +0100 Subject: tracing: Eliminate const char[] auto variables Automatic const char[] variables cause unnecessary code generation. For example, the this_mod variable leads to 3f04: 48 b8 5f 5f 74 68 69 73 5f 6d movabs $0x6d5f736968745f5f,%rax # __this_m 3f0e: 4c 8d 44 24 02 lea 0x2(%rsp),%r8 3f13: 48 8d 7c 24 10 lea 0x10(%rsp),%rdi 3f18: 48 89 44 24 02 mov %rax,0x2(%rsp) 3f1d: 4c 89 e9 mov %r13,%rcx 3f20: b8 65 00 00 00 mov $0x65,%eax # e 3f25: 48 c7 c2 00 00 00 00 mov $0x0,%rdx 3f28: R_X86_64_32S .rodata.str1.1+0x18d 3f2c: be 48 00 00 00 mov $0x48,%esi 3f31: c7 44 24 0a 6f 64 75 6c movl $0x6c75646f,0xa(%rsp) # odul 3f39: 66 89 44 24 0e mov %ax,0xe(%rsp) i.e., the string gets built on the stack at runtime. Similar code can be found for the other instances I'm replacing here. Putting the string in .rodata reduces the combined .text+.rodata size and saves time and stack space at runtime. The simplest fix, and what I've done for the this_mod case, is to just make the variable static. However, for the "" case where the same string is used twice, that prevents the linker from merging those two literals, so instead use a macro - that also keeps the two instances automatically in sync (instead of only the compile-time strlen expression). Finally, for the two runs of spaces, it turns out that the "build these strings on the stack" is not the worst part of what gcc does - it turns print_func_help_header_irq() into "if (tgid) { /* print_event_info + five seq_printf calls */ } else { /* print event_info + another five seq_printf */}". Taking inspiration from a suggestion from Al Viro, use %.*s to make snprintf either stop after the first two spaces or print the whole string. As a bonus, the seq_printfs now fit on single lines (at least, they are not longer than the existing ones in the function just above), making it easier to see that the ascii art lines up. x86-64 defconfig + CONFIG_FUNCTION_TRACER: $ scripts/stackdelta /tmp/stackusage.{0,1} ./kernel/trace/ftrace.c ftrace_mod_callback 152 136 -16 ./kernel/trace/trace.c trace_default_header 56 32 -24 ./kernel/trace/trace.c tracing_mark_raw_write 96 72 -24 ./kernel/trace/trace.c tracing_mark_write 104 80 -24 bloat-o-meter add/remove: 1/0 grow/shrink: 0/4 up/down: 14/-375 (-361) Function old new delta this_mod - 14 +14 ftrace_mod_callback 577 542 -35 tracing_mark_raw_write 444 374 -70 tracing_mark_write 616 540 -76 trace_default_header 600 406 -194 Link: http://lkml.kernel.org/r/20190320081757.6037-1-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- kernel/trace/trace.c | 34 +++++++++++++--------------------- 2 files changed, 14 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 433a64f49532..7765a53f1006 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3875,7 +3875,7 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, static bool module_exists(const char *module) { /* All modules have the symbol __this_module */ - const char this_mod[] = "__this_module"; + static const char this_mod[] = "__this_module"; char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2]; unsigned long val; int n; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dcb9adb44be9..3259019cc66d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3593,25 +3593,18 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; - const char tgid_space[] = " "; - const char space[] = " "; + const char *space = " "; + int prec = tgid ? 10 : 2; print_event_info(buf, m); - seq_printf(m, "# %s _-----=> irqs-off\n", - tgid ? tgid_space : space); - seq_printf(m, "# %s / _----=> need-resched\n", - tgid ? tgid_space : space); - seq_printf(m, "# %s| / _---=> hardirq/softirq\n", - tgid ? tgid_space : space); - seq_printf(m, "# %s|| / _--=> preempt-depth\n", - tgid ? tgid_space : space); - seq_printf(m, "# %s||| / delay\n", - tgid ? tgid_space : space); - seq_printf(m, "# TASK-PID %sCPU# |||| TIMESTAMP FUNCTION\n", - tgid ? " TGID " : space); - seq_printf(m, "# | | %s | |||| | |\n", - tgid ? " | " : space); + seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); + seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); + seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); + seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); + seq_printf(m, "# %.*s||| / delay\n", prec, space); + seq_printf(m, "# TASK-PID %.*sCPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); + seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); } void @@ -6342,13 +6335,13 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, struct ring_buffer *buffer; struct print_entry *entry; unsigned long irq_flags; - const char faulted[] = ""; ssize_t written; int size; int len; /* Used in tracing_mark_raw_write() as well */ -#define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */ +#define FAULTED_STR "" +#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */ if (tracing_disabled) return -EINVAL; @@ -6380,7 +6373,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt); if (len) { - memcpy(&entry->buf, faulted, FAULTED_SIZE); + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); cnt = FAULTED_SIZE; written = -EFAULT; } else @@ -6421,7 +6414,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, struct ring_buffer_event *event; struct ring_buffer *buffer; struct raw_data_entry *entry; - const char faulted[] = ""; unsigned long irq_flags; ssize_t written; int size; @@ -6461,7 +6453,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, len = __copy_from_user_inatomic(&entry->id, ubuf, cnt); if (len) { entry->id = -1; - memcpy(&entry->buf, faulted, FAULTED_SIZE); + memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); written = -EFAULT; } else written = cnt; -- cgit From bfcd631eb6de474d8e097fd0f9f840fdf7272a1d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 9 Nov 2018 13:23:12 +0000 Subject: tracing: Fix white space issues in parse_pred() function Trivial fix to clean up an indentation issue, a whole chunk of code has an extra space in the indentation. Link: http://lkml.kernel.org/r/20181109132312.20994-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_filter.c | 48 +++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 180ecb390baa..d3e59312ef40 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1222,30 +1222,30 @@ static int parse_pred(const char *str, void *data, * (perf doesn't use it) and grab everything. */ if (strcmp(field->name, "ip") != 0) { - parse_error(pe, FILT_ERR_IP_FIELD_ONLY, pos + i); - goto err_free; - } - pred->fn = filter_pred_none; - - /* - * Quotes are not required, but if they exist then we need - * to read them till we hit a matching one. - */ - if (str[i] == '\'' || str[i] == '"') - q = str[i]; - else - q = 0; - - for (i++; str[i]; i++) { - if (q && str[i] == q) - break; - if (!q && (str[i] == ')' || str[i] == '&' || - str[i] == '|')) - break; - } - /* Skip quotes */ - if (q) - s++; + parse_error(pe, FILT_ERR_IP_FIELD_ONLY, pos + i); + goto err_free; + } + pred->fn = filter_pred_none; + + /* + * Quotes are not required, but if they exist then we need + * to read them till we hit a matching one. + */ + if (str[i] == '\'' || str[i] == '"') + q = str[i]; + else + q = 0; + + for (i++; str[i]; i++) { + if (q && str[i] == q) + break; + if (!q && (str[i] == ')' || str[i] == '&' || + str[i] == '|')) + break; + } + /* Skip quotes */ + if (q) + s++; len = i - s; if (len >= MAX_FILTER_STR_VAL) { parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i); -- cgit From 6fc2171c5c03672bae71d04a0f5fa88cc9c3b4e2 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 30 Nov 2018 15:56:22 +0100 Subject: tracing: Allow RCU to run between postponed startup tests When building a allmodconfig kernel for arm64 and boot that in qemu, CONFIG_FTRACE_STARTUP_TEST gets enabled and that takes time so the watchdog expires and prints out a message like this: 'watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [swapper/0:1]' Depending on what the what test gets called from init_trace_selftests() it stays minutes in the loop. Rework so that function cond_resched() gets called in the init_trace_selftests loop. Link: http://lkml.kernel.org/r/20181130145622.26334-1-anders.roxell@linaro.org Co-developed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Signed-off-by: Anders Roxell Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3259019cc66d..4269af5905e4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1722,6 +1722,10 @@ static __init int init_trace_selftests(void) pr_info("Running postponed tracer tests:\n"); list_for_each_entry_safe(p, n, &postponed_selftests, list) { + /* This loop can take minutes when sanitizers are enabled, so + * lets make sure we allow RCU processing. + */ + cond_resched(); ret = run_tracer_selftest(p->type); /* If the test fails, then warn and remove from available_tracers */ if (ret < 0) { -- cgit From cbe08bcbbe787315c425dde284dcb715cfbf3f39 Mon Sep 17 00:00:00 2001 From: Elazar Leibovich Date: Mon, 31 Dec 2018 13:58:37 +0200 Subject: tracing: Fix partial reading of trace event's id file When reading only part of the id file, the ppos isn't tracked correctly. This is taken care by simple_read_from_buffer. Reading a single byte, and then the next byte would result EOF. While this seems like not a big deal, this breaks abstractions that reads information from files unbuffered. See for example https://github.com/golang/go/issues/29399 This code was mentioned as problematic in commit cd458ba9d5a5 ("tracing: Do not (ab)use trace_seq in event_id_read()") An example C code that show this bug is: #include #include #include #include #include #include int main(int argc, char **argv) { if (argc < 2) return 1; int fd = open(argv[1], O_RDONLY); char c; read(fd, &c, 1); printf("First %c\n", c); read(fd, &c, 1); printf("Second %c\n", c); } Then run with, e.g. sudo ./a.out /sys/kernel/debug/tracing/events/tcp/tcp_set_state/id You'll notice you're getting the first character twice, instead of the first two characters in the id file. Link: http://lkml.kernel.org/r/20181231115837.4932-1-elazar@lightbitslabs.com Cc: Orit Wasserman Cc: Oleg Nesterov Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 23725aeeab10b ("ftrace: provide an id file for each event") Signed-off-by: Elazar Leibovich Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 81c038ed6cee..0ce3db67f556 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1319,9 +1319,6 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) char buf[32]; int len; - if (*ppos) - return 0; - if (unlikely(!id)) return -ENODEV; -- cgit From 8623b00676f16ed8972008095deca2c8e2b97a37 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 14 Jan 2019 22:34:08 -0600 Subject: tracing: Replace kzalloc with kcalloc Replace kzalloc() function with its 2-factor argument form, kcalloc(). This patch replaces cases of: kzalloc(a * b, gfp) with: kcalloc(a, b, gfp) This code was detected with the help of Coccinelle. Link: http://lkml.kernel.org/r/20190115043408.GA23456@embeddedor Signed-off-by: Gustavo A. R. Silva Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_probe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index e0d1d5353464..a347faced959 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -558,7 +558,7 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, parg->count); } - code = tmp = kzalloc(sizeof(*code) * FETCH_INSN_MAX, GFP_KERNEL); + code = tmp = kcalloc(FETCH_INSN_MAX, sizeof(*code), GFP_KERNEL); if (!code) return -ENOMEM; code[FETCH_INSN_MAX - 1].op = FETCH_OP_END; @@ -637,7 +637,7 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size, code->op = FETCH_OP_END; /* Shrink down the code buffer */ - parg->code = kzalloc(sizeof(*code) * (code - tmp + 1), GFP_KERNEL); + parg->code = kcalloc(code - tmp + 1, sizeof(*code), GFP_KERNEL); if (!parg->code) ret = -ENOMEM; else -- cgit From b9416997603ef7e17d4de10b6408f19da2feb72c Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat (VMware)" Date: Mon, 28 Jan 2019 17:55:53 -0800 Subject: tracing: Fix documentation about disabling options using trace_options To disable a tracing option using the trace_options file, the option name needs to be prefixed with 'no', and not suffixed, as the README states. Fix it. Link: http://lkml.kernel.org/r/154872690031.47356.5739053380942044586.stgit@srivatsa-ubuntu Signed-off-by: Srivatsa S. Bhat (VMware) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4269af5905e4..a3a6945a7732 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4755,7 +4755,7 @@ static const char readme_msg[] = " instances\t\t- Make sub-buffers with: mkdir instances/foo\n" "\t\t\t Remove sub-buffer with rmdir\n" " trace_options\t\t- Set format or modify how tracing happens\n" - "\t\t\t Disable an option by adding a suffix 'no' to the\n" + "\t\t\t Disable an option by prefixing 'no' to the\n" "\t\t\t option name\n" " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n" #ifdef CONFIG_DYNAMIC_FTRACE -- cgit From fdc6bae940ee9eb869e493990540098b8c0fd6ab Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Wed, 17 Apr 2019 10:48:33 +0200 Subject: ntp: Allow TAI-UTC offset to be set to zero The ADJ_TAI adjtimex mode sets the TAI-UTC offset of the system clock. It is typically set by NTP/PTP implementations and it is automatically updated by the kernel on leap seconds. The initial value is zero (which applications may interpret as unknown), but this value cannot be set by adjtimex. This limitation seems to go back to the original "nanokernel" implementation by David Mills. Change the ADJ_TAI check to accept zero as a valid TAI-UTC offset in order to allow setting it back to the initial value. Fixes: 153b5d054ac2 ("ntp: support for TAI") Suggested-by: Ondrej Mosnacek Signed-off-by: Miroslav Lichvar Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Richard Cochran Cc: Prarit Bhargava Link: https://lkml.kernel.org/r/20190417084833.7401-1-mlichvar@redhat.com --- kernel/time/ntp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 92a90014a925..f43d47c8c3b6 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -690,7 +690,7 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc, time_constant = max(time_constant, 0l); } - if (txc->modes & ADJ_TAI && txc->constant > 0) + if (txc->modes & ADJ_TAI && txc->constant >= 0) *time_tai = txc->constant; if (txc->modes & ADJ_OFFSET) -- cgit From f2b31bb598248c04721cb8485e6091a9feb045ac Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 8 May 2019 13:34:20 -0700 Subject: cgroup: never call do_group_exit() with task->frozen bit set I've got two independent reports that cgroup_task_frozen() check in cgroup_exit() has been triggered by lkp libhugetlbfs-test and LTP ptrace01 tests. For example: [ 44.576072] WARNING: CPU: 1 PID: 3028 at kernel/cgroup/cgroup.c:5932 cgroup_exit+0x148/0x160 [ 44.577724] Modules linked in: crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel sr_mod cdrom bochs_drm sg ttm ata_generic pata_acpi ppdev drm_kms_helper snd_pcm syscopyarea aesni_intel snd_timer sysfillrect sysimgblt snd crypto_simd cryptd glue_helper soundcore fb_sys_fops joydev drm serio_raw pcspkr ata_piix libata i2c_piix4 floppy parport_pc parport ip_tables [ 44.583106] CPU: 1 PID: 3028 Comm: ptrace-write-hu Not tainted 5.1.0-rc3-00053-g9262503 #5 [ 44.584600] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 44.586116] RIP: 0010:cgroup_exit+0x148/0x160 [ 44.587135] Code: 0f 84 50 ff ff ff 48 8b 85 c8 0c 00 00 48 8b 78 70 e8 ec 2e 00 00 e9 3b ff ff ff f0 ff 43 60 0f 88 72 21 89 00 e9 48 ff ff ff <0f> 0b e9 1b ff ff ff e8 3c 73 f4 ff 66 90 66 2e 0f 1f 84 00 00 00 [ 44.590113] RSP: 0018:ffffb25702dcfd30 EFLAGS: 00010002 [ 44.591167] RAX: ffff96a7fee32410 RBX: ffff96a7ff1d6000 RCX: dead000000000200 [ 44.592446] RDX: ffff96a7ff1d6080 RSI: ffff96a7fec75290 RDI: ffff96a7fec75290 [ 44.593715] RBP: ffff96a7fec745c0 R08: ffff96a7fec74658 R09: 0000000000000000 [ 44.594985] R10: 0000000000000000 R11: 0000000000000001 R12: ffff96a7fec75101 [ 44.596266] R13: ffff96a7fec745c0 R14: ffff96a7ff3bde30 R15: ffff96a7fec75130 [ 44.597550] FS: 0000000000000000(0000) GS:ffff96a7dd700000(0000) knlGS:0000000000000000 [ 44.598950] CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 [ 44.600098] CR2: 00000000f7a00000 CR3: 000000000d20e000 CR4: 00000000000406e0 [ 44.601417] Call Trace: [ 44.602777] do_exit+0x337/0xc40 [ 44.603677] do_group_exit+0x3a/0xa0 [ 44.604610] get_signal+0x12e/0x8d0 [ 44.605533] ? __switch_to_asm+0x40/0x70 [ 44.606503] do_signal+0x36/0x650 [ 44.607409] ? __switch_to_asm+0x40/0x70 [ 44.608383] ? __schedule+0x267/0x860 [ 44.609329] exit_to_usermode_loop+0x89/0xf0 [ 44.610349] do_fast_syscall_32+0x251/0x2e3 [ 44.611357] entry_SYSENTER_compat+0x7f/0x91 [ 44.612376] ---[ end trace e4ca5cfc4b7f7964 ]--- The problem is caused by the ptrace_signal() call in the for loop in get_signal(). There is a cgroup_enter_frozen() call inside ptrace_signal(), so after exit from ptrace_signal() the task->frozen bit might be set. In this case do_group_exit() can be called with the task->frozen bit set and trigger the warning. This is only place where we can leave the loop with the task->frozen bit set and without setting JOBCTL_TRAP_FREEZE and TIF_SIGPENDING. To resolve this problem, let's move cgroup_leave_frozen(true) call to just after the fatal label. If the task is going to die, the frozen bit must be cleared no matter how we get into this point. Reported-by: kernel test robot Reported-by: Qian Cai Cc: Oleg Nesterov Cc: Tejun Heo Signed-off-by: Roman Gushchin Signed-off-by: Tejun Heo --- kernel/signal.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 16b72f4f14df..8607b11ff936 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2483,10 +2483,6 @@ relock: ksig->info.si_signo = signr = SIGKILL; sigdelset(¤t->pending.signal, SIGKILL); recalc_sigpending(); - current->jobctl &= ~JOBCTL_TRAP_FREEZE; - spin_unlock_irq(&sighand->siglock); - if (unlikely(cgroup_task_frozen(current))) - cgroup_leave_frozen(true); goto fatal; } @@ -2608,8 +2604,10 @@ relock: continue; } - spin_unlock_irq(&sighand->siglock); fatal: + spin_unlock_irq(&sighand->siglock); + if (unlikely(cgroup_task_frozen(current))) + cgroup_leave_frozen(true); /* * Anything else is fatal, maybe with a core dump. -- cgit From c3b7112df86b769927a60a6d7175988ca3d60f09 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 10 May 2019 11:53:46 +0200 Subject: fork: do not release lock that wasn't taken Avoid calling cgroup_threadgroup_change_end() without having called cgroup_threadgroup_change_begin() first. During process creation we need to check whether the cgroup we are in allows us to fork. To perform this check the cgroup needs to guard itself against threadgroup changes and takes a lock. Prior to CLONE_PIDFD the cleanup target "bad_fork_free_pid" would also need to call cgroup_threadgroup_change_end() because said lock had already been taken. However, this is not the case anymore with the addition of CLONE_PIDFD. We are now allocating a pidfd before we check whether the cgroup we're in can fork and thus prior to taking the lock. So when copy_process() fails at the right step it would release a lock we haven't taken. This bug is not even very subtle to be honest. It's just not very clear from the naming of cgroup_threadgroup_change_{begin,end}() that a lock is taken. Here's the relevant splat: entry_SYSENTER_compat+0x70/0x7f arch/x86/entry/entry_64_compat.S:139 RIP: 0023:0xf7fec849 Code: 85 d2 74 02 89 0a 5b 5d c3 8b 04 24 c3 8b 14 24 c3 8b 3c 24 c3 90 90 90 90 90 90 90 90 90 90 90 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 eb 0d 90 90 90 90 90 90 90 90 90 90 90 90 RSP: 002b:00000000ffed5a8c EFLAGS: 00000246 ORIG_RAX: 0000000000000078 RAX: ffffffffffffffda RBX: 0000000000003ffc RCX: 0000000000000000 RDX: 00000000200005c0 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000012 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ------------[ cut here ]------------ DEBUG_LOCKS_WARN_ON(depth <= 0) WARNING: CPU: 1 PID: 7744 at kernel/locking/lockdep.c:4052 __lock_release kernel/locking/lockdep.c:4052 [inline] WARNING: CPU: 1 PID: 7744 at kernel/locking/lockdep.c:4052 lock_release+0x667/0xa00 kernel/locking/lockdep.c:4321 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 7744 Comm: syz-executor007 Not tainted 5.1.0+ #4 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 panic+0x2cb/0x65c kernel/panic.c:214 __warn.cold+0x20/0x45 kernel/panic.c:566 report_bug+0x263/0x2b0 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:179 [inline] fixup_bug arch/x86/kernel/traps.c:174 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:272 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:291 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:972 RIP: 0010:__lock_release kernel/locking/lockdep.c:4052 [inline] RIP: 0010:lock_release+0x667/0xa00 kernel/locking/lockdep.c:4321 Code: 0f 85 a0 03 00 00 8b 35 77 66 08 08 85 f6 75 23 48 c7 c6 a0 55 6b 87 48 c7 c7 40 25 6b 87 4c 89 85 70 ff ff ff e8 b7 a9 eb ff <0f> 0b 4c 8b 85 70 ff ff ff 4c 89 ea 4c 89 e6 4c 89 c7 e8 52 63 ff RSP: 0018:ffff888094117b48 EFLAGS: 00010086 RAX: 0000000000000000 RBX: 1ffff11012822f6f RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff815af236 RDI: ffffed1012822f5b RBP: ffff888094117c00 R08: ffff888092bfc400 R09: fffffbfff113301d R10: fffffbfff113301c R11: ffffffff889980e3 R12: ffffffff8a451df8 R13: ffffffff8142e71f R14: ffffffff8a44cc80 R15: ffff888094117bd8 percpu_up_read.constprop.0+0xcb/0x110 include/linux/percpu-rwsem.h:92 cgroup_threadgroup_change_end include/linux/cgroup-defs.h:712 [inline] copy_process.part.0+0x47ff/0x6710 kernel/fork.c:2222 copy_process kernel/fork.c:1772 [inline] _do_fork+0x25d/0xfd0 kernel/fork.c:2338 __do_compat_sys_x86_clone arch/x86/ia32/sys_ia32.c:240 [inline] __se_compat_sys_x86_clone arch/x86/ia32/sys_ia32.c:236 [inline] __ia32_compat_sys_x86_clone+0xbc/0x140 arch/x86/ia32/sys_ia32.c:236 do_syscall_32_irqs_on arch/x86/entry/common.c:334 [inline] do_fast_syscall_32+0x281/0xd54 arch/x86/entry/common.c:405 entry_SYSENTER_compat+0x70/0x7f arch/x86/entry/entry_64_compat.S:139 RIP: 0023:0xf7fec849 Code: 85 d2 74 02 89 0a 5b 5d c3 8b 04 24 c3 8b 14 24 c3 8b 3c 24 c3 90 90 90 90 90 90 90 90 90 90 90 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 eb 0d 90 90 90 90 90 90 90 90 90 90 90 90 RSP: 002b:00000000ffed5a8c EFLAGS: 00000246 ORIG_RAX: 0000000000000078 RAX: ffffffffffffffda RBX: 0000000000003ffc RCX: 0000000000000000 RDX: 00000000200005c0 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000012 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Kernel Offset: disabled Rebooting in 86400 seconds.. Reported-and-tested-by: syzbot+3286e58549edc479faae@syzkaller.appspotmail.com Fixes: b3e583825266 ("clone: add CLONE_PIDFD") Signed-off-by: Christian Brauner --- kernel/fork.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 5359facf9867..737db1828437 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2102,7 +2102,7 @@ static __latent_entropy struct task_struct *copy_process( */ retval = cgroup_can_fork(p); if (retval) - goto bad_fork_put_pidfd; + goto bad_fork_cgroup_threadgroup_change_end; /* * From this point on we must avoid any synchronous user-space @@ -2217,11 +2217,12 @@ bad_fork_cancel_cgroup: spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); cgroup_cancel_fork(p); +bad_fork_cgroup_threadgroup_change_end: + cgroup_threadgroup_change_end(current); bad_fork_put_pidfd: if (clone_flags & CLONE_PIDFD) ksys_close(pidfd); bad_fork_free_pid: - cgroup_threadgroup_change_end(current); if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_thread: -- cgit From 56e33afd7757d5da2664fb797f2544ce167176be Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 10 May 2019 23:47:50 +0200 Subject: livepatch: Remove klp_check_compiler_support() The only purpose of klp_check_compiler_support() is to make sure that we are not using ftrace on x86 via mcount (because that's executed only after prologue has already happened, and that's too late for livepatching purposes). Now that mcount is not supported by ftrace any more, there is no need for klp_check_compiler_support() either. Link: http://lkml.kernel.org/r/nycvar.YFH.7.76.1905102346100.17054@cbobk.fhfr.pm Reported-by: Linus Torvalds Signed-off-by: Jiri Kosina Signed-off-by: Steven Rostedt (VMware) --- arch/powerpc/include/asm/livepatch.h | 5 ----- arch/s390/include/asm/livepatch.h | 5 ----- arch/x86/include/asm/livepatch.h | 5 ----- kernel/livepatch/core.c | 8 -------- 4 files changed, 23 deletions(-) (limited to 'kernel') diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index 5070df19d463..c005aee5ea43 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -24,11 +24,6 @@ #include #ifdef CONFIG_LIVEPATCH -static inline int klp_check_compiler_support(void) -{ - return 0; -} - static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { regs->nip = ip; diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index 672f95b12d40..818612b784cd 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -13,11 +13,6 @@ #include -static inline int klp_check_compiler_support(void) -{ - return 0; -} - static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { regs->psw.addr = ip; diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h index 2f2bdf0662f8..a66f6706c2de 100644 --- a/arch/x86/include/asm/livepatch.h +++ b/arch/x86/include/asm/livepatch.h @@ -24,11 +24,6 @@ #include #include -static inline int klp_check_compiler_support(void) -{ - return 0; -} - static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { regs->ip = ip; diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index eb0ee10a1981..112a36ed4a09 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -1220,14 +1220,6 @@ void klp_module_going(struct module *mod) static int __init klp_init(void) { - int ret; - - ret = klp_check_compiler_support(); - if (ret) { - pr_info("Your compiler is too old; turning off.\n"); - return -EINVAL; - } - klp_root_kobj = kobject_create_and_add("livepatch", kernel_kobj); if (!klp_root_kobj) return -ENOMEM; -- cgit From af959b18fd447170a10865283ba691af4353cc7f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 11 May 2019 03:03:09 +0200 Subject: bpf: fix out of bounds backwards jmps due to dead code removal systemtap folks reported the following splat recently: [ 7790.862212] WARNING: CPU: 3 PID: 26759 at arch/x86/kernel/kprobes/core.c:1022 kprobe_fault_handler+0xec/0xf0 [...] [ 7790.864113] CPU: 3 PID: 26759 Comm: sshd Not tainted 5.1.0-0.rc7.git1.1.fc31.x86_64 #1 [ 7790.864198] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS[...] [ 7790.864314] RIP: 0010:kprobe_fault_handler+0xec/0xf0 [ 7790.864375] Code: 48 8b 50 [...] [ 7790.864714] RSP: 0018:ffffc06800bdbb48 EFLAGS: 00010082 [ 7790.864812] RAX: ffff9e2b75a16320 RBX: 0000000000000000 RCX: 0000000000000000 [ 7790.865306] RDX: ffffffffffffffff RSI: 000000000000000e RDI: ffffc06800bdbbf8 [ 7790.865514] RBP: ffffc06800bdbbf8 R08: 0000000000000000 R09: 0000000000000000 [ 7790.865960] R10: 0000000000000000 R11: 0000000000000000 R12: ffffc06800bdbbf8 [ 7790.866037] R13: ffff9e2ab56a0418 R14: ffff9e2b6d0bb400 R15: ffff9e2b6d268000 [ 7790.866114] FS: 00007fde49937d80(0000) GS:ffff9e2b75a00000(0000) knlGS:0000000000000000 [ 7790.866193] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 7790.866318] CR2: 0000000000000000 CR3: 000000012f312000 CR4: 00000000000006e0 [ 7790.866419] Call Trace: [ 7790.866677] do_user_addr_fault+0x64/0x480 [ 7790.867513] do_page_fault+0x33/0x210 [ 7790.868002] async_page_fault+0x1e/0x30 [ 7790.868071] RIP: 0010: (null) [ 7790.868144] Code: Bad RIP value. [ 7790.868229] RSP: 0018:ffffc06800bdbca8 EFLAGS: 00010282 [ 7790.868362] RAX: ffff9e2b598b60f8 RBX: ffffc06800bdbe48 RCX: 0000000000000004 [ 7790.868629] RDX: 0000000000000004 RSI: ffffc06800bdbc6c RDI: ffff9e2b598b60f0 [ 7790.868834] RBP: ffffc06800bdbcf8 R08: 0000000000000000 R09: 0000000000000004 [ 7790.870432] R10: 00000000ff6f7a03 R11: 0000000000000000 R12: 0000000000000001 [ 7790.871859] R13: ffffc06800bdbcb8 R14: 0000000000000000 R15: ffff9e2acd0a5310 [ 7790.873455] ? vfs_read+0x5/0x170 [ 7790.874639] ? vfs_read+0x1/0x170 [ 7790.875834] ? trace_call_bpf+0xf6/0x260 [ 7790.877044] ? vfs_read+0x1/0x170 [ 7790.878208] ? vfs_read+0x5/0x170 [ 7790.879345] ? kprobe_perf_func+0x233/0x260 [ 7790.880503] ? vfs_read+0x1/0x170 [ 7790.881632] ? vfs_read+0x5/0x170 [ 7790.882751] ? kprobe_ftrace_handler+0x92/0xf0 [ 7790.883926] ? __vfs_read+0x30/0x30 [ 7790.885050] ? ftrace_ops_assist_func+0x94/0x100 [ 7790.886183] ? vfs_read+0x1/0x170 [ 7790.887283] ? vfs_read+0x5/0x170 [ 7790.888348] ? ksys_read+0x5a/0xe0 [ 7790.889389] ? do_syscall_64+0x5c/0xa0 [ 7790.890401] ? entry_SYSCALL_64_after_hwframe+0x49/0xbe After some debugging, turns out that the logic in 2cbd95a5c4fb ("bpf: change parameters of call/branch offset adjustment") has a bug that is exposed after 52875a04f4b2 ("bpf: verifier: remove dead code") in that we miss some of the jump offset adjustments after code patching when we remove dead code, more concretely, upon backward jump spanning over the area that is being removed. BPF insns of a case that was hit pre 52875a04f4b2: [...] 676: (85) call bpf_perf_event_output#-47616 677: (05) goto pc-636 678: (62) *(u32 *)(r10 -64) = 0 679: (bf) r7 = r10 680: (07) r7 += -64 681: (05) goto pc-44 682: (05) goto pc-1 683: (05) goto pc-1 BPF insns afterwards: [...] 618: (85) call bpf_perf_event_output#-47616 619: (05) goto pc-638 620: (62) *(u32 *)(r10 -64) = 0 621: (bf) r7 = r10 622: (07) r7 += -64 623: (05) goto pc-44 To illustrate the bug, situation looks as follows: ____ 0 | | <-- foo: [...] 1 |____| 2 |____| <-- pos / end_new ^ 3 | | | 4 | | | len 5 |____| | (remove region) 6 | | <-- end_old v 7 | | 8 | | <-- curr (jmp foo) 9 |____| The condition curr >= end_new && curr + off + 1 < end_new in the branch delta adjustments is never hit because curr + off + 1 < end_new is compared as unsigned and therefore curr + off + 1 > end_new in unsigned realm as curr + off + 1 becomes negative since the insns are memmove()'d before the offset adjustments. Correct BPF insns after this fix: [...] 618: (85) call bpf_perf_event_output#-47216 619: (05) goto pc-578 620: (62) *(u32 *)(r10 -64) = 0 621: (bf) r7 = r10 622: (07) r7 += -64 623: (05) goto pc-44 Note that unprivileged case is not affected from this. Fixes: 52875a04f4b2 ("bpf: verifier: remove dead code") Fixes: 2cbd95a5c4fb ("bpf: change parameters of call/branch offset adjustment") Reported-by: Frank Ch. Eigler Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 3ba56e73c90e..242a643af82f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -338,7 +338,7 @@ int bpf_prog_calc_tag(struct bpf_prog *fp) } static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old, - s32 end_new, u32 curr, const bool probe_pass) + s32 end_new, s32 curr, const bool probe_pass) { const s64 imm_min = S32_MIN, imm_max = S32_MAX; s32 delta = end_new - end_old; @@ -356,7 +356,7 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old, } static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old, - s32 end_new, u32 curr, const bool probe_pass) + s32 end_new, s32 curr, const bool probe_pass) { const s32 off_min = S16_MIN, off_max = S16_MAX; s32 delta = end_new - end_old; -- cgit From ecebc5ce59a003163eb608ace38a01d7ffeb0a95 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 22 Mar 2019 18:52:27 -0700 Subject: kdb: Get rid of broken attempt to print CCVERSION in kdb summary If you drop into kdb and type "summary", it prints out a line that says this: ccversion CCVERSION ...and I don't mean that it actually prints out the version of the C compiler. It literally prints out the string "CCVERSION". The version of the C Compiler is already printed at boot up and it doesn't seem useful to replicate this in kdb. Let's just delete it. We can also delete the bit of the Makefile that called the C compiler in an attempt to pass this into kdb. This will remove one extra call to the C compiler at Makefile parse time and (very slightly) speed up builds. Signed-off-by: Douglas Anderson Reviewed-by: Masahiro Yamada Signed-off-by: Daniel Thompson --- kernel/debug/kdb/Makefile | 1 - kernel/debug/kdb/kdb_main.c | 1 - 2 files changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/Makefile b/kernel/debug/kdb/Makefile index d4fc58f4b88d..efac857c5511 100644 --- a/kernel/debug/kdb/Makefile +++ b/kernel/debug/kdb/Makefile @@ -6,7 +6,6 @@ # Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved. # -CCVERSION := $(shell $(CC) -v 2>&1 | sed -ne '$$p') obj-y := kdb_io.o kdb_main.o kdb_support.o kdb_bt.o gen-kdb_cmds.o kdb_bp.o kdb_debugger.o obj-$(CONFIG_KDB_KEYBOARD) += kdb_keyboard.o diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 82a3b32a7cfc..fc96dbf8d9de 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2522,7 +2522,6 @@ static int kdb_summary(int argc, const char **argv) kdb_printf("machine %s\n", init_uts_ns.name.machine); kdb_printf("nodename %s\n", init_uts_ns.name.nodename); kdb_printf("domainname %s\n", init_uts_ns.name.domainname); - kdb_printf("ccversion %s\n", __stringify(CCVERSION)); now = __ktime_get_real_seconds(); time64_to_tm(now, 0, &tm); -- cgit From b586627e10f57ee3aa8f0cfab0d6f7dc4ae63760 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 6 May 2019 15:50:18 +0300 Subject: kdb: do a sanity check on the cpu in kdb_per_cpu() The "whichcpu" comes from argv[3]. The cpu_online() macro looks up the cpu in a bitmap of online cpus, but if the value is too high then it could read beyond the end of the bitmap and possibly Oops. Fixes: 5d5314d6795f ("kdb: core for kgdb back end (1 of 2)") Signed-off-by: Dan Carpenter Reviewed-by: Douglas Anderson Signed-off-by: Daniel Thompson --- kernel/debug/kdb/kdb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index fc96dbf8d9de..9ecfa37c7fbf 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2583,7 +2583,7 @@ static int kdb_per_cpu(int argc, const char **argv) diag = kdbgetularg(argv[3], &whichcpu); if (diag) return diag; - if (!cpu_online(whichcpu)) { + if (whichcpu >= nr_cpu_ids || !cpu_online(whichcpu)) { kdb_printf("cpu %ld is not online\n", whichcpu); return KDB_BADCPUNUM; } -- cgit From e2f7fc0ac6957cabff4cecf6c721979b571af208 Mon Sep 17 00:00:00 2001 From: Krzesimir Nowak Date: Wed, 8 May 2019 18:08:58 +0200 Subject: bpf: fix undefined behavior in narrow load handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 31fd85816dbe ("bpf: permits narrower load from bpf program context fields") made the verifier add AND instructions to clear the unwanted bits with a mask when doing a narrow load. The mask is computed with (1 << size * 8) - 1 where "size" is the size of the narrow load. When doing a 4 byte load of a an 8 byte field the verifier shifts the literal 1 by 32 places to the left. This results in an overflow of a signed integer, which is an undefined behavior. Typically, the computed mask was zero, so the result of the narrow load ended up being zero too. Cast the literal to long long to avoid overflows. Note that narrow load of the 4 byte fields does not have the undefined behavior, because the load size can only be either 1 or 2 bytes, so shifting 1 by 8 or 16 places will not overflow it. And reading 4 bytes would not be a narrow load of a 4 bytes field. Fixes: 31fd85816dbe ("bpf: permits narrower load from bpf program context fields") Reviewed-by: Alban Crequy Reviewed-by: Iago López Galeiras Signed-off-by: Krzesimir Nowak Cc: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7b05e8938d5c..95f9354495ad 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7599,7 +7599,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) insn->dst_reg, shift); insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, - (1 << size * 8) - 1); + (1ULL << size * 8) - 1); } } -- cgit From 2baae3545327632167c0180e9ca1d467416f1919 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 May 2019 09:59:16 -0700 Subject: bpf: devmap: fix use-after-free Read in __dev_map_entry_free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit synchronize_rcu() is fine when the rcu callbacks only need to free memory (kfree_rcu() or direct kfree() call rcu call backs) __dev_map_entry_free() is a bit more complex, so we need to make sure that call queued __dev_map_entry_free() callbacks have completed. sysbot report: BUG: KASAN: use-after-free in dev_map_flush_old kernel/bpf/devmap.c:365 [inline] BUG: KASAN: use-after-free in __dev_map_entry_free+0x2a8/0x300 kernel/bpf/devmap.c:379 Read of size 8 at addr ffff8801b8da38c8 by task ksoftirqd/1/18 CPU: 1 PID: 18 Comm: ksoftirqd/1 Not tainted 4.17.0+ #39 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1b9/0x294 lib/dump_stack.c:113 print_address_description+0x6c/0x20b mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 dev_map_flush_old kernel/bpf/devmap.c:365 [inline] __dev_map_entry_free+0x2a8/0x300 kernel/bpf/devmap.c:379 __rcu_reclaim kernel/rcu/rcu.h:178 [inline] rcu_do_batch kernel/rcu/tree.c:2558 [inline] invoke_rcu_callbacks kernel/rcu/tree.c:2818 [inline] __rcu_process_callbacks kernel/rcu/tree.c:2785 [inline] rcu_process_callbacks+0xe9d/0x1760 kernel/rcu/tree.c:2802 __do_softirq+0x2e0/0xaf5 kernel/softirq.c:284 run_ksoftirqd+0x86/0x100 kernel/softirq.c:645 smpboot_thread_fn+0x417/0x870 kernel/smpboot.c:164 kthread+0x345/0x410 kernel/kthread.c:240 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412 Allocated by task 6675: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553 kmem_cache_alloc_trace+0x152/0x780 mm/slab.c:3620 kmalloc include/linux/slab.h:513 [inline] kzalloc include/linux/slab.h:706 [inline] dev_map_alloc+0x208/0x7f0 kernel/bpf/devmap.c:102 find_and_alloc_map kernel/bpf/syscall.c:129 [inline] map_create+0x393/0x1010 kernel/bpf/syscall.c:453 __do_sys_bpf kernel/bpf/syscall.c:2351 [inline] __se_sys_bpf kernel/bpf/syscall.c:2328 [inline] __x64_sys_bpf+0x303/0x510 kernel/bpf/syscall.c:2328 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 26: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kfree+0xd9/0x260 mm/slab.c:3813 dev_map_free+0x4fa/0x670 kernel/bpf/devmap.c:191 bpf_map_free_deferred+0xba/0xf0 kernel/bpf/syscall.c:262 process_one_work+0xc64/0x1b70 kernel/workqueue.c:2153 worker_thread+0x181/0x13a0 kernel/workqueue.c:2296 kthread+0x345/0x410 kernel/kthread.c:240 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412 The buggy address belongs to the object at ffff8801b8da37c0 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 264 bytes inside of 512-byte region [ffff8801b8da37c0, ffff8801b8da39c0) The buggy address belongs to the page: page:ffffea0006e368c0 count:1 mapcount:0 mapping:ffff8801da800940 index:0xffff8801b8da3540 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffffea0007217b88 ffffea0006e30cc8 ffff8801da800940 raw: ffff8801b8da3540 ffff8801b8da3040 0000000100000004 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801b8da3780: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ffff8801b8da3800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > ffff8801b8da3880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8801b8da3900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801b8da3980: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc Fixes: 546ac1ffb70d ("bpf: add devmap, a map for storing net device references") Signed-off-by: Eric Dumazet Reported-by: syzbot+457d3e2ffbcf31aee5c0@syzkaller.appspotmail.com Acked-by: Toke Høiland-Jørgensen Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- kernel/bpf/devmap.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 191b79948424..1e525d70f833 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -164,6 +164,9 @@ static void dev_map_free(struct bpf_map *map) bpf_clear_redirect_map(map); synchronize_rcu(); + /* Make sure prior __dev_map_entry_free() have completed. */ + rcu_barrier(); + /* To ensure all pending flush operations have completed wait for flush * bitmap to indicate all flush_needed bits to be zero on _all_ cpus. * Because the above synchronize_rcu() ensures the map is disconnected -- cgit From 390e99cfdda1334f45c718cc02cd26eb3135f233 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 13 May 2019 12:04:36 -0700 Subject: bpf: mark bpf_event_notify and bpf_event_init as static Both of them are not declared in the headers and not used outside of bpf_trace.c file. Fixes: a38d1107f937c ("bpf: support raw tracepoints in modules") Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- kernel/trace/bpf_trace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b496ffdf5f36..f92d6ad5e080 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1297,7 +1297,8 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, } #ifdef CONFIG_MODULES -int bpf_event_notify(struct notifier_block *nb, unsigned long op, void *module) +static int bpf_event_notify(struct notifier_block *nb, unsigned long op, + void *module) { struct bpf_trace_module *btm, *tmp; struct module *mod = module; @@ -1336,7 +1337,7 @@ static struct notifier_block bpf_module_nb = { .notifier_call = bpf_event_notify, }; -int __init bpf_event_init(void) +static int __init bpf_event_init(void) { register_module_notifier(&bpf_module_nb); return 0; -- cgit From ca976bfb3154c7bc67c4651ecd144fdf67ccaee7 Mon Sep 17 00:00:00 2001 From: Wenlin Kang Date: Mon, 13 May 2019 16:57:20 +0800 Subject: kdb: Fix bound check compiler warning The strncpy() function may leave the destination string buffer unterminated, better use strscpy() instead. This fixes the following warning with gcc 8.2: kernel/debug/kdb/kdb_io.c: In function 'kdb_getstr': kernel/debug/kdb/kdb_io.c:449:3: warning: 'strncpy' specified bound 256 equals destination size [-Wstringop-truncation] strncpy(kdb_prompt_str, prompt, CMD_BUFLEN); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Wenlin Kang Signed-off-by: Daniel Thompson --- kernel/debug/kdb/kdb_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 6a4b41484afe..3a5184eb6977 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -446,7 +446,7 @@ poll_again: char *kdb_getstr(char *buffer, size_t bufsize, const char *prompt) { if (prompt && kdb_prompt_str != prompt) - strncpy(kdb_prompt_str, prompt, CMD_BUFLEN); + strscpy(kdb_prompt_str, prompt, CMD_BUFLEN); kdb_printf(kdb_prompt_str); kdb_nextline = 1; /* Prompt and input resets line number */ return kdb_read(buffer, bufsize); -- cgit From a9e73998f9d705c94a8dca9687633adc0f24a19a Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 13 May 2019 17:15:40 -0700 Subject: kernel/sys.c: prctl: fix false positive in validate_prctl_map() While validating new map we require the @start_data to be strictly less than @end_data, which is fine for regular applications (this is why this nit didn't trigger for that long). These members are set from executable loaders such as elf handers, still it is pretty valid to have a loadable data section with zero size in file, in such case the start_data is equal to end_data once kernel loader finishes. As a result when we're trying to restore such programs the procedure fails and the kernel returns -EINVAL. From the image dump of a program: | "mm_start_code": "0x400000", | "mm_end_code": "0x8f5fb4", | "mm_start_data": "0xf1bfb0", | "mm_end_data": "0xf1bfb0", Thus we need to change validate_prctl_map from strictly less to less or equal operator use. Link: http://lkml.kernel.org/r/20190408143554.GY1421@uranus.lan Fixes: f606b77f1a9e3 ("prctl: PR_SET_MM -- introduce PR_SET_MM_MAP operation") Signed-off-by: Cyrill Gorcunov Cc: Andrey Vagin Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index 12df0e5434b8..bdbfe8d37418 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1924,7 +1924,7 @@ static int validate_prctl_map(struct prctl_mm_map *prctl_map) ((unsigned long)prctl_map->__m1 __op \ (unsigned long)prctl_map->__m2) ? 0 : -EINVAL error = __prctl_check_order(start_code, <, end_code); - error |= __prctl_check_order(start_data, <, end_data); + error |= __prctl_check_order(start_data,<=, end_data); error |= __prctl_check_order(start_brk, <=, brk); error |= __prctl_check_order(arg_start, <=, arg_end); error |= __prctl_check_order(env_start, <=, env_end); -- cgit From cefdca0a86be517bc390fc4541e3674b8e7803b0 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 13 May 2019 17:16:41 -0700 Subject: userfaultfd/sysctl: add vm.unprivileged_userfaultfd Userfaultfd can be misued to make it easier to exploit existing use-after-free (and similar) bugs that might otherwise only make a short window or race condition available. By using userfaultfd to stall a kernel thread, a malicious program can keep some state that it wrote, stable for an extended period, which it can then access using an existing exploit. While it doesn't cause the exploit itself, and while it's not the only thing that can stall a kernel thread when accessing a memory location, it's one of the few that never needs privilege. We can add a flag, allowing userfaultfd to be restricted, so that in general it won't be useable by arbitrary user programs, but in environments that require userfaultfd it can be turned back on. Add a global sysctl knob "vm.unprivileged_userfaultfd" to control whether userfaultfd is allowed by unprivileged users. When this is set to zero, only privileged users (root user, or users with the CAP_SYS_PTRACE capability) will be able to use the userfaultfd syscalls. Andrea said: : The only difference between the bpf sysctl and the userfaultfd sysctl : this way is that the bpf sysctl adds the CAP_SYS_ADMIN capability : requirement, while userfaultfd adds the CAP_SYS_PTRACE requirement, : because the userfaultfd monitor is more likely to need CAP_SYS_PTRACE : already if it's doing other kind of tracking on processes runtime, in : addition of userfaultfd. In other words both syscalls works only for : root, when the two sysctl are opt-in set to 1. [dgilbert@redhat.com: changelog additions] [akpm@linux-foundation.org: documentation tweak, per Mike] Link: http://lkml.kernel.org/r/20190319030722.12441-2-peterx@redhat.com Signed-off-by: Peter Xu Suggested-by: Andrea Arcangeli Suggested-by: Mike Rapoport Reviewed-by: Mike Rapoport Reviewed-by: Andrea Arcangeli Cc: Paolo Bonzini Cc: Hugh Dickins Cc: Luis Chamberlain Cc: Maxime Coquelin Cc: Maya Gokhale Cc: Jerome Glisse Cc: Pavel Emelyanov Cc: Johannes Weiner Cc: Martin Cracauer Cc: Denis Plotnikov Cc: Marty McFadden Cc: Mike Kravetz Cc: Kees Cook Cc: Mel Gorman Cc: "Kirill A . Shutemov" Cc: "Dr . David Alan Gilbert" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 12 ++++++++++++ fs/userfaultfd.c | 5 +++++ include/linux/userfaultfd_k.h | 2 ++ kernel/sysctl.c | 12 ++++++++++++ 4 files changed, 31 insertions(+) (limited to 'kernel') diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 3f13d8599337..749322060f10 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -61,6 +61,7 @@ Currently, these files are in /proc/sys/vm: - stat_refresh - numa_stat - swappiness +- unprivileged_userfaultfd - user_reserve_kbytes - vfs_cache_pressure - watermark_boost_factor @@ -818,6 +819,17 @@ The default value is 60. ============================================================== +unprivileged_userfaultfd + +This flag controls whether unprivileged users can use the userfaultfd +system calls. Set this to 1 to allow unprivileged users to use the +userfaultfd system calls, or set this to 0 to restrict userfaultfd to only +privileged users (with SYS_CAP_PTRACE capability). + +The default value is 1. + +============================================================== + - user_reserve_kbytes When overcommit_memory is set to 2, "never overcommit" mode, reserve diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f5de1e726356..3b30301c90ec 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -30,6 +30,8 @@ #include #include +int sysctl_unprivileged_userfaultfd __read_mostly = 1; + static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; enum userfaultfd_state { @@ -1930,6 +1932,9 @@ SYSCALL_DEFINE1(userfaultfd, int, flags) struct userfaultfd_ctx *ctx; int fd; + if (!sysctl_unprivileged_userfaultfd && !capable(CAP_SYS_PTRACE)) + return -EPERM; + BUG_ON(!current->mm); /* Check the UFFD_* constants for consistency. */ diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 37c9eba75c98..ac9d71e24b81 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -28,6 +28,8 @@ #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS) +extern int sysctl_unprivileged_userfaultfd; + extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 599510a3355e..ba158f61aab4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -66,6 +66,7 @@ #include #include #include +#include #include "../lib/kstrtox.h" @@ -1719,6 +1720,17 @@ static struct ctl_table vm_table[] = { .extra1 = (void *)&mmap_rnd_compat_bits_min, .extra2 = (void *)&mmap_rnd_compat_bits_max, }, +#endif +#ifdef CONFIG_USERFAULTFD + { + .procname = "unprivileged_userfaultfd", + .data = &sysctl_unprivileged_userfaultfd, + .maxlen = sizeof(sysctl_unprivileged_userfaultfd), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, #endif { } }; -- cgit From 73b0140bf0fe9df90fb267c00673c4b9bf285430 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 13 May 2019 17:17:11 -0700 Subject: mm/gup: change GUP fast to use flags rather than a write 'bool' To facilitate additional options to get_user_pages_fast() change the singular write parameter to be gup_flags. This patch does not change any functionality. New functionality will follow in subsequent patches. Some of the get_user_pages_fast() call sites were unchanged because they already passed FOLL_WRITE or 0 for the write parameter. NOTE: It was suggested to change the ordering of the get_user_pages_fast() arguments to ensure that callers were converted. This breaks the current GUP call site convention of having the returned pages be the final parameter. So the suggestion was rejected. Link: http://lkml.kernel.org/r/20190328084422.29911-4-ira.weiny@intel.com Link: http://lkml.kernel.org/r/20190317183438.2057-4-ira.weiny@intel.com Signed-off-by: Ira Weiny Reviewed-by: Mike Marshall Cc: Aneesh Kumar K.V Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Dan Williams Cc: "David S. Miller" Cc: Heiko Carstens Cc: Ingo Molnar Cc: James Hogan Cc: Jason Gunthorpe Cc: John Hubbard Cc: "Kirill A. Shutemov" Cc: Martin Schwidefsky Cc: Michal Hocko Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Rich Felker Cc: Thomas Gleixner Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/mm/gup.c | 11 ++++++----- arch/powerpc/kvm/book3s_64_mmu_hv.c | 4 ++-- arch/powerpc/kvm/e500_mmu.c | 2 +- arch/s390/kvm/interrupt.c | 2 +- arch/sh/mm/gup.c | 11 ++++++----- arch/sparc/mm/gup.c | 9 +++++---- arch/x86/kvm/paging_tmpl.h | 2 +- arch/x86/kvm/svm.c | 2 +- drivers/fpga/dfl-afu-dma-region.c | 2 +- drivers/gpu/drm/via/via_dmablit.c | 3 ++- drivers/infiniband/hw/hfi1/user_pages.c | 3 ++- drivers/misc/genwqe/card_utils.c | 2 +- drivers/misc/vmw_vmci/vmci_host.c | 2 +- drivers/misc/vmw_vmci/vmci_queue_pair.c | 6 ++++-- drivers/platform/goldfish/goldfish_pipe.c | 3 ++- drivers/rapidio/devices/rio_mport_cdev.c | 4 +++- drivers/sbus/char/oradax.c | 2 +- drivers/scsi/st.c | 3 ++- drivers/staging/gasket/gasket_page_table.c | 4 ++-- drivers/tee/tee_shm.c | 2 +- drivers/vfio/vfio_iommu_spapr_tce.c | 3 ++- drivers/vhost/vhost.c | 2 +- drivers/video/fbdev/pvr2fb.c | 2 +- drivers/virt/fsl_hypervisor.c | 2 +- drivers/xen/gntdev.c | 2 +- fs/orangefs/orangefs-bufmap.c | 2 +- include/linux/mm.h | 4 ++-- kernel/futex.c | 2 +- lib/iov_iter.c | 7 +++++-- mm/gup.c | 10 +++++----- mm/util.c | 8 ++++---- net/ceph/pagevec.c | 2 +- net/rds/info.c | 2 +- net/rds/rdma.c | 3 ++- 34 files changed, 73 insertions(+), 57 deletions(-) (limited to 'kernel') diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 0d14e0d8eacf..4c2b4483683c 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -235,7 +235,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * get_user_pages_fast() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to + * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * @@ -247,8 +247,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * requested. If nr_pages is 0 or negative, returns 0. If no pages * were pinned, returns -errno. */ -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) +int get_user_pages_fast(unsigned long start, int nr_pages, + unsigned int gup_flags, struct page **pages) { struct mm_struct *mm = current->mm; unsigned long addr, len, end; @@ -273,7 +273,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, next = pgd_addr_end(addr, end); if (pgd_none(pgd)) goto slow; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE, + pages, &nr)) goto slow; } while (pgdp++, addr = next, addr != end); local_irq_enable(); @@ -289,7 +290,7 @@ slow_irqon: pages += nr; ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, - pages, write ? FOLL_WRITE : 0); + pages, gup_flags); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index be7bc070eae5..ab3d484c5e2e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -600,7 +600,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* If writing != 0, then the HPTE must allow writing, if we get here */ write_ok = writing; hva = gfn_to_hva_memslot(memslot, gfn); - npages = get_user_pages_fast(hva, 1, writing, pages); + npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages); if (npages < 1) { /* Check if it's an I/O mapping */ down_read(¤t->mm->mmap_sem); @@ -1193,7 +1193,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) goto err; hva = gfn_to_hva_memslot(memslot, gfn); - npages = get_user_pages_fast(hva, 1, 1, pages); + npages = get_user_pages_fast(hva, 1, FOLL_WRITE, pages); if (npages < 1) goto err; page = pages[0]; diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 24296f4cadc6..e0af53fd78c5 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -783,7 +783,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, if (!pages) return -ENOMEM; - ret = get_user_pages_fast(cfg->array, num_pages, 1, pages); + ret = get_user_pages_fast(cfg->array, num_pages, FOLL_WRITE, pages); if (ret < 0) goto free_pages; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 37503ae62486..1fd706f6206c 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2376,7 +2376,7 @@ static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr) ret = -EFAULT; goto out; } - ret = get_user_pages_fast(map->addr, 1, 1, &map->page); + ret = get_user_pages_fast(map->addr, 1, FOLL_WRITE, &map->page); if (ret < 0) goto out; BUG_ON(ret != 1); diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c index 3e27f6d1f1ec..277c882f7489 100644 --- a/arch/sh/mm/gup.c +++ b/arch/sh/mm/gup.c @@ -204,7 +204,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * get_user_pages_fast() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to + * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * @@ -216,8 +216,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * requested. If nr_pages is 0 or negative, returns 0. If no pages * were pinned, returns -errno. */ -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) +int get_user_pages_fast(unsigned long start, int nr_pages, + unsigned int gup_flags, struct page **pages) { struct mm_struct *mm = current->mm; unsigned long addr, len, end; @@ -241,7 +241,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, next = pgd_addr_end(addr, end); if (pgd_none(pgd)) goto slow; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE, + pages, &nr)) goto slow; } while (pgdp++, addr = next, addr != end); local_irq_enable(); @@ -261,7 +262,7 @@ slow_irqon: ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, pages, - write ? FOLL_WRITE : 0); + gup_flags); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index aee6dba83d0e..1e770a517d4a 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -245,8 +245,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, return nr; } -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) +int get_user_pages_fast(unsigned long start, int nr_pages, + unsigned int gup_flags, struct page **pages) { struct mm_struct *mm = current->mm; unsigned long addr, len, end; @@ -303,7 +303,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, next = pgd_addr_end(addr, end); if (pgd_none(pgd)) goto slow; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE, + pages, &nr)) goto slow; } while (pgdp++, addr = next, addr != end); @@ -324,7 +325,7 @@ slow: ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, pages, - write ? FOLL_WRITE : 0); + gup_flags); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6bdca39829bc..08715034e315 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -140,7 +140,7 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t *table; struct page *page; - npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page); + npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); /* Check if the user is doing something meaningless. */ if (unlikely(npages != 1)) return -EFAULT; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 406b558abfef..6b92eaf4a3b1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1805,7 +1805,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, return NULL; /* Pin the user virtual address. */ - npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages); + npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages); if (npinned != npages) { pr_err("SEV: Failure locking %lu pages.\n", npages); goto err; diff --git a/drivers/fpga/dfl-afu-dma-region.c b/drivers/fpga/dfl-afu-dma-region.c index e18a786fc943..c438722bf4e1 100644 --- a/drivers/fpga/dfl-afu-dma-region.c +++ b/drivers/fpga/dfl-afu-dma-region.c @@ -102,7 +102,7 @@ static int afu_dma_pin_pages(struct dfl_feature_platform_data *pdata, goto unlock_vm; } - pinned = get_user_pages_fast(region->user_addr, npages, 1, + pinned = get_user_pages_fast(region->user_addr, npages, FOLL_WRITE, region->pages); if (pinned < 0) { ret = pinned; diff --git a/drivers/gpu/drm/via/via_dmablit.c b/drivers/gpu/drm/via/via_dmablit.c index 8bf3a7c23ed3..062067438f1d 100644 --- a/drivers/gpu/drm/via/via_dmablit.c +++ b/drivers/gpu/drm/via/via_dmablit.c @@ -243,7 +243,8 @@ via_lock_all_dma_pages(drm_via_sg_info_t *vsg, drm_via_dmablit_t *xfer) if (NULL == vsg->pages) return -ENOMEM; ret = get_user_pages_fast((unsigned long)xfer->mem_addr, - vsg->num_pages, vsg->direction == DMA_FROM_DEVICE, + vsg->num_pages, + vsg->direction == DMA_FROM_DEVICE ? FOLL_WRITE : 0, vsg->pages); if (ret != vsg->num_pages) { if (ret < 0) diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 24b592c6522e..78ccacaf97d0 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -105,7 +105,8 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np { int ret; - ret = get_user_pages_fast(vaddr, npages, writable, pages); + ret = get_user_pages_fast(vaddr, npages, writable ? FOLL_WRITE : 0, + pages); if (ret < 0) return ret; diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c index 25265fd0fd6e..89cff9d1012b 100644 --- a/drivers/misc/genwqe/card_utils.c +++ b/drivers/misc/genwqe/card_utils.c @@ -603,7 +603,7 @@ int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr, /* pin user pages in memory */ rc = get_user_pages_fast(data & PAGE_MASK, /* page aligned addr */ m->nr_pages, - m->write, /* readable/writable */ + m->write ? FOLL_WRITE : 0, /* readable/writable */ m->page_list); /* ptrs to pages */ if (rc < 0) goto fail_get_user_pages; diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c index 997f92543dd4..422d08da3244 100644 --- a/drivers/misc/vmw_vmci/vmci_host.c +++ b/drivers/misc/vmw_vmci/vmci_host.c @@ -242,7 +242,7 @@ static int vmci_host_setup_notify(struct vmci_ctx *context, /* * Lock physical page backing a given user VA. */ - retval = get_user_pages_fast(uva, 1, 1, &context->notify_page); + retval = get_user_pages_fast(uva, 1, FOLL_WRITE, &context->notify_page); if (retval != 1) { context->notify_page = NULL; return VMCI_ERROR_GENERIC; diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index f5f1aac9d163..1174735f003d 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -659,7 +659,8 @@ static int qp_host_get_user_memory(u64 produce_uva, int err = VMCI_SUCCESS; retval = get_user_pages_fast((uintptr_t) produce_uva, - produce_q->kernel_if->num_pages, 1, + produce_q->kernel_if->num_pages, + FOLL_WRITE, produce_q->kernel_if->u.h.header_page); if (retval < (int)produce_q->kernel_if->num_pages) { pr_debug("get_user_pages_fast(produce) failed (retval=%d)", @@ -671,7 +672,8 @@ static int qp_host_get_user_memory(u64 produce_uva, } retval = get_user_pages_fast((uintptr_t) consume_uva, - consume_q->kernel_if->num_pages, 1, + consume_q->kernel_if->num_pages, + FOLL_WRITE, consume_q->kernel_if->u.h.header_page); if (retval < (int)consume_q->kernel_if->num_pages) { pr_debug("get_user_pages_fast(consume) failed (retval=%d)", diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index 321bc673c417..cef0133aa47a 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -274,7 +274,8 @@ static int pin_user_pages(unsigned long first_page, *iter_last_page_size = last_page_size; } - ret = get_user_pages_fast(first_page, requested_pages, !is_write, + ret = get_user_pages_fast(first_page, requested_pages, + !is_write ? FOLL_WRITE : 0, pages); if (ret <= 0) return -EFAULT; diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 1e1f42e210a0..4a4a75fa26d5 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -868,7 +868,9 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode, pinned = get_user_pages_fast( (unsigned long)xfer->loc_addr & PAGE_MASK, - nr_pages, dir == DMA_FROM_DEVICE, page_list); + nr_pages, + dir == DMA_FROM_DEVICE ? FOLL_WRITE : 0, + page_list); if (pinned != nr_pages) { if (pinned < 0) { diff --git a/drivers/sbus/char/oradax.c b/drivers/sbus/char/oradax.c index acd9ba40eabe..8090dc9a1514 100644 --- a/drivers/sbus/char/oradax.c +++ b/drivers/sbus/char/oradax.c @@ -437,7 +437,7 @@ static int dax_lock_page(void *va, struct page **p) dax_dbg("uva %p", va); - ret = get_user_pages_fast((unsigned long)va, 1, 1, p); + ret = get_user_pages_fast((unsigned long)va, 1, FOLL_WRITE, p); if (ret == 1) { dax_dbg("locked page %p, for VA %p", *p, va); return 0; diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 19c022e66d63..3c6a18ad9a87 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4922,7 +4922,8 @@ static int sgl_map_user_pages(struct st_buffer *STbp, /* Try to fault in all of the necessary pages */ /* rw==READ means read from drive, write into memory area */ - res = get_user_pages_fast(uaddr, nr_pages, rw == READ, pages); + res = get_user_pages_fast(uaddr, nr_pages, rw == READ ? FOLL_WRITE : 0, + pages); /* Errors and no page mapped should return here */ if (res < nr_pages) diff --git a/drivers/staging/gasket/gasket_page_table.c b/drivers/staging/gasket/gasket_page_table.c index 600928f63577..d35c4fb19e28 100644 --- a/drivers/staging/gasket/gasket_page_table.c +++ b/drivers/staging/gasket/gasket_page_table.c @@ -486,8 +486,8 @@ static int gasket_perform_mapping(struct gasket_page_table *pg_tbl, ptes[i].dma_addr = pg_tbl->coherent_pages[0].paddr + off + i * PAGE_SIZE; } else { - ret = get_user_pages_fast(page_addr - offset, 1, 1, - &page); + ret = get_user_pages_fast(page_addr - offset, 1, + FOLL_WRITE, &page); if (ret <= 0) { dev_err(pg_tbl->device, diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 0b9ab1d0dd45..49fd7312e2aa 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -273,7 +273,7 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, goto err; } - rc = get_user_pages_fast(start, num_pages, 1, shm->pages); + rc = get_user_pages_fast(start, num_pages, FOLL_WRITE, shm->pages); if (rc > 0) shm->num_pages = rc; if (rc != num_pages) { diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 6b64e45a5269..40ddc0c5f677 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -532,7 +532,8 @@ static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa) enum dma_data_direction direction = iommu_tce_direction(tce); if (get_user_pages_fast(tce & PAGE_MASK, 1, - direction != DMA_TO_DEVICE, &page) != 1) + direction != DMA_TO_DEVICE ? FOLL_WRITE : 0, + &page) != 1) return -EFAULT; *hpa = __pa((unsigned long) page_address(page)); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 351af88231ad..1e3ed41ae1f3 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1704,7 +1704,7 @@ static int set_bit_to_user(int nr, void __user *addr) int bit = nr + (log % PAGE_SIZE) * 8; int r; - r = get_user_pages_fast(log, 1, 1, &page); + r = get_user_pages_fast(log, 1, FOLL_WRITE, &page); if (r < 0) return r; BUG_ON(r != 1); diff --git a/drivers/video/fbdev/pvr2fb.c b/drivers/video/fbdev/pvr2fb.c index dfed532ed606..4e4d6a0df978 100644 --- a/drivers/video/fbdev/pvr2fb.c +++ b/drivers/video/fbdev/pvr2fb.c @@ -686,7 +686,7 @@ static ssize_t pvr2fb_write(struct fb_info *info, const char *buf, if (!pages) return -ENOMEM; - ret = get_user_pages_fast((unsigned long)buf, nr_pages, true, pages); + ret = get_user_pages_fast((unsigned long)buf, nr_pages, FOLL_WRITE, pages); if (ret < nr_pages) { nr_pages = ret; ret = -EINVAL; diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c index 8ba726e600e9..6446bcab4185 100644 --- a/drivers/virt/fsl_hypervisor.c +++ b/drivers/virt/fsl_hypervisor.c @@ -244,7 +244,7 @@ static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p) /* Get the physical addresses of the source buffer */ num_pinned = get_user_pages_fast(param.local_vaddr - lb_offset, - num_pages, param.source != -1, pages); + num_pages, param.source != -1 ? FOLL_WRITE : 0, pages); if (num_pinned != num_pages) { /* get_user_pages() failed */ diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 7cf9c51318aa..02bc815982d4 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -852,7 +852,7 @@ static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt, unsigned long xen_pfn; int ret; - ret = get_user_pages_fast(addr, 1, writeable, &page); + ret = get_user_pages_fast(addr, 1, writeable ? FOLL_WRITE : 0, &page); if (ret < 0) return ret; diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index d4811f981608..2bb916d68576 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -269,7 +269,7 @@ orangefs_bufmap_map(struct orangefs_bufmap *bufmap, /* map the pages */ ret = get_user_pages_fast((unsigned long)user_desc->ptr, - bufmap->page_count, 1, bufmap->page_array); + bufmap->page_count, FOLL_WRITE, bufmap->page_array); if (ret < 0) return ret; diff --git a/include/linux/mm.h b/include/linux/mm.h index 8bc677ce8f01..c3c73b3c9adc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1505,8 +1505,8 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages); +int get_user_pages_fast(unsigned long start, int nr_pages, + unsigned int gup_flags, struct page **pages); /* Container for pinned pfns / pages */ struct frame_vector { diff --git a/kernel/futex.c b/kernel/futex.c index 6262f1534ac9..2268b97d5439 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -543,7 +543,7 @@ again: if (unlikely(should_fail_futex(fshared))) return -EFAULT; - err = get_user_pages_fast(address, 1, 1, &page); + err = get_user_pages_fast(address, 1, FOLL_WRITE, &page); /* * If write access is not required (eg. FUTEX_WAIT), try * and get read-only access. diff --git a/lib/iov_iter.c b/lib/iov_iter.c index b396d328a764..f74fa832f3aa 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1293,7 +1293,9 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, len = maxpages * PAGE_SIZE; addr &= ~(PAGE_SIZE - 1); n = DIV_ROUND_UP(len, PAGE_SIZE); - res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, pages); + res = get_user_pages_fast(addr, n, + iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, + pages); if (unlikely(res < 0)) return res; return (res == n ? len : res * PAGE_SIZE) - *start; @@ -1374,7 +1376,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, p = get_pages_array(n); if (!p) return -ENOMEM; - res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, p); + res = get_user_pages_fast(addr, n, + iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p); if (unlikely(res < 0)) { kvfree(p); return res; diff --git a/mm/gup.c b/mm/gup.c index 113c18a98cf5..3dde6a8da670 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2062,7 +2062,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * get_user_pages_fast() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to + * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * @@ -2074,8 +2074,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * requested. If nr_pages is 0 or negative, returns 0. If no pages * were pinned, returns -errno. */ -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) +int get_user_pages_fast(unsigned long start, int nr_pages, + unsigned int gup_flags, struct page **pages) { unsigned long addr, len, end; int nr = 0, ret = 0; @@ -2093,7 +2093,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, if (gup_fast_permitted(start, nr_pages)) { local_irq_disable(); - gup_pgd_range(addr, end, write ? FOLL_WRITE : 0, pages, &nr); + gup_pgd_range(addr, end, gup_flags, pages, &nr); local_irq_enable(); ret = nr; } @@ -2104,7 +2104,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, pages += nr; ret = get_user_pages_unlocked(start, nr_pages - nr, pages, - write ? FOLL_WRITE : 0); + gup_flags); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/mm/util.c b/mm/util.c index 43a2984bccaa..05a464929b3e 100644 --- a/mm/util.c +++ b/mm/util.c @@ -318,7 +318,7 @@ EXPORT_SYMBOL_GPL(__get_user_pages_fast); * get_user_pages_fast() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to + * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * @@ -339,10 +339,10 @@ EXPORT_SYMBOL_GPL(__get_user_pages_fast); * were pinned, returns -errno. */ int __weak get_user_pages_fast(unsigned long start, - int nr_pages, int write, struct page **pages) + int nr_pages, unsigned int gup_flags, + struct page **pages) { - return get_user_pages_unlocked(start, nr_pages, pages, - write ? FOLL_WRITE : 0); + return get_user_pages_unlocked(start, nr_pages, pages, gup_flags); } EXPORT_SYMBOL_GPL(get_user_pages_fast); diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index d3736f5bffec..74cafc0142ea 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -27,7 +27,7 @@ struct page **ceph_get_direct_page_vector(const void __user *data, while (got < num_pages) { rc = get_user_pages_fast( (unsigned long)data + ((unsigned long)got * PAGE_SIZE), - num_pages - got, write_page, pages + got); + num_pages - got, write_page ? FOLL_WRITE : 0, pages + got); if (rc < 0) break; BUG_ON(rc == 0); diff --git a/net/rds/info.c b/net/rds/info.c index e367a97a18c8..03f6fd56d237 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -193,7 +193,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, ret = -ENOMEM; goto out; } - ret = get_user_pages_fast(start, nr_pages, 1, pages); + ret = get_user_pages_fast(start, nr_pages, FOLL_WRITE, pages); if (ret != nr_pages) { if (ret > 0) nr_pages = ret; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 182ab8430594..b340ed4fc43a 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -158,7 +158,8 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages, { int ret; - ret = get_user_pages_fast(user_addr, nr_pages, write, pages); + ret = get_user_pages_fast(user_addr, nr_pages, write ? FOLL_WRITE : 0, + pages); if (ret >= 0 && ret < nr_pages) { while (ret--) -- cgit From 6f4f13e8d9e27cefd2cd88dd4fd80aa6d68b9131 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Mon, 13 May 2019 17:20:49 -0700 Subject: mm/mmu_notifier: contextual information for event triggering invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CPU page table update can happens for many reasons, not only as a result of a syscall (munmap(), mprotect(), mremap(), madvise(), ...) but also as a result of kernel activities (memory compression, reclaim, migration, ...). Users of mmu notifier API track changes to the CPU page table and take specific action for them. While current API only provide range of virtual address affected by the change, not why the changes is happening. This patchset do the initial mechanical convertion of all the places that calls mmu_notifier_range_init to also provide the default MMU_NOTIFY_UNMAP event as well as the vma if it is know (most invalidation happens against a given vma). Passing down the vma allows the users of mmu notifier to inspect the new vma page protection. The MMU_NOTIFY_UNMAP is always the safe default as users of mmu notifier should assume that every for the range is going away when that event happens. A latter patch do convert mm call path to use a more appropriate events for each call. This is done as 2 patches so that no call site is forgotten especialy as it uses this following coccinelle patch: %<---------------------------------------------------------------------- @@ identifier I1, I2, I3, I4; @@ static inline void mmu_notifier_range_init(struct mmu_notifier_range *I1, +enum mmu_notifier_event event, +unsigned flags, +struct vm_area_struct *vma, struct mm_struct *I2, unsigned long I3, unsigned long I4) { ... } @@ @@ -#define mmu_notifier_range_init(range, mm, start, end) +#define mmu_notifier_range_init(range, event, flags, vma, mm, start, end) @@ expression E1, E3, E4; identifier I1; @@ <... mmu_notifier_range_init(E1, +MMU_NOTIFY_UNMAP, 0, I1, I1->vm_mm, E3, E4) ...> @@ expression E1, E2, E3, E4; identifier FN, VMA; @@ FN(..., struct vm_area_struct *VMA, ...) { <... mmu_notifier_range_init(E1, +MMU_NOTIFY_UNMAP, 0, VMA, E2, E3, E4) ...> } @@ expression E1, E2, E3, E4; identifier FN, VMA; @@ FN(...) { struct vm_area_struct *VMA; <... mmu_notifier_range_init(E1, +MMU_NOTIFY_UNMAP, 0, VMA, E2, E3, E4) ...> } @@ expression E1, E2, E3, E4; identifier FN; @@ FN(...) { <... mmu_notifier_range_init(E1, +MMU_NOTIFY_UNMAP, 0, NULL, E2, E3, E4) ...> } ---------------------------------------------------------------------->% Applied with: spatch --all-includes --sp-file mmu-notifier.spatch fs/proc/task_mmu.c --in-place spatch --sp-file mmu-notifier.spatch --dir kernel/events/ --in-place spatch --sp-file mmu-notifier.spatch --dir mm --in-place Link: http://lkml.kernel.org/r/20190326164747.24405-6-jglisse@redhat.com Signed-off-by: Jérôme Glisse Reviewed-by: Ralph Campbell Reviewed-by: Ira Weiny Cc: Christian König Cc: Joonas Lahtinen Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Jan Kara Cc: Andrea Arcangeli Cc: Peter Xu Cc: Felix Kuehling Cc: Jason Gunthorpe Cc: Ross Zwisler Cc: Dan Williams Cc: Paolo Bonzini Cc: Radim Krcmar Cc: Michal Hocko Cc: Christian Koenig Cc: John Hubbard Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 3 ++- include/linux/mmu_notifier.h | 5 ++++- kernel/events/uprobes.c | 3 ++- mm/huge_memory.c | 12 ++++++++---- mm/hugetlb.c | 12 ++++++++---- mm/khugepaged.c | 3 ++- mm/ksm.c | 6 ++++-- mm/madvise.c | 3 ++- mm/memory.c | 25 ++++++++++++++++--------- mm/migrate.c | 5 ++++- mm/mprotect.c | 3 ++- mm/mremap.c | 3 ++- mm/oom_kill.c | 3 ++- mm/rmap.c | 6 ++++-- 14 files changed, 62 insertions(+), 30 deletions(-) (limited to 'kernel') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 95ca1fe7283c..ea464f2b9867 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1169,7 +1169,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, break; } - mmu_notifier_range_init(&range, mm, 0, -1UL); + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, + NULL, mm, 0, -1UL); mmu_notifier_invalidate_range_start(&range); } walk_page_range(0, mm->highest_vm_end, &clear_refs_walk); diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 2386e71ac1b8..62f94cd85455 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -356,6 +356,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) static inline void mmu_notifier_range_init(struct mmu_notifier_range *range, + enum mmu_notifier_event event, + unsigned flags, + struct vm_area_struct *vma, struct mm_struct *mm, unsigned long start, unsigned long end) @@ -491,7 +494,7 @@ static inline void _mmu_notifier_range_init(struct mmu_notifier_range *range, range->end = end; } -#define mmu_notifier_range_init(range, mm, start, end) \ +#define mmu_notifier_range_init(range,event,flags,vma,mm,start,end) \ _mmu_notifier_range_init(range, start, end) static inline bool diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 4ca7364c956d..e34b699f3865 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -161,7 +161,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, struct mmu_notifier_range range; struct mem_cgroup *memcg; - mmu_notifier_range_init(&range, mm, addr, addr + PAGE_SIZE); + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, addr, + addr + PAGE_SIZE); VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 50c665b12cf1..428b5794f4b8 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1224,7 +1224,8 @@ static vm_fault_t do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, cond_resched(); } - mmu_notifier_range_init(&range, vma->vm_mm, haddr, + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, + haddr, haddr + HPAGE_PMD_SIZE); mmu_notifier_invalidate_range_start(&range); @@ -1388,7 +1389,8 @@ alloc: vma, HPAGE_PMD_NR); __SetPageUptodate(new_page); - mmu_notifier_range_init(&range, vma->vm_mm, haddr, + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, + haddr, haddr + HPAGE_PMD_SIZE); mmu_notifier_invalidate_range_start(&range); @@ -2064,7 +2066,8 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, spinlock_t *ptl; struct mmu_notifier_range range; - mmu_notifier_range_init(&range, vma->vm_mm, address & HPAGE_PUD_MASK, + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, + address & HPAGE_PUD_MASK, (address & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE); mmu_notifier_invalidate_range_start(&range); ptl = pud_lock(vma->vm_mm, pud); @@ -2282,7 +2285,8 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, spinlock_t *ptl; struct mmu_notifier_range range; - mmu_notifier_range_init(&range, vma->vm_mm, address & HPAGE_PMD_MASK, + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, + address & HPAGE_PMD_MASK, (address & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE); mmu_notifier_invalidate_range_start(&range); ptl = pmd_lock(vma->vm_mm, pmd); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 98a3c7c224cb..89d206d6ecf3 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3294,7 +3294,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; if (cow) { - mmu_notifier_range_init(&range, src, vma->vm_start, + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, src, + vma->vm_start, vma->vm_end); mmu_notifier_invalidate_range_start(&range); } @@ -3406,7 +3407,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, /* * If sharing possible, alert mmu notifiers of worst case. */ - mmu_notifier_range_init(&range, mm, start, end); + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, start, + end); adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); mmu_notifier_invalidate_range_start(&range); address = start; @@ -3673,7 +3675,8 @@ retry_avoidcopy: pages_per_huge_page(h)); __SetPageUptodate(new_page); - mmu_notifier_range_init(&range, mm, haddr, haddr + huge_page_size(h)); + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, haddr, + haddr + huge_page_size(h)); mmu_notifier_invalidate_range_start(&range); /* @@ -4408,7 +4411,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, * start/end. Set range.start/range.end to cover the maximum possible * range if PMD sharing is possible. */ - mmu_notifier_range_init(&range, mm, start, end); + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, start, + end); adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); BUG_ON(address >= end); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 7ba7a1e4fa79..14581dbf62a5 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1016,7 +1016,8 @@ static void collapse_huge_page(struct mm_struct *mm, pte = pte_offset_map(pmd, address); pte_ptl = pte_lockptr(mm, pmd); - mmu_notifier_range_init(&range, mm, address, address + HPAGE_PMD_SIZE); + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, NULL, mm, + address, address + HPAGE_PMD_SIZE); mmu_notifier_invalidate_range_start(&range); pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */ /* diff --git a/mm/ksm.c b/mm/ksm.c index fc64874dc6f4..01f5fe2c90cf 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1066,7 +1066,8 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, BUG_ON(PageTransCompound(page)); - mmu_notifier_range_init(&range, mm, pvmw.address, + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, + pvmw.address, pvmw.address + PAGE_SIZE); mmu_notifier_invalidate_range_start(&range); @@ -1154,7 +1155,8 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, if (!pmd) goto out; - mmu_notifier_range_init(&range, mm, addr, addr + PAGE_SIZE); + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, addr, + addr + PAGE_SIZE); mmu_notifier_invalidate_range_start(&range); ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); diff --git a/mm/madvise.c b/mm/madvise.c index bb3a4554d5d5..1c52bdf1b696 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -472,7 +472,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, range.end = min(vma->vm_end, end_addr); if (range.end <= vma->vm_start) return -EINVAL; - mmu_notifier_range_init(&range, mm, range.start, range.end); + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, + range.start, range.end); lru_add_drain(); tlb_gather_mmu(&tlb, mm, range.start, range.end); diff --git a/mm/memory.c b/mm/memory.c index f7d962d7de19..90672674c582 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1010,7 +1010,8 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, is_cow = is_cow_mapping(vma->vm_flags); if (is_cow) { - mmu_notifier_range_init(&range, src_mm, addr, end); + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, + src_mm, addr, end); mmu_notifier_invalidate_range_start(&range); } @@ -1334,7 +1335,8 @@ void unmap_vmas(struct mmu_gather *tlb, { struct mmu_notifier_range range; - mmu_notifier_range_init(&range, vma->vm_mm, start_addr, end_addr); + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, + start_addr, end_addr); mmu_notifier_invalidate_range_start(&range); for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) unmap_single_vma(tlb, vma, start_addr, end_addr, NULL); @@ -1356,7 +1358,8 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, struct mmu_gather tlb; lru_add_drain(); - mmu_notifier_range_init(&range, vma->vm_mm, start, start + size); + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, + start, start + size); tlb_gather_mmu(&tlb, vma->vm_mm, start, range.end); update_hiwater_rss(vma->vm_mm); mmu_notifier_invalidate_range_start(&range); @@ -1382,7 +1385,8 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr struct mmu_gather tlb; lru_add_drain(); - mmu_notifier_range_init(&range, vma->vm_mm, address, address + size); + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, + address, address + size); tlb_gather_mmu(&tlb, vma->vm_mm, address, range.end); update_hiwater_rss(vma->vm_mm); mmu_notifier_invalidate_range_start(&range); @@ -2279,7 +2283,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) __SetPageUptodate(new_page); - mmu_notifier_range_init(&range, mm, vmf->address & PAGE_MASK, + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, + vmf->address & PAGE_MASK, (vmf->address & PAGE_MASK) + PAGE_SIZE); mmu_notifier_invalidate_range_start(&range); @@ -4104,8 +4109,9 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, goto out; if (range) { - mmu_notifier_range_init(range, mm, address & PMD_MASK, - (address & PMD_MASK) + PMD_SIZE); + mmu_notifier_range_init(range, MMU_NOTIFY_UNMAP, 0, + NULL, mm, address & PMD_MASK, + (address & PMD_MASK) + PMD_SIZE); mmu_notifier_invalidate_range_start(range); } *ptlp = pmd_lock(mm, pmd); @@ -4122,8 +4128,9 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, goto out; if (range) { - mmu_notifier_range_init(range, mm, address & PAGE_MASK, - (address & PAGE_MASK) + PAGE_SIZE); + mmu_notifier_range_init(range, MMU_NOTIFY_UNMAP, 0, NULL, mm, + address & PAGE_MASK, + (address & PAGE_MASK) + PAGE_SIZE); mmu_notifier_invalidate_range_start(range); } ptep = pte_offset_map_lock(mm, pmd, address, ptlp); diff --git a/mm/migrate.c b/mm/migrate.c index a1770403ff7f..855bdb3b3333 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2356,7 +2356,8 @@ static void migrate_vma_collect(struct migrate_vma *migrate) mm_walk.mm = migrate->vma->vm_mm; mm_walk.private = migrate; - mmu_notifier_range_init(&range, mm_walk.mm, migrate->start, + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, NULL, mm_walk.mm, + migrate->start, migrate->end); mmu_notifier_invalidate_range_start(&range); walk_page_range(migrate->start, migrate->end, &mm_walk); @@ -2764,6 +2765,8 @@ static void migrate_vma_pages(struct migrate_vma *migrate) notified = true; mmu_notifier_range_init(&range, + MMU_NOTIFY_UNMAP, 0, + NULL, migrate->vma->vm_mm, addr, migrate->end); mmu_notifier_invalidate_range_start(&range); diff --git a/mm/mprotect.c b/mm/mprotect.c index 028c724dcb1a..b10984052ae9 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -185,7 +185,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, /* invoke the mmu notifier if the pmd is populated */ if (!range.start) { - mmu_notifier_range_init(&range, vma->vm_mm, addr, end); + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, + vma, vma->vm_mm, addr, end); mmu_notifier_invalidate_range_start(&range); } diff --git a/mm/mremap.c b/mm/mremap.c index e3edef6b7a12..fc241d23cd97 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -249,7 +249,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, old_end = old_addr + len; flush_cache_range(vma, old_addr, old_end); - mmu_notifier_range_init(&range, vma->vm_mm, old_addr, old_end); + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, + old_addr, old_end); mmu_notifier_invalidate_range_start(&range); for (; old_addr < old_end; old_addr += extent, new_addr += extent) { diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 3a2484884cfd..539c91d0b26a 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -531,7 +531,8 @@ bool __oom_reap_task_mm(struct mm_struct *mm) struct mmu_notifier_range range; struct mmu_gather tlb; - mmu_notifier_range_init(&range, mm, vma->vm_start, + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, + vma, mm, vma->vm_start, vma->vm_end); tlb_gather_mmu(&tlb, mm, range.start, range.end); if (mmu_notifier_invalidate_range_start_nonblock(&range)) { diff --git a/mm/rmap.c b/mm/rmap.c index 76c8dfd3ae1c..288e636b7813 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -896,7 +896,8 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, * We have to assume the worse case ie pmd for invalidation. Note that * the page can not be free from this function. */ - mmu_notifier_range_init(&range, vma->vm_mm, address, + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, + address, min(vma->vm_end, address + (PAGE_SIZE << compound_order(page)))); mmu_notifier_invalidate_range_start(&range); @@ -1371,7 +1372,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * Note that the page can not be free in this function as call of * try_to_unmap() must hold a reference on the page. */ - mmu_notifier_range_init(&range, vma->vm_mm, address, + mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, + address, min(vma->vm_end, address + (PAGE_SIZE << compound_order(page)))); if (PageHuge(page)) { -- cgit From 7269f999934b289da7972e975b781417b07ef836 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Mon, 13 May 2019 17:20:53 -0700 Subject: mm/mmu_notifier: use correct mmu_notifier events for each invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This updates each existing invalidation to use the correct mmu notifier event that represent what is happening to the CPU page table. See the patch which introduced the events to see the rational behind this. Link: http://lkml.kernel.org/r/20190326164747.24405-7-jglisse@redhat.com Signed-off-by: Jérôme Glisse Reviewed-by: Ralph Campbell Reviewed-by: Ira Weiny Cc: Christian König Cc: Joonas Lahtinen Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Jan Kara Cc: Andrea Arcangeli Cc: Peter Xu Cc: Felix Kuehling Cc: Jason Gunthorpe Cc: Ross Zwisler Cc: Dan Williams Cc: Paolo Bonzini Cc: Radim Krcmar Cc: Michal Hocko Cc: Christian Koenig Cc: John Hubbard Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 4 ++-- kernel/events/uprobes.c | 2 +- mm/huge_memory.c | 14 ++++++-------- mm/hugetlb.c | 8 ++++---- mm/khugepaged.c | 2 +- mm/ksm.c | 4 ++-- mm/madvise.c | 2 +- mm/memory.c | 14 +++++++------- mm/migrate.c | 4 ++-- mm/mprotect.c | 5 +++-- mm/rmap.c | 6 +++--- 11 files changed, 32 insertions(+), 33 deletions(-) (limited to 'kernel') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ea464f2b9867..01d4eb0e6bd1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1169,8 +1169,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, break; } - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, - NULL, mm, 0, -1UL); + mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, + 0, NULL, mm, 0, -1UL); mmu_notifier_invalidate_range_start(&range); } walk_page_range(0, mm->highest_vm_end, &clear_refs_walk); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index e34b699f3865..78f61bfc6b79 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -161,7 +161,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, struct mmu_notifier_range range; struct mem_cgroup *memcg; - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, addr, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr, addr + PAGE_SIZE); VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 428b5794f4b8..61b1e05e86ee 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1224,9 +1224,8 @@ static vm_fault_t do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, cond_resched(); } - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, - haddr, - haddr + HPAGE_PMD_SIZE); + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, + haddr, haddr + HPAGE_PMD_SIZE); mmu_notifier_invalidate_range_start(&range); vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); @@ -1389,9 +1388,8 @@ alloc: vma, HPAGE_PMD_NR); __SetPageUptodate(new_page); - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, - haddr, - haddr + HPAGE_PMD_SIZE); + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, + haddr, haddr + HPAGE_PMD_SIZE); mmu_notifier_invalidate_range_start(&range); spin_lock(vmf->ptl); @@ -2066,7 +2064,7 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, spinlock_t *ptl; struct mmu_notifier_range range; - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, address & HPAGE_PUD_MASK, (address & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE); mmu_notifier_invalidate_range_start(&range); @@ -2285,7 +2283,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, spinlock_t *ptl; struct mmu_notifier_range range; - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, address & HPAGE_PMD_MASK, (address & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE); mmu_notifier_invalidate_range_start(&range); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 89d206d6ecf3..cab38ef30238 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3294,7 +3294,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; if (cow) { - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, src, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, src, vma->vm_start, vma->vm_end); mmu_notifier_invalidate_range_start(&range); @@ -3675,7 +3675,7 @@ retry_avoidcopy: pages_per_huge_page(h)); __SetPageUptodate(new_page); - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, haddr, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, haddr, haddr + huge_page_size(h)); mmu_notifier_invalidate_range_start(&range); @@ -4411,8 +4411,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, * start/end. Set range.start/range.end to cover the maximum possible * range if PMD sharing is possible. */ - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, start, - end); + mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_VMA, + 0, vma, mm, start, end); adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); BUG_ON(address >= end); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 14581dbf62a5..a335f7c1fac4 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1016,7 +1016,7 @@ static void collapse_huge_page(struct mm_struct *mm, pte = pte_offset_map(pmd, address); pte_ptl = pte_lockptr(mm, pmd); - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, NULL, mm, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, address, address + HPAGE_PMD_SIZE); mmu_notifier_invalidate_range_start(&range); pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */ diff --git a/mm/ksm.c b/mm/ksm.c index 01f5fe2c90cf..81c20ed57bf6 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1066,7 +1066,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, BUG_ON(PageTransCompound(page)); - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, pvmw.address, pvmw.address + PAGE_SIZE); mmu_notifier_invalidate_range_start(&range); @@ -1155,7 +1155,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, if (!pmd) goto out; - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, addr, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr, addr + PAGE_SIZE); mmu_notifier_invalidate_range_start(&range); diff --git a/mm/madvise.c b/mm/madvise.c index 1c52bdf1b696..628022e674a7 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -472,7 +472,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, range.end = min(vma->vm_end, end_addr); if (range.end <= vma->vm_start) return -EINVAL; - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, range.start, range.end); lru_add_drain(); diff --git a/mm/memory.c b/mm/memory.c index 90672674c582..9b68a72f8c17 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1010,8 +1010,8 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, is_cow = is_cow_mapping(vma->vm_flags); if (is_cow) { - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, - src_mm, addr, end); + mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, + 0, vma, src_mm, addr, end); mmu_notifier_invalidate_range_start(&range); } @@ -1358,7 +1358,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, struct mmu_gather tlb; lru_add_drain(); - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, start, start + size); tlb_gather_mmu(&tlb, vma->vm_mm, start, range.end); update_hiwater_rss(vma->vm_mm); @@ -1385,7 +1385,7 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr struct mmu_gather tlb; lru_add_drain(); - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, address, address + size); tlb_gather_mmu(&tlb, vma->vm_mm, address, range.end); update_hiwater_rss(vma->vm_mm); @@ -2283,7 +2283,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) __SetPageUptodate(new_page); - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, vmf->address & PAGE_MASK, (vmf->address & PAGE_MASK) + PAGE_SIZE); mmu_notifier_invalidate_range_start(&range); @@ -4109,7 +4109,7 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, goto out; if (range) { - mmu_notifier_range_init(range, MMU_NOTIFY_UNMAP, 0, + mmu_notifier_range_init(range, MMU_NOTIFY_CLEAR, 0, NULL, mm, address & PMD_MASK, (address & PMD_MASK) + PMD_SIZE); mmu_notifier_invalidate_range_start(range); @@ -4128,7 +4128,7 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, goto out; if (range) { - mmu_notifier_range_init(range, MMU_NOTIFY_UNMAP, 0, NULL, mm, + mmu_notifier_range_init(range, MMU_NOTIFY_CLEAR, 0, NULL, mm, address & PAGE_MASK, (address & PAGE_MASK) + PAGE_SIZE); mmu_notifier_invalidate_range_start(range); diff --git a/mm/migrate.c b/mm/migrate.c index 855bdb3b3333..f2ecc2855a12 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2356,7 +2356,7 @@ static void migrate_vma_collect(struct migrate_vma *migrate) mm_walk.mm = migrate->vma->vm_mm; mm_walk.private = migrate; - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, NULL, mm_walk.mm, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm_walk.mm, migrate->start, migrate->end); mmu_notifier_invalidate_range_start(&range); @@ -2765,7 +2765,7 @@ static void migrate_vma_pages(struct migrate_vma *migrate) notified = true; mmu_notifier_range_init(&range, - MMU_NOTIFY_UNMAP, 0, + MMU_NOTIFY_CLEAR, 0, NULL, migrate->vma->vm_mm, addr, migrate->end); diff --git a/mm/mprotect.c b/mm/mprotect.c index b10984052ae9..65242f1e4457 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -185,8 +185,9 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, /* invoke the mmu notifier if the pmd is populated */ if (!range.start) { - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, - vma, vma->vm_mm, addr, end); + mmu_notifier_range_init(&range, + MMU_NOTIFY_PROTECTION_VMA, 0, + vma, vma->vm_mm, addr, end); mmu_notifier_invalidate_range_start(&range); } diff --git a/mm/rmap.c b/mm/rmap.c index 288e636b7813..0cbed70700ed 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -896,8 +896,8 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, * We have to assume the worse case ie pmd for invalidation. Note that * the page can not be free from this function. */ - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, - address, + mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, + 0, vma, vma->vm_mm, address, min(vma->vm_end, address + (PAGE_SIZE << compound_order(page)))); mmu_notifier_invalidate_range_start(&range); @@ -1372,7 +1372,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * Note that the page can not be free in this function as call of * try_to_unmap() must hold a reference on the page. */ - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm, + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, address, min(vma->vm_end, address + (PAGE_SIZE << compound_order(page)))); -- cgit From 940519f0c8b757fdcbc5d14c93cdaada20ded14c Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 13 May 2019 17:21:26 -0700 Subject: mm, memory_hotplug: provide a more generic restrictions for memory hotplug arch_add_memory, __add_pages take a want_memblock which controls whether the newly added memory should get the sysfs memblock user API (e.g. ZONE_DEVICE users do not want/need this interface). Some callers even want to control where do we allocate the memmap from by configuring altmap. Add a more generic hotplug context for arch_add_memory and __add_pages. struct mhp_restrictions contains flags which contains additional features to be enabled by the memory hotplug (MHP_MEMBLOCK_API currently) and altmap for alternative memmap allocator. This patch shouldn't introduce any functional change. [akpm@linux-foundation.org: build fix] Link: http://lkml.kernel.org/r/20190408082633.2864-3-osalvador@suse.de Signed-off-by: Michal Hocko Signed-off-by: Oscar Salvador Cc: Dan Williams Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/mm/mmu.c | 6 +++--- arch/ia64/mm/init.c | 6 +++--- arch/powerpc/mm/mem.c | 6 +++--- arch/s390/mm/init.c | 6 +++--- arch/sh/mm/init.c | 6 +++--- arch/x86/mm/init_32.c | 6 +++--- arch/x86/mm/init_64.c | 10 +++++----- include/linux/memory_hotplug.h | 31 ++++++++++++++++++++++++------- kernel/memremap.c | 12 +++++++++--- mm/memory_hotplug.c | 11 +++++++---- 10 files changed, 63 insertions(+), 37 deletions(-) (limited to 'kernel') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ef82312860ac..ef32d4839c3f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1065,8 +1065,8 @@ int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { int flags = 0; @@ -1077,6 +1077,6 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, size, PAGE_KERNEL, __pgd_pgtable_alloc, flags); return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, - altmap, want_memblock); + restrictions); } #endif diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index e49200e31750..379eb1f9adc9 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -666,14 +666,14 @@ mem_init (void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", __func__, ret); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 20266898f3a8..de5c591a550d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -109,8 +109,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -int __ref arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int __ref arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -127,7 +127,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altm } flush_inval_dcache_range(start, start + size); - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, restrictions); } #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 5f48fc7e61d5..06bd05137a00 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -219,8 +219,8 @@ device_initcall(s390_cma_mem_init); #endif /* CONFIG_CMA */ -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = PFN_DOWN(start); unsigned long size_pages = PFN_DOWN(size); @@ -230,7 +230,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, if (rc) return rc; - rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock); + rc = __add_pages(nid, start_pfn, size_pages, restrictions); if (rc) vmem_remove_mapping(start, size); return rc; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index aeb9f45c7a39..d3cd07bd2dc1 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -404,15 +404,15 @@ void __init mem_init(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; int ret; /* We only have ZONE_NORMAL, so this is easy.. */ - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 85c94f9a87f8..755dbed85531 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -850,13 +850,13 @@ void __init mem_init(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, restrictions); } #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bccff68e3267..db42c11b48fb 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -777,11 +777,11 @@ static void update_end_of_memory_vars(u64 start, u64 size) } int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct vmem_altmap *altmap, bool want_memblock) + struct mhp_restrictions *restrictions) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -791,15 +791,15 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, return ret; } -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; init_memory_mapping(start, start + size); - return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + return add_pages(nid, start_pfn, nr_pages, restrictions); } #define PAGE_INUSE 0xFD diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 3c8cf347804c..b24aca54353e 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -53,6 +53,16 @@ enum { MMOP_ONLINE_MOVABLE, }; +/* + * Restrictions for the memory hotplug: + * flags: MHP_ flags + * altmap: alternative allocator for memmap array + */ +struct mhp_restrictions { + unsigned long flags; + struct vmem_altmap *altmap; +}; + /* * Zone resizing functions * @@ -101,6 +111,8 @@ extern void __online_page_free(struct page *page); extern int try_online_node(int nid); +extern int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions); extern u64 max_mem_size; extern bool memhp_auto_online; @@ -118,20 +130,27 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); #endif /* CONFIG_MEMORY_HOTREMOVE */ +/* + * Do we want sysfs memblock files created. This will allow userspace to online + * and offline memory explicitly. Lack of this bit means that the caller has to + * call move_pfn_range_to_zone to finish the initialization. + */ + +#define MHP_MEMBLOCK_API (1<<0) + /* reasonably generic interface to expand the physical pages */ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct vmem_altmap *altmap, bool want_memblock); + struct mhp_restrictions *restrictions); #ifndef CONFIG_ARCH_HAS_ADD_PAGES static inline int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, struct vmem_altmap *altmap, - bool want_memblock) + unsigned long nr_pages, struct mhp_restrictions *restrictions) { - return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, restrictions); } #else /* ARCH_HAS_ADD_PAGES */ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct vmem_altmap *altmap, bool want_memblock); + struct mhp_restrictions *restrictions); #endif /* ARCH_HAS_ADD_PAGES */ #ifdef CONFIG_NUMA @@ -332,8 +351,6 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, extern int __add_memory(int nid, u64 start, u64 size); extern int add_memory(int nid, u64 start, u64 size); extern int add_memory_resource(int nid, struct resource *resource); -extern int arch_add_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap, bool want_memblock); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); extern bool is_memblock_offlined(struct memory_block *mem); diff --git a/kernel/memremap.c b/kernel/memremap.c index a856cb5ff192..4e59d29245f4 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -148,6 +148,12 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) &pgmap->altmap : NULL; struct resource *res = &pgmap->res; struct dev_pagemap *conflict_pgmap; + struct mhp_restrictions restrictions = { + /* + * We do not want any optional features only our own memmap + */ + .altmap = altmap, + }; pgprot_t pgprot = PAGE_KERNEL; int error, nid, is_ram; @@ -214,7 +220,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) */ if (pgmap->type == MEMORY_DEVICE_PRIVATE) { error = add_pages(nid, align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT, NULL, false); + align_size >> PAGE_SHIFT, &restrictions); } else { error = kasan_add_zero_shadow(__va(align_start), align_size); if (error) { @@ -222,8 +228,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) goto err_kasan; } - error = arch_add_memory(nid, align_start, align_size, altmap, - false); + error = arch_add_memory(nid, align_start, align_size, + &restrictions); } if (!error) { diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 75f9f6590677..339d5a62d5d5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -273,12 +273,12 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, * add the new pages. */ int __ref __add_pages(int nid, unsigned long phys_start_pfn, - unsigned long nr_pages, struct vmem_altmap *altmap, - bool want_memblock) + unsigned long nr_pages, struct mhp_restrictions *restrictions) { unsigned long i; int err = 0; int start_sec, end_sec; + struct vmem_altmap *altmap = restrictions->altmap; /* during initialize mem_map, align hot-added range to section */ start_sec = pfn_to_section_nr(phys_start_pfn); @@ -299,7 +299,7 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn, for (i = start_sec; i <= end_sec; i++) { err = __add_section(nid, section_nr_to_pfn(i), altmap, - want_memblock); + restrictions->flags & MHP_MEMBLOCK_API); /* * EEXIST is finally dealt with by ioresource collision @@ -1097,6 +1097,9 @@ static int online_memory_block(struct memory_block *mem, void *arg) */ int __ref add_memory_resource(int nid, struct resource *res) { + struct mhp_restrictions restrictions = { + .flags = MHP_MEMBLOCK_API, + }; u64 start, size; bool new_node = false; int ret; @@ -1124,7 +1127,7 @@ int __ref add_memory_resource(int nid, struct resource *res) new_node = ret; /* call arch's memory hotadd */ - ret = arch_add_memory(nid, start, size, NULL, true); + ret = arch_add_memory(nid, start, size, &restrictions); if (ret < 0) goto error; -- cgit From 350e88bad4964da6feabee02a1a70381bcdb087e Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 13 May 2019 17:22:59 -0700 Subject: mm: memblock: make keeping memblock memory opt-in rather than opt-out Most architectures do not need the memblock memory after the page allocator is initialized, but only few enable ARCH_DISCARD_MEMBLOCK in the arch Kconfig. Replacing ARCH_DISCARD_MEMBLOCK with ARCH_KEEP_MEMBLOCK and inverting the logic makes it clear which architectures actually use memblock after system initialization and skips the necessity to add ARCH_DISCARD_MEMBLOCK to the architectures that are still missing that option. Link: http://lkml.kernel.org/r/1556102150-32517-1-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Acked-by: Michael Ellerman (powerpc) Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Richard Kuo Cc: Tony Luck Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: Ley Foon Tan Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Rich Felker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/Kconfig | 2 +- arch/arm64/Kconfig | 1 + arch/hexagon/Kconfig | 1 - arch/ia64/Kconfig | 1 - arch/m68k/Kconfig | 1 - arch/mips/Kconfig | 1 - arch/nios2/Kconfig | 1 - arch/powerpc/Kconfig | 1 + arch/s390/Kconfig | 1 + arch/sh/Kconfig | 1 - arch/x86/Kconfig | 1 - include/linux/memblock.h | 3 ++- kernel/kexec_file.c | 16 ++++++++-------- mm/Kconfig | 2 +- mm/memblock.c | 6 +++--- mm/page_alloc.c | 3 +-- 16 files changed, 19 insertions(+), 23 deletions(-) (limited to 'kernel') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a11dfcc2a130..5fd344bd06b9 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -4,7 +4,6 @@ config ARM default y select ARCH_32BIT_OFF_T select ARCH_CLOCKSOURCE_DATA - select ARCH_DISCARD_MEMBLOCK if !HAVE_ARCH_PFN_VALID && !KEXEC select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE @@ -22,6 +21,7 @@ config ARM select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_KEEP_MEMBLOCK if HAVE_ARCH_PFN_VALID || KEXEC select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_NO_SG_CHAIN if !ARM_HAS_SG_CHAIN select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7a1aa53d188d..69a59a5d1143 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -60,6 +60,7 @@ config ARM64 select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPT select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPT select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPT + select ARCH_KEEP_MEMBLOCK select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 3e54a53208d5..b7d404bbaa0f 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -22,7 +22,6 @@ config HEXAGON select GENERIC_IRQ_SHOW select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK - select ARCH_DISCARD_MEMBLOCK select NEED_SG_DMA_LENGTH select NO_IOPORT_MAP select GENERIC_IOMAP diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 73a26f04644e..7468d8e50467 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -33,7 +33,6 @@ config IA64 select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB select ARCH_HAS_SYNC_DMA_FOR_CPU if SWIOTLB select VIRT_TO_BUS - select ARCH_DISCARD_MEMBLOCK select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index fe5cc2da6d10..218e037ef901 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -26,7 +26,6 @@ config M68K select MODULES_USE_ELF_RELA select OLD_SIGSUSPEND3 select OLD_SIGACTION - select ARCH_DISCARD_MEMBLOCK select MMU_GATHER_NO_RANGE if MMU config CPU_BIG_ENDIAN diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index ff8cff9fcf54..677e5bfeff47 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -5,7 +5,6 @@ config MIPS select ARCH_32BIT_OFF_T if !64BIT select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT select ARCH_CLOCKSOURCE_DATA - select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN_SANITIZE_ALL diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index ea37394ff3ea..26a9c760a98b 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -23,7 +23,6 @@ config NIOS2 select SPARSE_IRQ select USB_ARCH_HAS_HCD if USB_SUPPORT select CPU_NO_EFFICIENT_FFS - select ARCH_DISCARD_MEMBLOCK select MMU_GATHER_NO_RANGE if MMU config GENERIC_CSUM diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d7996cfaceca..8c1c636308c8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -137,6 +137,7 @@ config PPC select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64 select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_KEEP_MEMBLOCK select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index d0c046af65fa..109243fdb6ec 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -100,6 +100,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_BH select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + select ARCH_KEEP_MEMBLOCK select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 2a77033e1e7c..b77f512bb176 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -10,7 +10,6 @@ config SUPERH select DMA_DECLARE_COHERENT select HAVE_IDE if HAS_IOPORT_MAP select HAVE_MEMBLOCK_NODE_MAP - select ARCH_DISCARD_MEMBLOCK select HAVE_OPROFILE select HAVE_ARCH_TRACEHOOK select HAVE_PERF_EVENTS diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f21bc56e5d7b..818b361094ed 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -47,7 +47,6 @@ config X86 select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_DATA select ARCH_CLOCKSOURCE_INIT - select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 47e3c0612592..676d3900e1bd 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -96,13 +96,14 @@ struct memblock { extern struct memblock memblock; extern int memblock_debug; -#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK +#ifndef CONFIG_ARCH_KEEP_MEMBLOCK #define __init_memblock __meminit #define __initdata_memblock __meminitdata void memblock_discard(void); #else #define __init_memblock #define __initdata_memblock +static inline void memblock_discard(void) {} #endif #define memblock_dbg(fmt, ...) \ diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index f7fb8f6a688f..072b6ee55e3f 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -500,13 +500,7 @@ static int locate_mem_hole_callback(struct resource *res, void *arg) return locate_mem_hole_bottom_up(start, end, kbuf); } -#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK -static int kexec_walk_memblock(struct kexec_buf *kbuf, - int (*func)(struct resource *, void *)) -{ - return 0; -} -#else +#ifdef CONFIG_ARCH_KEEP_MEMBLOCK static int kexec_walk_memblock(struct kexec_buf *kbuf, int (*func)(struct resource *, void *)) { @@ -550,6 +544,12 @@ static int kexec_walk_memblock(struct kexec_buf *kbuf, return ret; } +#else +static int kexec_walk_memblock(struct kexec_buf *kbuf, + int (*func)(struct resource *, void *)) +{ + return 0; +} #endif /** @@ -589,7 +589,7 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf) if (kbuf->mem != KEXEC_BUF_MEM_UNKNOWN) return 0; - if (IS_ENABLED(CONFIG_ARCH_DISCARD_MEMBLOCK)) + if (!IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) ret = kexec_walk_resources(kbuf, locate_mem_hole_callback); else ret = kexec_walk_memblock(kbuf, locate_mem_hole_callback); diff --git a/mm/Kconfig b/mm/Kconfig index 71e697e693df..c5124c2cb0b2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -136,7 +136,7 @@ config HAVE_MEMBLOCK_PHYS_MAP config HAVE_GENERIC_GUP bool -config ARCH_DISCARD_MEMBLOCK +config ARCH_KEEP_MEMBLOCK bool config MEMORY_ISOLATION diff --git a/mm/memblock.c b/mm/memblock.c index f315eca9f4a1..6bbad46f4d2c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -94,7 +94,7 @@ * :c:func:`mem_init` function frees all the memory to the buddy page * allocator. * - * If an architecure enables %CONFIG_ARCH_DISCARD_MEMBLOCK, the + * Unless an architecure enables %CONFIG_ARCH_KEEP_MEMBLOCK, the * memblock data structures will be discarded after the system * initialization compltes. */ @@ -375,7 +375,7 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u } } -#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK +#ifndef CONFIG_ARCH_KEEP_MEMBLOCK /** * memblock_discard - discard memory and reserved arrays if they were allocated */ @@ -1987,7 +1987,7 @@ unsigned long __init memblock_free_all(void) return pages; } -#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_ARCH_DISCARD_MEMBLOCK) +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK) static int memblock_debug_show(struct seq_file *m, void *private) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cbda9aea0bf5..f2f3fb4921d1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1896,10 +1896,9 @@ void __init page_alloc_init_late(void) /* Reinit limits that are based on free pages after the kernel is up */ files_maxfiles_init(); #endif -#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK + /* Discard memblock private memory */ memblock_discard(); -#endif for_each_populated_zone(zone) set_zone_contiguous(zone); -- cgit From 640be2d1ffbc1946f1547eb89b5005ed7542de99 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 May 2019 17:23:23 -0700 Subject: kernel/memremap.c: remove the unused device_private_entry_fault() export This export has been entirely unused since it was added more than 1 1/2 years ago. Link: http://lkml.kernel.org/r/20190429115535.12793-1-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Dan Williams Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/memremap.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/memremap.c b/kernel/memremap.c index 4e59d29245f4..1490e63f69a9 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -45,7 +45,6 @@ vm_fault_t device_private_entry_fault(struct vm_area_struct *vma, */ return devmem->page_fault(vma, addr, page, flags, pmdp); } -EXPORT_SYMBOL(device_private_entry_fault); #endif /* CONFIG_DEVICE_PRIVATE */ static void pgmap_array_delete(struct resource *res) -- cgit From c6110222c6f49ea68169f353565eb865488a8619 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 May 2019 01:18:55 +0200 Subject: bpf: add map_lookup_elem_sys_only for lookups from syscall side Add a callback map_lookup_elem_sys_only() that map implementations could use over map_lookup_elem() from system call side in case the map implementation needs to handle the latter differently than from the BPF data path. If map_lookup_elem_sys_only() is set, this will be preferred pick for map lookups out of user space. This hook is used in a follow-up fix for LRU map, but once development window opens, we can convert other map types from map_lookup_elem() (here, the one called upon BPF_MAP_LOOKUP_ELEM cmd is meant) over to use the callback to simplify and clean up the latter. Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + kernel/bpf/syscall.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 59631dd0777c..4fb3aa2dc975 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -36,6 +36,7 @@ struct bpf_map_ops { void (*map_free)(struct bpf_map *map); int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); void (*map_release_uref)(struct bpf_map *map); + void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key); /* funcs callable from userspace and from eBPF programs */ void *(*map_lookup_elem)(struct bpf_map *map, void *key); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ad3ccf82f31d..cb5440b02e82 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -808,7 +808,10 @@ static int map_lookup_elem(union bpf_attr *attr) err = map->ops->map_peek_elem(map, value); } else { rcu_read_lock(); - ptr = map->ops->map_lookup_elem(map, key); + if (map->ops->map_lookup_elem_sys_only) + ptr = map->ops->map_lookup_elem_sys_only(map, key); + else + ptr = map->ops->map_lookup_elem(map, key); if (IS_ERR(ptr)) { err = PTR_ERR(ptr); } else if (!ptr) { -- cgit From 50b045a8c0ccf44f76640ac3eea8d80ca53979a3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 May 2019 01:18:56 +0200 Subject: bpf, lru: avoid messing with eviction heuristics upon syscall lookup One of the biggest issues we face right now with picking LRU map over regular hash table is that a map walk out of user space, for example, to just dump the existing entries or to remove certain ones, will completely mess up LRU eviction heuristics and wrong entries such as just created ones will get evicted instead. The reason for this is that we mark an entry as "in use" via bpf_lru_node_set_ref() from system call lookup side as well. Thus upon walk, all entries are being marked, so information of actual least recently used ones are "lost". In case of Cilium where it can be used (besides others) as a BPF based connection tracker, this current behavior causes disruption upon control plane changes that need to walk the map from user space to evict certain entries. Discussion result from bpfconf [0] was that we should simply just remove marking from system call side as no good use case could be found where it's actually needed there. Therefore this patch removes marking for regular LRU and per-CPU flavor. If there ever should be a need in future, the behavior could be selected via map creation flag, but due to mentioned reason we avoid this here. [0] http://vger.kernel.org/bpfconf.html Fixes: 29ba732acbee ("bpf: Add BPF_MAP_TYPE_LRU_HASH") Fixes: 8f8449384ec3 ("bpf: Add BPF_MAP_TYPE_LRU_PERCPU_HASH") Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- kernel/bpf/hashtab.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 192d32e77db3..0f2708fde5f7 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -527,18 +527,30 @@ static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) return insn - insn_buf; } -static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) +static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map, + void *key, const bool mark) { struct htab_elem *l = __htab_map_lookup_elem(map, key); if (l) { - bpf_lru_node_set_ref(&l->lru_node); + if (mark) + bpf_lru_node_set_ref(&l->lru_node); return l->key + round_up(map->key_size, 8); } return NULL; } +static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) +{ + return __htab_lru_map_lookup_elem(map, key, true); +} + +static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key) +{ + return __htab_lru_map_lookup_elem(map, key, false); +} + static u32 htab_lru_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { @@ -1250,6 +1262,7 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, .map_lookup_elem = htab_lru_map_lookup_elem, + .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, .map_update_elem = htab_lru_map_update_elem, .map_delete_elem = htab_lru_map_delete_elem, .map_gen_lookup = htab_lru_map_gen_lookup, @@ -1281,7 +1294,6 @@ static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l; void __percpu *pptr; int ret = -ENOENT; @@ -1297,8 +1309,9 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) l = __htab_map_lookup_elem(map, key); if (!l) goto out; - if (htab_is_lru(htab)) - bpf_lru_node_set_ref(&l->lru_node); + /* We do not mark LRU map element here in order to not mess up + * eviction heuristics when user space does a map walk. + */ pptr = htab_elem_get_ptr(l, map->key_size); for_each_possible_cpu(cpu) { bpf_long_memcpy(value + off, -- cgit From acb2ec3dd003b50b6fb5772057a08ec0dc45d42a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 14 May 2019 15:40:43 -0700 Subject: kernel/Makefile: don't assume that kernel/gen_ikh_data.sh is executable If the user downloads and applies patch-5.1.gz using patch(1), the x bit on kernel/gen_ikh_data.sh is not set. /bin/sh: 1: ./kernel/gen_ikh_data.sh: Permission denied Fix this by using CONFIG_SHELL. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/Makefile b/kernel/Makefile index 298437bb2c6a..33824f0385b3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -127,7 +127,7 @@ $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE $(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz quiet_cmd_genikh = CHK $(obj)/kheaders_data.tar.xz -cmd_genikh = $(srctree)/kernel/gen_ikh_data.sh $@ +cmd_genikh = $(CONFIG_SHELL) $(srctree)/kernel/gen_ikh_data.sh $@ $(obj)/kheaders_data.tar.xz: FORCE $(call cmd,genikh) -- cgit From c3f3ce049f7d97cc7ec9c01cb51d9ec74e0f37c2 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 14 May 2019 15:40:46 -0700 Subject: userfaultfd: use RCU to free the task struct when fork fails The task structure is freed while get_mem_cgroup_from_mm() holds rcu_read_lock() and dereferences mm->owner. get_mem_cgroup_from_mm() failing fork() ---- --- task = mm->owner mm->owner = NULL; free(task) if (task) *task; /* use after free */ The fix consists in freeing the task with RCU also in the fork failure case, exactly like it always happens for the regular exit(2) path. That is enough to make the rcu_read_lock hold in get_mem_cgroup_from_mm() (left side above) effective to avoid a use after free when dereferencing the task structure. An alternate possible fix would be to defer the delivery of the userfaultfd contexts to the monitor until after fork() is guaranteed to succeed. Such a change would require more changes because it would create a strict ordering dependency where the uffd methods would need to be called beyond the last potentially failing branch in order to be safe. This solution as opposed only adds the dependency to common code to set mm->owner to NULL and to free the task struct that was pointed by mm->owner with RCU, if fork ends up failing. The userfaultfd methods can still be called anywhere during the fork runtime and the monitor will keep discarding orphaned "mm" coming from failed forks in userland. This race condition couldn't trigger if CONFIG_MEMCG was set =n at build time. [aarcange@redhat.com: improve changelog, reduce #ifdefs per Michal] Link: http://lkml.kernel.org/r/20190429035752.4508-1-aarcange@redhat.com Link: http://lkml.kernel.org/r/20190325225636.11635-2-aarcange@redhat.com Fixes: 893e26e61d04 ("userfaultfd: non-cooperative: Add fork() event") Signed-off-by: Andrea Arcangeli Tested-by: zhong jiang Reported-by: syzbot+cbb52e396df3e565ab02@syzkaller.appspotmail.com Cc: Oleg Nesterov Cc: Jann Horn Cc: Hugh Dickins Cc: Mike Rapoport Cc: Mike Kravetz Cc: Peter Xu Cc: Jason Gunthorpe Cc: "Kirill A . Shutemov" Cc: Michal Hocko Cc: zhong jiang Cc: syzbot+cbb52e396df3e565ab02@syzkaller.appspotmail.com Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 737db1828437..b409e792aadc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -955,6 +955,15 @@ static void mm_init_aio(struct mm_struct *mm) #endif } +static __always_inline void mm_clear_owner(struct mm_struct *mm, + struct task_struct *p) +{ +#ifdef CONFIG_MEMCG + if (mm->owner == p) + WRITE_ONCE(mm->owner, NULL); +#endif +} + static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) { #ifdef CONFIG_MEMCG @@ -1343,6 +1352,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk, free_pt: /* don't put binfmt in mmput, we haven't got module yet */ mm->binfmt = NULL; + mm_init_owner(mm, NULL); mmput(mm); fail_nomem: @@ -1726,6 +1736,21 @@ static int pidfd_create(struct pid *pid) return fd; } +static void __delayed_free_task(struct rcu_head *rhp) +{ + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); + + free_task(tsk); +} + +static __always_inline void delayed_free_task(struct task_struct *tsk) +{ + if (IS_ENABLED(CONFIG_MEMCG)) + call_rcu(&tsk->rcu, __delayed_free_task); + else + free_task(tsk); +} + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. @@ -2233,8 +2258,10 @@ bad_fork_cleanup_io: bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_mm: - if (p->mm) + if (p->mm) { + mm_clear_owner(p->mm, p); mmput(p->mm); + } bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) free_signal_struct(p->signal); @@ -2265,7 +2292,7 @@ bad_fork_cleanup_count: bad_fork_free: p->state = TASK_DEAD; put_task_stack(p); - free_task(p); + delayed_free_task(p); fork_out: spin_lock_irq(¤t->sighand->siglock); hlist_del_init(&delayed.node); -- cgit From 987717e5e016a0dd3011d3bb16546672713f94e2 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 14 May 2019 15:40:50 -0700 Subject: mm: change mm_update_next_owner() to update mm->owner with WRITE_ONCE The RCU reader uses rcu_dereference() inside rcu_read_lock critical sections, so the writer shall use WRITE_ONCE. Just a cleanup, we still rely on gcc to emit atomic writes in other places. Link: http://lkml.kernel.org/r/20190325225636.11635-3-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Reviewed-by: Andrew Morton Cc: Hugh Dickins Cc: Jann Horn Cc: Jason Gunthorpe Cc: "Kirill A . Shutemov" Cc: Michal Hocko Cc: Mike Kravetz Cc: Mike Rapoport Cc: Oleg Nesterov Cc: Peter Xu Cc: zhong jiang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 2166c2d92ddc..8361a560cd1d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -422,7 +422,7 @@ retry: * freed task structure. */ if (atomic_read(&mm->mm_users) <= 1) { - mm->owner = NULL; + WRITE_ONCE(mm->owner, NULL); return; } @@ -462,7 +462,7 @@ retry: * most likely racing with swapoff (try_to_unuse()) or /proc or * ptrace or page migration (get_task_mm()). Mark owner as NULL. */ - mm->owner = NULL; + WRITE_ONCE(mm->owner, NULL); return; assign_new_owner: @@ -483,7 +483,7 @@ assign_new_owner: put_task_struct(c); goto retry; } - mm->owner = c; + WRITE_ONCE(mm->owner, c); task_unlock(c); put_task_struct(c); } -- cgit From 33b2d6302abc4ccea1d9b3f095e2e27b02ca264e Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 14 May 2019 15:40:56 -0700 Subject: psi: introduce state_mask to represent stalled psi states Patch series "psi: pressure stall monitors", v6. This is a respin of: https://lwn.net/ml/linux-kernel/20190308184311.144521-1-surenb%40google.com/ Android is adopting psi to detect and remedy memory pressure that results in stuttering and decreased responsiveness on mobile devices. Psi gives us the stall information, but because we're dealing with latencies in the millisecond range, periodically reading the pressure files to detect stalls in a timely fashion is not feasible. Psi also doesn't aggregate its averages at a high-enough frequency right now. This patch series extends the psi interface such that users can configure sensitive latency thresholds and use poll() and friends to be notified when these are breached. As high-frequency aggregation is costly, it implements an aggregation method that is optimized for fast, short-interval averaging, and makes the aggregation frequency adaptive, such that high-frequency updates only happen while monitored stall events are actively occurring. With these patches applied, Android can monitor for, and ward off, mounting memory shortages before they cause problems for the user. For example, using memory stall monitors in userspace low memory killer daemon (lmkd) we can detect mounting pressure and kill less important processes before device becomes visibly sluggish. In our memory stress testing psi memory monitors produce roughly 10x less false positives compared to vmpressure signals. Having ability to specify multiple triggers for the same psi metric allows other parts of Android framework to monitor memory state of the device and act accordingly. The new interface is straight-forward. The user opens one of the pressure files for writing and writes a trigger description into the file descriptor that defines the stall state - some or full, and the maximum stall time over a given window of time. E.g.: /* Signal when stall time exceeds 100ms of a 1s window */ char trigger[] = "full 100000 1000000" fd = open("/proc/pressure/memory") write(fd, trigger, sizeof(trigger)) while (poll() >= 0) { ... }; close(fd); When the monitored stall state is entered, psi adapts its aggregation frequency according to what the configured time window requires in order to emit event signals in a timely fashion. Once the stalling subsides, aggregation reverts back to normal. The trigger is associated with the open file descriptor. To stop monitoring, the user only needs to close the file descriptor and the trigger is discarded. Patches 1-6 prepare the psi code for polling support. Patch 7 implements the adaptive polling logic, the pressure growth detection optimized for short intervals, and hooks up write() and poll() on the pressure files. The patches were developed in collaboration with Johannes Weiner. This patch (of 7): The psi monitoring patches will need to determine the same states as record_times(). To avoid calculating them twice, maintain a state mask that can be consulted cheaply. Do this in a separate patch to keep the churn in the main feature patch at a minimum. This adds 4-byte state_mask member into psi_group_cpu struct which results in its first cacheline-aligned part becoming 52 bytes long. Add explicit values to enumeration element counters that affect psi_group_cpu struct size. Link: http://lkml.kernel.org/r/20190124211518.244221-4-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Johannes Weiner Cc: Dennis Zhou Cc: Ingo Molnar Cc: Jens Axboe Cc: Li Zefan Cc: Peter Zijlstra Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/psi_types.h | 9 ++++++--- kernel/sched/psi.c | 29 +++++++++++++++++++---------- 2 files changed, 25 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 2cf422db5d18..762c6bb16f3c 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -11,7 +11,7 @@ enum psi_task_count { NR_IOWAIT, NR_MEMSTALL, NR_RUNNING, - NR_PSI_TASK_COUNTS, + NR_PSI_TASK_COUNTS = 3, }; /* Task state bitmasks */ @@ -24,7 +24,7 @@ enum psi_res { PSI_IO, PSI_MEM, PSI_CPU, - NR_PSI_RESOURCES, + NR_PSI_RESOURCES = 3, }; /* @@ -41,7 +41,7 @@ enum psi_states { PSI_CPU_SOME, /* Only per-CPU, to weigh the CPU in the global average: */ PSI_NONIDLE, - NR_PSI_STATES, + NR_PSI_STATES = 6, }; struct psi_group_cpu { @@ -53,6 +53,9 @@ struct psi_group_cpu { /* States of the tasks belonging to this group */ unsigned int tasks[NR_PSI_TASK_COUNTS]; + /* Aggregate pressure state derived from the tasks */ + u32 state_mask; + /* Period time sampling buckets for each state of interest (ns) */ u32 times[NR_PSI_STATES]; diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 0e97ca9306ef..22c1505ad290 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -213,17 +213,17 @@ static bool test_state(unsigned int *tasks, enum psi_states state) static void get_recent_times(struct psi_group *group, int cpu, u32 *times) { struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu); - unsigned int tasks[NR_PSI_TASK_COUNTS]; u64 now, state_start; + enum psi_states s; unsigned int seq; - int s; + u32 state_mask; /* Snapshot a coherent view of the CPU state */ do { seq = read_seqcount_begin(&groupc->seq); now = cpu_clock(cpu); memcpy(times, groupc->times, sizeof(groupc->times)); - memcpy(tasks, groupc->tasks, sizeof(groupc->tasks)); + state_mask = groupc->state_mask; state_start = groupc->state_start; } while (read_seqcount_retry(&groupc->seq, seq)); @@ -239,7 +239,7 @@ static void get_recent_times(struct psi_group *group, int cpu, u32 *times) * (u32) and our reported pressure close to what's * actually happening. */ - if (test_state(tasks, s)) + if (state_mask & (1 << s)) times[s] += now - state_start; delta = times[s] - groupc->times_prev[s]; @@ -407,15 +407,15 @@ static void record_times(struct psi_group_cpu *groupc, int cpu, delta = now - groupc->state_start; groupc->state_start = now; - if (test_state(groupc->tasks, PSI_IO_SOME)) { + if (groupc->state_mask & (1 << PSI_IO_SOME)) { groupc->times[PSI_IO_SOME] += delta; - if (test_state(groupc->tasks, PSI_IO_FULL)) + if (groupc->state_mask & (1 << PSI_IO_FULL)) groupc->times[PSI_IO_FULL] += delta; } - if (test_state(groupc->tasks, PSI_MEM_SOME)) { + if (groupc->state_mask & (1 << PSI_MEM_SOME)) { groupc->times[PSI_MEM_SOME] += delta; - if (test_state(groupc->tasks, PSI_MEM_FULL)) + if (groupc->state_mask & (1 << PSI_MEM_FULL)) groupc->times[PSI_MEM_FULL] += delta; else if (memstall_tick) { u32 sample; @@ -436,10 +436,10 @@ static void record_times(struct psi_group_cpu *groupc, int cpu, } } - if (test_state(groupc->tasks, PSI_CPU_SOME)) + if (groupc->state_mask & (1 << PSI_CPU_SOME)) groupc->times[PSI_CPU_SOME] += delta; - if (test_state(groupc->tasks, PSI_NONIDLE)) + if (groupc->state_mask & (1 << PSI_NONIDLE)) groupc->times[PSI_NONIDLE] += delta; } @@ -448,6 +448,8 @@ static void psi_group_change(struct psi_group *group, int cpu, { struct psi_group_cpu *groupc; unsigned int t, m; + enum psi_states s; + u32 state_mask = 0; groupc = per_cpu_ptr(group->pcpu, cpu); @@ -480,6 +482,13 @@ static void psi_group_change(struct psi_group *group, int cpu, if (set & (1 << t)) groupc->tasks[t]++; + /* Calculate state mask representing active states */ + for (s = 0; s < NR_PSI_STATES; s++) { + if (test_state(groupc->tasks, s)) + state_mask |= (1 << s); + } + groupc->state_mask = state_mask; + write_seqcount_end(&groupc->seq); } -- cgit From 9289c5e6a78a5a9397df5fa60eb82b105abcfecf Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 14 May 2019 15:40:59 -0700 Subject: psi: make psi_enable static psi_enable is not used outside of psi.c, make it static. Link: http://lkml.kernel.org/r/20190319235619.260832-3-surenb@google.com Signed-off-by: Suren Baghdasaryan Suggested-by: Andrew Morton Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched/psi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 22c1505ad290..281702de9772 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -140,9 +140,9 @@ static int psi_bug __read_mostly; DEFINE_STATIC_KEY_FALSE(psi_disabled); #ifdef CONFIG_PSI_DEFAULT_DISABLED -bool psi_enable; +static bool psi_enable; #else -bool psi_enable = true; +static bool psi_enable = true; #endif static int __init setup_psi(char *str) { -- cgit From bcc78db64168eb6dede056fed2999f75f7ace309 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 14 May 2019 15:41:02 -0700 Subject: psi: rename psi fields in preparation for psi trigger addition Rename psi_group structure member fields used for calculating psi totals and averages for clear distinction between them and for trigger-related fields that will be added by "psi: introduce psi monitor". [surenb@google.com: v6] Link: http://lkml.kernel.org/r/20190319235619.260832-4-surenb@google.com Link: http://lkml.kernel.org/r/20190124211518.244221-5-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Johannes Weiner Cc: Dennis Zhou Cc: Ingo Molnar Cc: Jens Axboe Cc: Li Zefan Cc: Peter Zijlstra Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/psi_types.h | 14 +++++++------- kernel/sched/psi.c | 41 +++++++++++++++++++++-------------------- 2 files changed, 28 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 762c6bb16f3c..4d1c1f67be18 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -69,17 +69,17 @@ struct psi_group_cpu { }; struct psi_group { - /* Protects data updated during an aggregation */ - struct mutex stat_lock; + /* Protects data used by the aggregator */ + struct mutex avgs_lock; /* Per-cpu task state & time tracking */ struct psi_group_cpu __percpu *pcpu; - /* Periodic aggregation state */ - u64 total_prev[NR_PSI_STATES - 1]; - u64 last_update; - u64 next_update; - struct delayed_work clock_work; + /* Running pressure averages */ + u64 avg_total[NR_PSI_STATES - 1]; + u64 avg_last_update; + u64 avg_next_update; + struct delayed_work avgs_work; /* Total stall times and sampled pressure averages */ u64 total[NR_PSI_STATES - 1]; diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 281702de9772..4fb4d9913bc8 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -165,7 +165,7 @@ static struct psi_group psi_system = { .pcpu = &system_group_pcpu, }; -static void psi_update_work(struct work_struct *work); +static void psi_avgs_work(struct work_struct *work); static void group_init(struct psi_group *group) { @@ -173,9 +173,9 @@ static void group_init(struct psi_group *group) for_each_possible_cpu(cpu) seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq); - group->next_update = sched_clock() + psi_period; - INIT_DELAYED_WORK(&group->clock_work, psi_update_work); - mutex_init(&group->stat_lock); + group->avg_next_update = sched_clock() + psi_period; + INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work); + mutex_init(&group->avgs_lock); } void __init psi_init(void) @@ -278,7 +278,7 @@ static bool update_stats(struct psi_group *group) int cpu; int s; - mutex_lock(&group->stat_lock); + mutex_lock(&group->avgs_lock); /* * Collect the per-cpu time buckets and average them into a @@ -319,7 +319,7 @@ static bool update_stats(struct psi_group *group) /* avgX= */ now = sched_clock(); - expires = group->next_update; + expires = group->avg_next_update; if (now < expires) goto out; if (now - expires >= psi_period) @@ -332,14 +332,14 @@ static bool update_stats(struct psi_group *group) * But the deltas we sample out of the per-cpu buckets above * are based on the actual time elapsing between clock ticks. */ - group->next_update = expires + ((1 + missed_periods) * psi_period); - period = now - (group->last_update + (missed_periods * psi_period)); - group->last_update = now; + group->avg_next_update = expires + ((1 + missed_periods) * psi_period); + period = now - (group->avg_last_update + (missed_periods * psi_period)); + group->avg_last_update = now; for (s = 0; s < NR_PSI_STATES - 1; s++) { u32 sample; - sample = group->total[s] - group->total_prev[s]; + sample = group->total[s] - group->avg_total[s]; /* * Due to the lockless sampling of the time buckets, * recorded time deltas can slip into the next period, @@ -359,22 +359,22 @@ static bool update_stats(struct psi_group *group) */ if (sample > period) sample = period; - group->total_prev[s] += sample; + group->avg_total[s] += sample; calc_avgs(group->avg[s], missed_periods, sample, period); } out: - mutex_unlock(&group->stat_lock); + mutex_unlock(&group->avgs_lock); return nonidle_total; } -static void psi_update_work(struct work_struct *work) +static void psi_avgs_work(struct work_struct *work) { struct delayed_work *dwork; struct psi_group *group; bool nonidle; dwork = to_delayed_work(work); - group = container_of(dwork, struct psi_group, clock_work); + group = container_of(dwork, struct psi_group, avgs_work); /* * If there is task activity, periodically fold the per-cpu @@ -391,8 +391,9 @@ static void psi_update_work(struct work_struct *work) u64 now; now = sched_clock(); - if (group->next_update > now) - delay = nsecs_to_jiffies(group->next_update - now) + 1; + if (group->avg_next_update > now) + delay = nsecs_to_jiffies( + group->avg_next_update - now) + 1; schedule_delayed_work(dwork, delay); } } @@ -546,13 +547,13 @@ void psi_task_change(struct task_struct *task, int clear, int set) */ if (unlikely((clear & TSK_RUNNING) && (task->flags & PF_WQ_WORKER) && - wq_worker_last_func(task) == psi_update_work)) + wq_worker_last_func(task) == psi_avgs_work)) wake_clock = false; while ((group = iterate_groups(task, &iter))) { psi_group_change(group, cpu, clear, set); - if (wake_clock && !delayed_work_pending(&group->clock_work)) - schedule_delayed_work(&group->clock_work, PSI_FREQ); + if (wake_clock && !delayed_work_pending(&group->avgs_work)) + schedule_delayed_work(&group->avgs_work, PSI_FREQ); } } @@ -649,7 +650,7 @@ void psi_cgroup_free(struct cgroup *cgroup) if (static_branch_likely(&psi_disabled)) return; - cancel_delayed_work_sync(&cgroup->psi.clock_work); + cancel_delayed_work_sync(&cgroup->psi.avgs_work); free_percpu(cgroup->psi.pcpu); } -- cgit From 7fc70a3999366560ad1d4f2389a78360300c2c6a Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 14 May 2019 15:41:06 -0700 Subject: psi: split update_stats into parts Split update_stats into collect_percpu_times and update_averages for collect_percpu_times to be reused later inside psi monitor. Link: http://lkml.kernel.org/r/20190319235619.260832-5-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Johannes Weiner Cc: Dennis Zhou Cc: Ingo Molnar Cc: Jens Axboe Cc: Li Zefan Cc: Peter Zijlstra Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched/psi.c | 57 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 23 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 4fb4d9913bc8..ace5ed97b186 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -269,17 +269,13 @@ static void calc_avgs(unsigned long avg[3], int missed_periods, avg[2] = calc_load(avg[2], EXP_300s, pct); } -static bool update_stats(struct psi_group *group) +static bool collect_percpu_times(struct psi_group *group) { u64 deltas[NR_PSI_STATES - 1] = { 0, }; - unsigned long missed_periods = 0; unsigned long nonidle_total = 0; - u64 now, expires, period; int cpu; int s; - mutex_lock(&group->avgs_lock); - /* * Collect the per-cpu time buckets and average them into a * single time sample that is normalized to wallclock time. @@ -317,11 +313,18 @@ static bool update_stats(struct psi_group *group) for (s = 0; s < NR_PSI_STATES - 1; s++) group->total[s] += div_u64(deltas[s], max(nonidle_total, 1UL)); + return nonidle_total; +} + +static u64 update_averages(struct psi_group *group, u64 now) +{ + unsigned long missed_periods = 0; + u64 expires, period; + u64 avg_next_update; + int s; + /* avgX= */ - now = sched_clock(); expires = group->avg_next_update; - if (now < expires) - goto out; if (now - expires >= psi_period) missed_periods = div_u64(now - expires, psi_period); @@ -332,7 +335,7 @@ static bool update_stats(struct psi_group *group) * But the deltas we sample out of the per-cpu buckets above * are based on the actual time elapsing between clock ticks. */ - group->avg_next_update = expires + ((1 + missed_periods) * psi_period); + avg_next_update = expires + ((1 + missed_periods) * psi_period); period = now - (group->avg_last_update + (missed_periods * psi_period)); group->avg_last_update = now; @@ -362,9 +365,8 @@ static bool update_stats(struct psi_group *group) group->avg_total[s] += sample; calc_avgs(group->avg[s], missed_periods, sample, period); } -out: - mutex_unlock(&group->avgs_lock); - return nonidle_total; + + return avg_next_update; } static void psi_avgs_work(struct work_struct *work) @@ -372,10 +374,16 @@ static void psi_avgs_work(struct work_struct *work) struct delayed_work *dwork; struct psi_group *group; bool nonidle; + u64 now; dwork = to_delayed_work(work); group = container_of(dwork, struct psi_group, avgs_work); + mutex_lock(&group->avgs_lock); + + now = sched_clock(); + + nonidle = collect_percpu_times(group); /* * If there is task activity, periodically fold the per-cpu * times and feed samples into the running averages. If things @@ -383,19 +391,15 @@ static void psi_avgs_work(struct work_struct *work) * Once restarted, we'll catch up the running averages in one * go - see calc_avgs() and missed_periods. */ - - nonidle = update_stats(group); + if (now >= group->avg_next_update) + group->avg_next_update = update_averages(group, now); if (nonidle) { - unsigned long delay = 0; - u64 now; - - now = sched_clock(); - if (group->avg_next_update > now) - delay = nsecs_to_jiffies( - group->avg_next_update - now) + 1; - schedule_delayed_work(dwork, delay); + schedule_delayed_work(dwork, nsecs_to_jiffies( + group->avg_next_update - now) + 1); } + + mutex_unlock(&group->avgs_lock); } static void record_times(struct psi_group_cpu *groupc, int cpu, @@ -707,11 +711,18 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to) int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) { int full; + u64 now; if (static_branch_likely(&psi_disabled)) return -EOPNOTSUPP; - update_stats(group); + /* Update averages before reporting them */ + mutex_lock(&group->avgs_lock); + now = sched_clock(); + collect_percpu_times(group); + if (now >= group->avg_next_update) + group->avg_next_update = update_averages(group, now); + mutex_unlock(&group->avgs_lock); for (full = 0; full < 2 - (res == PSI_CPU); full++) { unsigned long avg[3]; -- cgit From 333f3017c5a893b000b2b4a3529814ce93fa83d7 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 14 May 2019 15:41:09 -0700 Subject: psi: track changed states Introduce changed_states parameter into collect_percpu_times to track the states changed since the last update. This will be needed to detect whether polled states activated in the monitor patch. Link: http://lkml.kernel.org/r/20190319235619.260832-6-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Johannes Weiner Cc: Dennis Zhou Cc: Ingo Molnar Cc: Jens Axboe Cc: Li Zefan Cc: Peter Zijlstra Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched/psi.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index ace5ed97b186..1b99eeffaa25 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -210,7 +210,8 @@ static bool test_state(unsigned int *tasks, enum psi_states state) } } -static void get_recent_times(struct psi_group *group, int cpu, u32 *times) +static void get_recent_times(struct psi_group *group, int cpu, u32 *times, + u32 *pchanged_states) { struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu); u64 now, state_start; @@ -218,6 +219,8 @@ static void get_recent_times(struct psi_group *group, int cpu, u32 *times) unsigned int seq; u32 state_mask; + *pchanged_states = 0; + /* Snapshot a coherent view of the CPU state */ do { seq = read_seqcount_begin(&groupc->seq); @@ -246,6 +249,8 @@ static void get_recent_times(struct psi_group *group, int cpu, u32 *times) groupc->times_prev[s] = times[s]; times[s] = delta; + if (delta) + *pchanged_states |= (1 << s); } } @@ -269,10 +274,11 @@ static void calc_avgs(unsigned long avg[3], int missed_periods, avg[2] = calc_load(avg[2], EXP_300s, pct); } -static bool collect_percpu_times(struct psi_group *group) +static void collect_percpu_times(struct psi_group *group, u32 *pchanged_states) { u64 deltas[NR_PSI_STATES - 1] = { 0, }; unsigned long nonidle_total = 0; + u32 changed_states = 0; int cpu; int s; @@ -287,8 +293,11 @@ static bool collect_percpu_times(struct psi_group *group) for_each_possible_cpu(cpu) { u32 times[NR_PSI_STATES]; u32 nonidle; + u32 cpu_changed_states; - get_recent_times(group, cpu, times); + get_recent_times(group, cpu, times, + &cpu_changed_states); + changed_states |= cpu_changed_states; nonidle = nsecs_to_jiffies(times[PSI_NONIDLE]); nonidle_total += nonidle; @@ -313,7 +322,8 @@ static bool collect_percpu_times(struct psi_group *group) for (s = 0; s < NR_PSI_STATES - 1; s++) group->total[s] += div_u64(deltas[s], max(nonidle_total, 1UL)); - return nonidle_total; + if (pchanged_states) + *pchanged_states = changed_states; } static u64 update_averages(struct psi_group *group, u64 now) @@ -373,6 +383,7 @@ static void psi_avgs_work(struct work_struct *work) { struct delayed_work *dwork; struct psi_group *group; + u32 changed_states; bool nonidle; u64 now; @@ -383,7 +394,8 @@ static void psi_avgs_work(struct work_struct *work) now = sched_clock(); - nonidle = collect_percpu_times(group); + collect_percpu_times(group, &changed_states); + nonidle = changed_states & (1 << PSI_NONIDLE); /* * If there is task activity, periodically fold the per-cpu * times and feed samples into the running averages. If things @@ -719,7 +731,7 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) /* Update averages before reporting them */ mutex_lock(&group->avgs_lock); now = sched_clock(); - collect_percpu_times(group); + collect_percpu_times(group, NULL); if (now >= group->avg_next_update) group->avg_next_update = update_averages(group, now); mutex_unlock(&group->avgs_lock); -- cgit From 8af0c18af1425fc70686c0fdcfc0072cd8431aa0 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 14 May 2019 15:41:12 -0700 Subject: include/: refactor headers to allow kthread.h inclusion in psi_types.h kthread.h can't be included in psi_types.h because it creates a circular inclusion with kthread.h eventually including psi_types.h and complaining on kthread structures not being defined because they are defined further in the kthread.h. Resolve this by removing psi_types.h inclusion from the headers included from kthread.h. Link: http://lkml.kernel.org/r/20190319235619.260832-7-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Johannes Weiner Cc: Dennis Zhou Cc: Ingo Molnar Cc: Jens Axboe Cc: Li Zefan Cc: Peter Zijlstra Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/spi/spi-rockchip.c | 1 + include/linux/kthread.h | 3 ++- include/linux/sched.h | 1 - kernel/kthread.c | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 3912526ead66..cdb613d38062 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 2c89e60bc752..0f9da966934e 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -4,7 +4,6 @@ /* Simple interface for creating and stopping kernel threads without mess. */ #include #include -#include __printf(4, 5) struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), @@ -198,6 +197,8 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work); void kthread_destroy_worker(struct kthread_worker *worker); +struct cgroup_subsys_state; + #ifdef CONFIG_BLK_CGROUP void kthread_associate_blkcg(struct cgroup_subsys_state *css); struct cgroup_subsys_state *kthread_blkcg(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index a2cd15855bad..11837410690f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/kthread.c b/kernel/kthread.c index 5942eeafb9ac..be4e8795561a 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include -- cgit From 0e94682b73bfa6c44c98af7a26771c9c08c055d5 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 14 May 2019 15:41:15 -0700 Subject: psi: introduce psi monitor Psi monitor aims to provide a low-latency short-term pressure detection mechanism configurable by users. It allows users to monitor psi metrics growth and trigger events whenever a metric raises above user-defined threshold within user-defined time window. Time window and threshold are both expressed in usecs. Multiple psi resources with different thresholds and window sizes can be monitored concurrently. Psi monitors activate when system enters stall state for the monitored psi metric and deactivate upon exit from the stall state. While system is in the stall state psi signal growth is monitored at a rate of 10 times per tracking window. Min window size is 500ms, therefore the min monitoring interval is 50ms. Max window size is 10s with monitoring interval of 1s. When activated psi monitor stays active for at least the duration of one tracking window to avoid repeated activations/deactivations when psi signal is bouncing. Notifications to the users are rate-limited to one per tracking window. Link: http://lkml.kernel.org/r/20190319235619.260832-8-surenb@google.com Signed-off-by: Suren Baghdasaryan Signed-off-by: Johannes Weiner Cc: Dennis Zhou Cc: Ingo Molnar Cc: Jens Axboe Cc: Li Zefan Cc: Peter Zijlstra Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/accounting/psi.txt | 107 +++++++++ include/linux/psi.h | 8 + include/linux/psi_types.h | 82 ++++++- kernel/cgroup/cgroup.c | 71 +++++- kernel/sched/psi.c | 494 +++++++++++++++++++++++++++++++++++++-- 5 files changed, 742 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/Documentation/accounting/psi.txt b/Documentation/accounting/psi.txt index 7e71c9c1d8e9..5cbe5659e3b7 100644 --- a/Documentation/accounting/psi.txt +++ b/Documentation/accounting/psi.txt @@ -63,6 +63,110 @@ as well as medium and long term trends. The total absolute stall time spikes which wouldn't necessarily make a dent in the time averages, or to average trends over custom time frames. +Monitoring for pressure thresholds +================================== + +Users can register triggers and use poll() to be woken up when resource +pressure exceeds certain thresholds. + +A trigger describes the maximum cumulative stall time over a specific +time window, e.g. 100ms of total stall time within any 500ms window to +generate a wakeup event. + +To register a trigger user has to open psi interface file under +/proc/pressure/ representing the resource to be monitored and write the +desired threshold and time window. The open file descriptor should be +used to wait for trigger events using select(), poll() or epoll(). +The following format is used: + +