Diffstat (limited to 'kernel')
39 files changed, 326 insertions, 332 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0719192a3482..bf6c5f685ea2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5983,7 +5983,7 @@ const struct bpf_prog_ops bpf_syscall_prog_ops = { }; #ifdef CONFIG_SYSCTL -static int bpf_stats_handler(struct ctl_table *table, int write, +static int bpf_stats_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct static_key *key = (struct static_key *)table->data; @@ -6018,7 +6018,7 @@ void __weak unpriv_ebpf_notify(int new_state) { } -static int bpf_unpriv_handler(struct ctl_table *table, int write, +static int bpf_unpriv_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret, unpriv_enable = *(int *)table->data; diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 10b454554ab0..137ba73f56fc 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -144,7 +144,7 @@ kdb_bt(int argc, const char **argv) kdb_ps_suppressed(); /* Run the active tasks first */ for_each_online_cpu(cpu) { - p = kdb_curr_task(cpu); + p = curr_task(cpu); if (kdb_bt1(p, mask, btaprompt)) return 0; } diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 3131334d7a81..6a77f1c779c4 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -206,7 +206,7 @@ char kdb_getchar(void) */ static void kdb_position_cursor(char *prompt, char *buffer, char *cp) { - kdb_printf("\r%s", kdb_prompt_str); + kdb_printf("\r%s", prompt); if (cp > buffer) kdb_printf("%.*s", (int)(cp - buffer), buffer); } @@ -362,7 +362,7 @@ poll_again: if (i >= dtab_count) kdb_printf("..."); kdb_printf("\n"); - kdb_printf(kdb_prompt_str); + kdb_printf("%s", kdb_prompt_str); kdb_printf("%s", buffer); if (cp != lastchar) kdb_position_cursor(kdb_prompt_str, buffer, cp); @@ -453,7 +453,7 @@ char *kdb_getstr(char *buffer, size_t bufsize, const char *prompt) { if (prompt && kdb_prompt_str != prompt) strscpy(kdb_prompt_str, prompt, CMD_BUFLEN); - kdb_printf(kdb_prompt_str); + kdb_printf("%s", kdb_prompt_str); kdb_nextline = 1; /* Prompt and input resets line number */ return kdb_read(buffer, bufsize); } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 664bae55f2c9..f5f7d7fb5936 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -155,16 +155,6 @@ static char *__env[31] = { static const int __nenv = ARRAY_SIZE(__env); -struct task_struct *kdb_curr_task(int cpu) -{ - struct task_struct *p = curr_task(cpu); -#ifdef _TIF_MCA_INIT - if ((task_thread_info(p)->flags & _TIF_MCA_INIT) && KDB_TSK(cpu)) - p = krp->p; -#endif - return p; -} - /* * Update the permissions flags (kdb_cmd_enabled) to match the * current lockdown state. @@ -1228,7 +1218,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, char *cmdbuf; int diag; struct task_struct *kdb_current = - kdb_curr_task(raw_smp_processor_id()); + curr_task(raw_smp_processor_id()); KDB_DEBUG_STATE("kdb_local 1", reason); @@ -2278,7 +2268,7 @@ void kdb_ps_suppressed(void) unsigned long cpu; const struct task_struct *p, *g; for_each_online_cpu(cpu) { - p = kdb_curr_task(cpu); + p = curr_task(cpu); if (kdb_task_state(p, "-")) ++idle; } @@ -2314,7 +2304,7 @@ void kdb_ps1(const struct task_struct *p) kdb_task_has_cpu(p), kdb_process_cpu(p), kdb_task_state_char(p), (void *)(&p->thread), - p == kdb_curr_task(raw_smp_processor_id()) ? '*' : ' ', + p == curr_task(raw_smp_processor_id()) ? 
'*' : ' ', p->comm); if (kdb_task_has_cpu(p)) { if (!KDB_TSK(cpu)) { @@ -2350,7 +2340,7 @@ static int kdb_ps(int argc, const char **argv) for_each_online_cpu(cpu) { if (KDB_FLAG(CMD_INTERRUPT)) return 0; - p = kdb_curr_task(cpu); + p = curr_task(cpu); if (kdb_task_state(p, mask)) kdb_ps1(p); } diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 548fd4059bf9..d2520d72b1f5 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -210,8 +210,6 @@ extern void kdb_gdb_state_pass(char *buf); #define KDB_TSK(cpu) kgdb_info[cpu].task #define KDB_TSKREGS(cpu) kgdb_info[cpu].debuggerinfo -extern struct task_struct *kdb_curr_task(int); - #define kdb_task_has_cpu(p) (task_curr(p)) #define GFP_KDB (in_dbg_master() ? GFP_ATOMIC : GFP_KERNEL) diff --git a/kernel/delayacct.c b/kernel/delayacct.c index e039b0f99a0b..dead51de8eb5 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -44,7 +44,7 @@ void delayacct_init(void) } #ifdef CONFIG_PROC_SYSCTL -static int sysctl_delayacct(struct ctl_table *table, int write, void *buffer, +static int sysctl_delayacct(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int state = delayacct_on; diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 81de84318ccc..b1c18058d55f 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -67,8 +67,8 @@ void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, { struct dma_devres match_data = { size, vaddr, dma_handle }; - dma_free_coherent(dev, size, vaddr, dma_handle); WARN_ON(devres_destroy(dev, dmam_release, dmam_match, &match_data)); + dma_free_coherent(dev, size, vaddr, dma_handle); } EXPORT_SYMBOL(dmam_free_coherent); diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 8d57255e5b29..8a47e52a454f 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -270,7 +270,7 @@ exit_put: * Used for sysctl_perf_event_max_stack and * sysctl_perf_event_max_contexts_per_stack. 
*/ -int perf_event_max_stack_handler(struct ctl_table *table, int write, +int perf_event_max_stack_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int *value = table->data; diff --git a/kernel/events/core.c b/kernel/events/core.c index af2e3a06b239..aa3450bdc227 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -450,7 +450,7 @@ static void update_perf_cpu_limits(void) static bool perf_rotate_context(struct perf_cpu_pmu_context *cpc); -int perf_event_max_sample_rate_handler(struct ctl_table *table, int write, +int perf_event_max_sample_rate_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -474,7 +474,7 @@ int perf_event_max_sample_rate_handler(struct ctl_table *table, int write, int sysctl_perf_cpu_time_max_percent __read_mostly = DEFAULT_CPU_TIME_MAX_PERCENT; -int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, +int perf_cpu_time_max_percent_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); @@ -9328,21 +9328,19 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog, bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD; int i; - if (prog->aux->func_cnt == 0) { - perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, - (u64)(unsigned long)prog->bpf_func, - prog->jited_len, unregister, - prog->aux->ksym.name); - } else { - for (i = 0; i < prog->aux->func_cnt; i++) { - struct bpf_prog *subprog = prog->aux->func[i]; - - perf_event_ksymbol( - PERF_RECORD_KSYMBOL_TYPE_BPF, - (u64)(unsigned long)subprog->bpf_func, - subprog->jited_len, unregister, - subprog->aux->ksym.name); - } + perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, + (u64)(unsigned long)prog->bpf_func, + prog->jited_len, unregister, + prog->aux->ksym.name); + + for (i = 1; i < prog->aux->func_cnt; i++) { + struct bpf_prog *subprog = prog->aux->func[i]; + + perf_event_ksymbol( + PERF_RECORD_KSYMBOL_TYPE_BPF, + (u64)(unsigned long)subprog->bpf_func, + subprog->jited_len, unregister, + subprog->aux->ksym.name); } } diff --git a/kernel/fork.c b/kernel/fork.c index a8362c26ebcb..cc760491f201 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -3404,7 +3404,7 @@ int unshare_files(void) return 0; } -int sysctl_max_threads(struct ctl_table *table, int write, +int sysctl_max_threads(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 6ca859715d8a..959d99583d1c 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -239,7 +239,7 @@ static long hung_timeout_jiffies(unsigned long last_checked, /* * Process updating of timeout sysctl */ -static int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, +static int proc_dohung_task_timeout_secs(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 98b9622d372e..fb2c77368d18 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -148,9 +148,6 @@ static unsigned int get_symbol_offset(unsigned long pos) unsigned long kallsyms_sym_address(int idx) { - if (!IS_ENABLED(CONFIG_KALLSYMS_BASE_RELATIVE)) - return kallsyms_addresses[idx]; - /* values are unsigned offsets if --absolute-percpu is not in effect */ if (!IS_ENABLED(CONFIG_KALLSYMS_ABSOLUTE_PERCPU)) return kallsyms_relative_base + (u32)kallsyms_offsets[idx]; @@ -325,7 +322,7 @@ 
static unsigned long get_symbol_pos(unsigned long addr, unsigned long symbol_start = 0, symbol_end = 0; unsigned long i, low, high, mid; - /* Do a binary search on the sorted kallsyms_addresses array. */ + /* Do a binary search on the sorted kallsyms_offsets array. */ low = 0; high = kallsyms_num_syms; diff --git a/kernel/kallsyms_internal.h b/kernel/kallsyms_internal.h index 85480274fc8f..9633782f8250 100644 --- a/kernel/kallsyms_internal.h +++ b/kernel/kallsyms_internal.h @@ -4,12 +4,6 @@ #include <linux/types.h> -/* - * These will be re-linked against their real values during the second link - * stage. Preliminary values must be provided in the linker script using the - * PROVIDE() directive so that the first link stage can complete successfully. - */ -extern const unsigned long kallsyms_addresses[]; extern const int kallsyms_offsets[]; extern const u8 kallsyms_names[]; diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 9112d69d68b0..c0caa14880c3 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -888,7 +888,7 @@ struct kimage *kexec_crash_image; static int kexec_load_disabled; #ifdef CONFIG_SYSCTL -static int kexec_limit_handler(struct ctl_table *table, int write, +static int kexec_limit_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct kexec_load_limit *limit = table->data; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 6a76a8100073..e85de37d9e1e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -939,7 +939,7 @@ static void unoptimize_all_kprobes(void) static DEFINE_MUTEX(kprobe_sysctl_mutex); static int sysctl_kprobes_optimization; -static int proc_kprobes_optimization_handler(struct ctl_table *table, +static int proc_kprobes_optimization_handler(const struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 84c53285f499..7a75eab9c179 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -65,7 +65,7 @@ static struct latency_record latency_record[MAXLR]; int latencytop_enabled; #ifdef CONFIG_SYSCTL -static int sysctl_latencytop(struct ctl_table *table, int write, void *buffer, +static int sysctl_latencytop(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int err; diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 52426665eecc..3c21c31796db 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -346,6 +346,7 @@ int klp_apply_section_relocs(struct module *pmod, Elf_Shdr *sechdrs, * /sys/kernel/livepatch/<patch>/enabled * /sys/kernel/livepatch/<patch>/transition * /sys/kernel/livepatch/<patch>/force + * /sys/kernel/livepatch/<patch>/replace * /sys/kernel/livepatch/<patch>/<object> * /sys/kernel/livepatch/<patch>/<object>/patched * /sys/kernel/livepatch/<patch>/<object>/<function,sympos> @@ -401,7 +402,7 @@ static ssize_t enabled_show(struct kobject *kobj, struct klp_patch *patch; patch = container_of(kobj, struct klp_patch, kobj); - return snprintf(buf, PAGE_SIZE-1, "%d\n", patch->enabled); + return sysfs_emit(buf, "%d\n", patch->enabled); } static ssize_t transition_show(struct kobject *kobj, @@ -410,8 +411,7 @@ static ssize_t transition_show(struct kobject *kobj, struct klp_patch *patch; patch = container_of(kobj, struct klp_patch, kobj); - return snprintf(buf, PAGE_SIZE-1, "%d\n", - patch == klp_transition_patch); + return sysfs_emit(buf, "%d\n", patch == klp_transition_patch); } static ssize_t force_store(struct kobject *kobj, struct 
kobj_attribute *attr, @@ -443,13 +443,24 @@ static ssize_t force_store(struct kobject *kobj, struct kobj_attribute *attr, return count; } +static ssize_t replace_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct klp_patch *patch; + + patch = container_of(kobj, struct klp_patch, kobj); + return sysfs_emit(buf, "%d\n", patch->replace); +} + static struct kobj_attribute enabled_kobj_attr = __ATTR_RW(enabled); static struct kobj_attribute transition_kobj_attr = __ATTR_RO(transition); static struct kobj_attribute force_kobj_attr = __ATTR_WO(force); +static struct kobj_attribute replace_kobj_attr = __ATTR_RO(replace); static struct attribute *klp_patch_attrs[] = { &enabled_kobj_attr.attr, &transition_kobj_attr.attr, &force_kobj_attr.attr, + &replace_kobj_attr.attr, NULL }; ATTRIBUTE_GROUPS(klp_patch); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index bdf0087d6442..d70ab49d5b4a 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -261,7 +261,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) } #ifdef CONFIG_CHECKPOINT_RESTORE -static int pid_ns_ctl_handler(struct ctl_table *table, int write, +static int pid_ns_ctl_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct pid_namespace *pid_ns = task_active_pid_ns(current); diff --git a/kernel/pid_sysctl.h b/kernel/pid_sysctl.h index fe9fb991dc42..18ecaef6be41 100644 --- a/kernel/pid_sysctl.h +++ b/kernel/pid_sysctl.h @@ -5,7 +5,7 @@ #include <linux/pid_namespace.h> #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) -static int pid_mfd_noexec_dointvec_minmax(struct ctl_table *table, +static int pid_mfd_noexec_dointvec_minmax(const struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos) { struct pid_namespace *ns = task_active_pid_ns(current); diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 6c2afee5ef62..19dcc5832651 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -8,7 +8,7 @@ #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) void __init printk_sysctl_init(void); -int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, +int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); #else #define printk_sysctl_init() do { } while (0) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 7d91593f0ecf..054c0e7784fd 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -197,7 +197,7 @@ __setup("printk.devkmsg=", control_devkmsg); char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit"; #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) -int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, +int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { char old_str[DEVKMSG_STR_MAX_SIZE]; @@ -4372,15 +4372,15 @@ void kmsg_dump_rewind(struct kmsg_dump_iter *iter) EXPORT_SYMBOL_GPL(kmsg_dump_rewind); /** - * console_replay_all - replay kernel log on consoles + * console_try_replay_all - try to replay kernel log on consoles * * Try to obtain lock on console subsystem and replay all * available records in printk buffer on the consoles. * Does nothing if lock is not obtained. * - * Context: Any context. + * Context: Any, except for NMI. 
*/ -void console_replay_all(void) +void console_try_replay_all(void) { if (console_trylock()) { __console_rewind_all(); diff --git a/kernel/printk/sysctl.c b/kernel/printk/sysctl.c index 3e47dedce9e5..f5072dc85f7a 100644 --- a/kernel/printk/sysctl.c +++ b/kernel/printk/sysctl.c @@ -11,7 +11,7 @@ static const int ten_thousand = 10000; -static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, +static int proc_dointvec_minmax_sysadmin(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { if (write && !capable(CAP_SYS_ADMIN)) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ae5ef3013a55..a9f655025607 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1806,7 +1806,7 @@ static void uclamp_sync_util_min_rt_default(void) uclamp_update_util_min_rt_default(p); } -static int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, +static int sysctl_sched_uclamp_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { bool update_root_tg = false; @@ -4392,7 +4392,7 @@ static void reset_memory_tiering(void) } } -static int sysctl_numa_balancing(struct ctl_table *table, int write, +static int sysctl_numa_balancing(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; @@ -4461,7 +4461,7 @@ out: __setup("schedstats=", setup_schedstats); #ifdef CONFIG_PROC_SYSCTL -static int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, +static int sysctl_schedstats(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 63e49c8ffc4d..310523c1b9e3 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -26,9 +26,9 @@ int sysctl_sched_rt_runtime = 950000; #ifdef CONFIG_SYSCTL static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC * RR_TIMESLICE) / HZ; -static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, +static int sched_rt_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, +static int sched_rr_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table sched_rt_sysctls[] = { { @@ -2952,7 +2952,7 @@ static void sched_rt_do_global(void) raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); } -static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, +static int sched_rt_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int old_period, old_runtime; @@ -2991,7 +2991,7 @@ undo: return ret; } -static int sched_rr_handler(struct ctl_table *table, int write, void *buffer, +static int sched_rr_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 784a0be81e84..76504b776d03 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -285,7 +285,7 @@ void rebuild_sched_domains_energy(void) } #ifdef CONFIG_PROC_SYSCTL -static int sched_energy_aware_handler(struct ctl_table *table, int write, +static int sched_energy_aware_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret, state; diff --git a/kernel/seccomp.c b/kernel/seccomp.c index dc51e521bc1d..385d48293a5f 100644 --- 
a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -2431,7 +2431,7 @@ static void audit_actions_logged(u32 actions_logged, u32 old_actions_logged, return audit_seccomp_actions_logged(new, old, !ret); } -static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write, +static int seccomp_actions_logged_handler(const struct ctl_table *ro_table, int write, void *buffer, size_t *lenp, loff_t *ppos) { diff --git a/kernel/stackleak.c b/kernel/stackleak.c index 0f9712584913..39fd620a7db6 100644 --- a/kernel/stackleak.c +++ b/kernel/stackleak.c @@ -21,7 +21,7 @@ static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass); #ifdef CONFIG_SYSCTL -static int stack_erasing_sysctl(struct ctl_table *table, int write, +static int stack_erasing_sysctl(const struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret = 0; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e4421594fc25..79e6cb1d5c48 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -256,7 +256,7 @@ static bool proc_first_pos_non_zero_ignore(loff_t *ppos, * * Returns 0 on success. */ -int proc_dostring(struct ctl_table *table, int write, +int proc_dostring(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { if (write) @@ -702,7 +702,7 @@ int do_proc_douintvec(const struct ctl_table *table, int write, * * Returns 0 on success. */ -int proc_dobool(struct ctl_table *table, int write, void *buffer, +int proc_dobool(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table tmp; @@ -739,7 +739,7 @@ int proc_dobool(struct ctl_table *table, int write, void *buffer, * * Returns 0 on success. */ -int proc_dointvec(struct ctl_table *table, int write, void *buffer, +int proc_dointvec(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL); @@ -758,7 +758,7 @@ int proc_dointvec(struct ctl_table *table, int write, void *buffer, * * Returns 0 on success. */ -int proc_douintvec(struct ctl_table *table, int write, void *buffer, +int proc_douintvec(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return do_proc_douintvec(table, write, buffer, lenp, ppos, @@ -769,7 +769,7 @@ int proc_douintvec(struct ctl_table *table, int write, void *buffer, * Taint values can only be increased * This means we can safely use a temporary. */ -static int proc_taint(struct ctl_table *table, int write, +static int proc_taint(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; @@ -864,7 +864,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, * * Returns 0 on success or -EINVAL on write when the range check fails. */ -int proc_dointvec_minmax(struct ctl_table *table, int write, +int proc_dointvec_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct do_proc_dointvec_minmax_conv_param param = { @@ -933,7 +933,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp, * * Returns 0 on success or -ERANGE on write when the range check fails. 
*/ -int proc_douintvec_minmax(struct ctl_table *table, int write, +int proc_douintvec_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct do_proc_douintvec_minmax_conv_param param = { @@ -961,7 +961,7 @@ int proc_douintvec_minmax(struct ctl_table *table, int write, * * Returns 0 on success or an error on write when the range check fails. */ -int proc_dou8vec_minmax(struct ctl_table *table, int write, +int proc_dou8vec_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table tmp; @@ -998,7 +998,7 @@ int proc_dou8vec_minmax(struct ctl_table *table, int write, EXPORT_SYMBOL_GPL(proc_dou8vec_minmax); #ifdef CONFIG_MAGIC_SYSRQ -static int sysrq_sysctl_handler(struct ctl_table *table, int write, +static int sysrq_sysctl_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int tmp, ret; @@ -1115,7 +1115,7 @@ static int do_proc_doulongvec_minmax(const struct ctl_table *table, int write, * * Returns 0 on success. */ -int proc_doulongvec_minmax(struct ctl_table *table, int write, +int proc_doulongvec_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l); @@ -1138,7 +1138,7 @@ int proc_doulongvec_minmax(struct ctl_table *table, int write, * * Returns 0 on success. */ -int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, +int proc_doulongvec_ms_jiffies_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return do_proc_doulongvec_minmax(table, write, buffer, @@ -1259,14 +1259,14 @@ static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lv * * Returns 0 on success. */ -int proc_dointvec_jiffies(struct ctl_table *table, int write, +int proc_dointvec_jiffies(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table,write,buffer,lenp,ppos, do_proc_dointvec_jiffies_conv,NULL); } -int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write, +int proc_dointvec_ms_jiffies_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct do_proc_dointvec_minmax_conv_param param = { @@ -1292,7 +1292,7 @@ int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write, * * Returns 0 on success. */ -int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, +int proc_dointvec_userhz_jiffies(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table, write, buffer, lenp, ppos, @@ -1315,14 +1315,14 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, * * Returns 0 on success. */ -int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer, +int proc_dointvec_ms_jiffies(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table, write, buffer, lenp, ppos, do_proc_dointvec_ms_jiffies_conv, NULL); } -static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer, +static int proc_do_cad_pid(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct pid *new_pid; @@ -1361,7 +1361,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer, * * Returns 0 on success. 
*/ -int proc_do_large_bitmap(struct ctl_table *table, int write, +int proc_do_large_bitmap(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int err = 0; @@ -1493,85 +1493,85 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, #else /* CONFIG_PROC_SYSCTL */ -int proc_dostring(struct ctl_table *table, int write, +int proc_dostring(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dobool(struct ctl_table *table, int write, +int proc_dobool(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec(struct ctl_table *table, int write, +int proc_dointvec(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_douintvec(struct ctl_table *table, int write, +int proc_douintvec(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_minmax(struct ctl_table *table, int write, +int proc_dointvec_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_douintvec_minmax(struct ctl_table *table, int write, +int proc_douintvec_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dou8vec_minmax(struct ctl_table *table, int write, +int proc_dou8vec_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_jiffies(struct ctl_table *table, int write, +int proc_dointvec_jiffies(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write, +int proc_dointvec_ms_jiffies_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, +int proc_dointvec_userhz_jiffies(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, +int proc_dointvec_ms_jiffies(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_doulongvec_minmax(struct ctl_table *table, int write, +int proc_doulongvec_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, +int proc_doulongvec_ms_jiffies_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_do_large_bitmap(struct ctl_table *table, int write, +int proc_do_large_bitmap(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; @@ -1580,7 +1580,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, #endif /* CONFIG_PROC_SYSCTL */ #if defined(CONFIG_SYSCTL) -int proc_do_static_key(struct ctl_table *table, int write, +int proc_do_static_key(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct static_key *key = (struct static_key *)table->data; diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 48288dd4a102..64b0d8a0aa0f 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -289,7 +289,7 @@ 
static void timers_update_migration(void) } #ifdef CONFIG_SYSCTL -static int timer_migration_handler(struct ctl_table *table, int write, +static int timer_migration_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c index 84413114db5c..8d57f7686bb0 100644 --- a/kernel/time/timer_migration.c +++ b/kernel/time/timer_migration.c @@ -475,9 +475,54 @@ static bool tmigr_check_lonely(struct tmigr_group *group) return bitmap_weight(&active, BIT_CNT) <= 1; } -typedef bool (*up_f)(struct tmigr_group *, struct tmigr_group *, void *); +/** + * struct tmigr_walk - data required for walking the hierarchy + * @nextexp: Next CPU event expiry information which is handed into + * the timer migration code by the timer code + * (get_next_timer_interrupt()) + * @firstexp: Contains the first event expiry information when + * hierarchy is completely idle. When CPU itself was the + * last going idle, information makes sure, that CPU will + * be back in time. When using this value in the remote + * expiry case, firstexp is stored in the per CPU tmigr_cpu + * struct of CPU which expires remote timers. It is updated + * in top level group only. Be aware, there could occur a + * new top level of the hierarchy between the 'top level + * call' in tmigr_update_events() and the check for the + * parent group in walk_groups(). Then @firstexp might + * contain a value != KTIME_MAX even if it was not the + * final top level. This is not a problem, as the worst + * outcome is a CPU which might wake up a little early. + * @evt: Pointer to tmigr_event which needs to be queued (of idle + * child group) + * @childmask: groupmask of child group + * @remote: Is set, when the new timer path is executed in + * tmigr_handle_remote_cpu() + * @basej: timer base in jiffies + * @now: timer base monotonic + * @check: is set if there is the need to handle remote timers; + * required in tmigr_requires_handle_remote() only + * @tmc_active: this flag indicates, whether the CPU which triggers + * the hierarchy walk is !idle in the timer migration + * hierarchy. When the CPU is idle and the whole hierarchy is + * idle, only the first event of the top level has to be + * considered. + */ +struct tmigr_walk { + u64 nextexp; + u64 firstexp; + struct tmigr_event *evt; + u8 childmask; + bool remote; + unsigned long basej; + u64 now; + bool check; + bool tmc_active; +}; + +typedef bool (*up_f)(struct tmigr_group *, struct tmigr_group *, struct tmigr_walk *); -static void __walk_groups(up_f up, void *data, +static void __walk_groups(up_f up, struct tmigr_walk *data, struct tmigr_cpu *tmc) { struct tmigr_group *child = NULL, *group = tmc->tmgroup; @@ -490,64 +535,17 @@ static void __walk_groups(up_f up, void *data, child = group; group = group->parent; + data->childmask = child->groupmask; } while (group); } -static void walk_groups(up_f up, void *data, struct tmigr_cpu *tmc) +static void walk_groups(up_f up, struct tmigr_walk *data, struct tmigr_cpu *tmc) { lockdep_assert_held(&tmc->lock); __walk_groups(up, data, tmc); } -/** - * struct tmigr_walk - data required for walking the hierarchy - * @nextexp: Next CPU event expiry information which is handed into - * the timer migration code by the timer code - * (get_next_timer_interrupt()) - * @firstexp: Contains the first event expiry information when last - * active CPU of hierarchy is on the way to idle to make - * sure CPU will be back in time. 
- * @evt: Pointer to tmigr_event which needs to be queued (of idle - * child group) - * @childmask: childmask of child group - * @remote: Is set, when the new timer path is executed in - * tmigr_handle_remote_cpu() - */ -struct tmigr_walk { - u64 nextexp; - u64 firstexp; - struct tmigr_event *evt; - u8 childmask; - bool remote; -}; - -/** - * struct tmigr_remote_data - data required for remote expiry hierarchy walk - * @basej: timer base in jiffies - * @now: timer base monotonic - * @firstexp: returns expiry of the first timer in the idle timer - * migration hierarchy to make sure the timer is handled in - * time; it is stored in the per CPU tmigr_cpu struct of - * CPU which expires remote timers - * @childmask: childmask of child group - * @check: is set if there is the need to handle remote timers; - * required in tmigr_requires_handle_remote() only - * @tmc_active: this flag indicates, whether the CPU which triggers - * the hierarchy walk is !idle in the timer migration - * hierarchy. When the CPU is idle and the whole hierarchy is - * idle, only the first event of the top level has to be - * considered. - */ -struct tmigr_remote_data { - unsigned long basej; - u64 now; - u64 firstexp; - u8 childmask; - bool check; - bool tmc_active; -}; - /* * Returns the next event of the timerqueue @group->events * @@ -618,10 +616,9 @@ static u64 tmigr_next_groupevt_expires(struct tmigr_group *group) static bool tmigr_active_up(struct tmigr_group *group, struct tmigr_group *child, - void *ptr) + struct tmigr_walk *data) { union tmigr_state curstate, newstate; - struct tmigr_walk *data = ptr; bool walk_done; u8 childmask; @@ -649,8 +646,7 @@ static bool tmigr_active_up(struct tmigr_group *group, } while (!atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)); - if ((walk_done == false) && group->parent) - data->childmask = group->childmask; + trace_tmigr_group_set_cpu_active(group, newstate, childmask); /* * The group is active (again). The group event might be still queued @@ -666,8 +662,6 @@ static bool tmigr_active_up(struct tmigr_group *group, */ group->groupevt.ignore = true; - trace_tmigr_group_set_cpu_active(group, newstate, childmask); - return walk_done; } @@ -675,7 +669,7 @@ static void __tmigr_cpu_activate(struct tmigr_cpu *tmc) { struct tmigr_walk data; - data.childmask = tmc->childmask; + data.childmask = tmc->groupmask; trace_tmigr_cpu_active(tmc); @@ -860,10 +854,8 @@ unlock: static bool tmigr_new_timer_up(struct tmigr_group *group, struct tmigr_group *child, - void *ptr) + struct tmigr_walk *data) { - struct tmigr_walk *data = ptr; - return tmigr_update_events(group, child, data); } @@ -995,9 +987,8 @@ unlock: static bool tmigr_handle_remote_up(struct tmigr_group *group, struct tmigr_group *child, - void *ptr) + struct tmigr_walk *data) { - struct tmigr_remote_data *data = ptr; struct tmigr_event *evt; unsigned long jif; u8 childmask; @@ -1034,12 +1025,10 @@ again: } /* - * Update of childmask for the next level and keep track of the expiry - * of the first event that needs to be handled (group->next_expiry was - * updated by tmigr_next_expired_groupevt(), next was set by - * tmigr_handle_remote_cpu()). + * Keep track of the expiry of the first event that needs to be handled + * (group->next_expiry was updated by tmigr_next_expired_groupevt(), + * next was set by tmigr_handle_remote_cpu()). 
*/ - data->childmask = group->childmask; data->firstexp = group->next_expiry; raw_spin_unlock_irq(&group->lock); @@ -1055,12 +1044,12 @@ again: void tmigr_handle_remote(void) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); - struct tmigr_remote_data data; + struct tmigr_walk data; if (tmigr_is_not_available(tmc)) return; - data.childmask = tmc->childmask; + data.childmask = tmc->groupmask; data.firstexp = KTIME_MAX; /* @@ -1068,7 +1057,7 @@ void tmigr_handle_remote(void) * in tmigr_handle_remote_up() anyway. Keep this check to speed up the * return when nothing has to be done. */ - if (!tmigr_check_migrator(tmc->tmgroup, tmc->childmask)) { + if (!tmigr_check_migrator(tmc->tmgroup, tmc->groupmask)) { /* * If this CPU was an idle migrator, make sure to clear its wakeup * value so it won't chase timers that have already expired elsewhere. @@ -1097,9 +1086,8 @@ void tmigr_handle_remote(void) static bool tmigr_requires_handle_remote_up(struct tmigr_group *group, struct tmigr_group *child, - void *ptr) + struct tmigr_walk *data) { - struct tmigr_remote_data *data = ptr; u8 childmask; childmask = data->childmask; @@ -1118,7 +1106,7 @@ static bool tmigr_requires_handle_remote_up(struct tmigr_group *group, * group before reading the next_expiry value. */ if (group->parent && !data->tmc_active) - goto out; + return false; /* * The lock is required on 32bit architectures to read the variable @@ -1143,9 +1131,6 @@ static bool tmigr_requires_handle_remote_up(struct tmigr_group *group, raw_spin_unlock(&group->lock); } -out: - /* Update of childmask for the next level */ - data->childmask = group->childmask; return false; } @@ -1157,7 +1142,7 @@ out: bool tmigr_requires_handle_remote(void) { struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); - struct tmigr_remote_data data; + struct tmigr_walk data; unsigned long jif; bool ret = false; @@ -1165,7 +1150,7 @@ bool tmigr_requires_handle_remote(void) return ret; data.now = get_jiffies_update(&jif); - data.childmask = tmc->childmask; + data.childmask = tmc->groupmask; data.firstexp = KTIME_MAX; data.tmc_active = !tmc->idle; data.check = false; @@ -1230,14 +1215,13 @@ u64 tmigr_cpu_new_timer(u64 nextexp) if (nextexp != tmc->cpuevt.nextevt.expires || tmc->cpuevt.ignore) { ret = tmigr_new_timer(tmc, nextexp); + /* + * Make sure the reevaluation of timers in idle path + * will not miss an event. + */ + WRITE_ONCE(tmc->wakeup, ret); } } - /* - * Make sure the reevaluation of timers in idle path will not miss an - * event. 
- */ - WRITE_ONCE(tmc->wakeup, ret); - trace_tmigr_cpu_new_timer_idle(tmc, nextexp); raw_spin_unlock(&tmc->lock); return ret; @@ -1245,10 +1229,9 @@ u64 tmigr_cpu_new_timer(u64 nextexp) static bool tmigr_inactive_up(struct tmigr_group *group, struct tmigr_group *child, - void *ptr) + struct tmigr_walk *data) { union tmigr_state curstate, newstate, childstate; - struct tmigr_walk *data = ptr; bool walk_done; u8 childmask; @@ -1299,9 +1282,10 @@ static bool tmigr_inactive_up(struct tmigr_group *group, WARN_ON_ONCE((newstate.migrator != TMIGR_NONE) && !(newstate.active)); - if (atomic_try_cmpxchg(&group->migr_state, &curstate.state, - newstate.state)) + if (atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)) { + trace_tmigr_group_set_cpu_inactive(group, newstate, childmask); break; + } /* * The memory barrier is paired with the cmpxchg() in @@ -1317,22 +1301,6 @@ static bool tmigr_inactive_up(struct tmigr_group *group, /* Event Handling */ tmigr_update_events(group, child, data); - if (group->parent && (walk_done == false)) - data->childmask = group->childmask; - - /* - * data->firstexp was set by tmigr_update_events() and contains the - * expiry of the first global event which needs to be handled. It - * differs from KTIME_MAX if: - * - group is the top level group and - * - group is idle (which means CPU was the last active CPU in the - * hierarchy) and - * - there is a pending event in the hierarchy - */ - WARN_ON_ONCE(data->firstexp != KTIME_MAX && group->parent); - - trace_tmigr_group_set_cpu_inactive(group, newstate, childmask); - return walk_done; } @@ -1341,7 +1309,7 @@ static u64 __tmigr_cpu_deactivate(struct tmigr_cpu *tmc, u64 nextexp) struct tmigr_walk data = { .nextexp = nextexp, .firstexp = KTIME_MAX, .evt = &tmc->cpuevt, - .childmask = tmc->childmask }; + .childmask = tmc->groupmask }; /* * If nextexp is KTIME_MAX, the CPU event will be ignored because the @@ -1400,7 +1368,7 @@ u64 tmigr_cpu_deactivate(u64 nextexp) * the only one in the level 0 group; and if it is the * only one in level 0 group, but there are more than a * single group active on the way to top level) - * * nextevt - when CPU is offline and has to handle timer on his own + * * nextevt - when CPU is offline and has to handle timer on its own * or when on the way to top in every group only a single * child is active but @nextevt is before the lowest * next_expiry encountered while walking up to top level. @@ -1419,7 +1387,7 @@ u64 tmigr_quick_check(u64 nextevt) if (WARN_ON_ONCE(tmc->idle)) return nextevt; - if (!tmigr_check_migrator_and_lonely(tmc->tmgroup, tmc->childmask)) + if (!tmigr_check_migrator_and_lonely(tmc->tmgroup, tmc->groupmask)) return KTIME_MAX; do { @@ -1442,6 +1410,66 @@ u64 tmigr_quick_check(u64 nextevt) return KTIME_MAX; } +/* + * tmigr_trigger_active() - trigger a CPU to become active again + * + * This function is executed on a CPU which is part of cpu_online_mask, when the + * last active CPU in the hierarchy is offlining. With this, it is ensured that + * the other CPU is active and takes over the migrator duty. 
+ */ +static long tmigr_trigger_active(void *unused) +{ + struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); + + WARN_ON_ONCE(!tmc->online || tmc->idle); + + return 0; +} + +static int tmigr_cpu_offline(unsigned int cpu) +{ + struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); + int migrator; + u64 firstexp; + + raw_spin_lock_irq(&tmc->lock); + tmc->online = false; + WRITE_ONCE(tmc->wakeup, KTIME_MAX); + + /* + * CPU has to handle the local events on his own, when on the way to + * offline; Therefore nextevt value is set to KTIME_MAX + */ + firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX); + trace_tmigr_cpu_offline(tmc); + raw_spin_unlock_irq(&tmc->lock); + + if (firstexp != KTIME_MAX) { + migrator = cpumask_any_but(cpu_online_mask, cpu); + work_on_cpu(migrator, tmigr_trigger_active, NULL); + } + + return 0; +} + +static int tmigr_cpu_online(unsigned int cpu) +{ + struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); + + /* Check whether CPU data was successfully initialized */ + if (WARN_ON_ONCE(!tmc->tmgroup)) + return -EINVAL; + + raw_spin_lock_irq(&tmc->lock); + trace_tmigr_cpu_online(tmc); + tmc->idle = timer_base_is_idle(); + if (!tmc->idle) + __tmigr_cpu_activate(tmc); + tmc->online = true; + raw_spin_unlock_irq(&tmc->lock); + return 0; +} + static void tmigr_init_group(struct tmigr_group *group, unsigned int lvl, int node) { @@ -1514,21 +1542,25 @@ static struct tmigr_group *tmigr_get_group(unsigned int cpu, int node, } static void tmigr_connect_child_parent(struct tmigr_group *child, - struct tmigr_group *parent) + struct tmigr_group *parent, + bool activate) { - union tmigr_state childstate; + struct tmigr_walk data; raw_spin_lock_irq(&child->lock); raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING); child->parent = parent; - child->childmask = BIT(parent->num_children++); + child->groupmask = BIT(parent->num_children++); raw_spin_unlock(&parent->lock); raw_spin_unlock_irq(&child->lock); trace_tmigr_connect_child_parent(child); + if (!activate) + return; + /* * To prevent inconsistent states, active children need to be active in * the new parent as well. Inactive children are already marked inactive @@ -1544,21 +1576,24 @@ static void tmigr_connect_child_parent(struct tmigr_group *child, * child to the new parent. So tmigr_connect_child_parent() is * executed with the formerly top level group (child) and the newly * created group (parent). + * + * * It is ensured that the child is active, as this setup path is + * executed in hotplug prepare callback. This is exectued by an + * already connected and !idle CPU. Even if all other CPUs go idle, + * the CPU executing the setup will be responsible up to current top + * level group. And the next time it goes inactive, it will release + * the new childmask and parent to subsequent walkers through this + * @child. Therefore propagate active state unconditionally. */ - childstate.state = atomic_read(&child->migr_state); - if (childstate.migrator != TMIGR_NONE) { - struct tmigr_walk data; - - data.childmask = child->childmask; + data.childmask = child->groupmask; - /* - * There is only one new level per time. When connecting the - * child and the parent and set the child active when the parent - * is inactive, the parent needs to be the uppermost - * level. Otherwise there went something wrong! - */ - WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent); - } + /* + * There is only one new level per time (which is protected by + * tmigr_mutex). 
When connecting the child and the parent and set the + * child active when the parent is inactive, the parent needs to be the + * uppermost level. Otherwise there went something wrong! + */ + WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent); } static int tmigr_setup_groups(unsigned int cpu, unsigned int node) @@ -1611,12 +1646,12 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node) * Update tmc -> group / child -> group connection */ if (i == 0) { - struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); + struct tmigr_cpu *tmc = per_cpu_ptr(&tmigr_cpu, cpu); raw_spin_lock_irq(&group->lock); tmc->tmgroup = group; - tmc->childmask = BIT(group->num_children++); + tmc->groupmask = BIT(group->num_children++); raw_spin_unlock_irq(&group->lock); @@ -1626,7 +1661,8 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node) continue; } else { child = stack[i - 1]; - tmigr_connect_child_parent(child, group); + /* Will be activated at online time */ + tmigr_connect_child_parent(child, group, false); } /* check if uppermost level was newly created */ @@ -1637,12 +1673,21 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node) lvllist = &tmigr_level_list[top]; if (group->num_children == 1 && list_is_singular(lvllist)) { + /* + * The target CPU must never do the prepare work, except + * on early boot when the boot CPU is the target. Otherwise + * it may spuriously activate the old top level group inside + * the new one (nevertheless whether old top level group is + * active or not) and/or release an uninitialized childmask. + */ + WARN_ON_ONCE(cpu == raw_smp_processor_id()); + lvllist = &tmigr_level_list[top - 1]; list_for_each_entry(child, lvllist, list) { if (child->parent) continue; - tmigr_connect_child_parent(child, group); + tmigr_connect_child_parent(child, group, true); } } } @@ -1664,80 +1709,31 @@ static int tmigr_add_cpu(unsigned int cpu) return ret; } -static int tmigr_cpu_online(unsigned int cpu) -{ - struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); - int ret; - - /* First online attempt? Initialize CPU data */ - if (!tmc->tmgroup) { - raw_spin_lock_init(&tmc->lock); - - ret = tmigr_add_cpu(cpu); - if (ret < 0) - return ret; - - if (tmc->childmask == 0) - return -EINVAL; - - timerqueue_init(&tmc->cpuevt.nextevt); - tmc->cpuevt.nextevt.expires = KTIME_MAX; - tmc->cpuevt.ignore = true; - tmc->cpuevt.cpu = cpu; - - tmc->remote = false; - WRITE_ONCE(tmc->wakeup, KTIME_MAX); - } - raw_spin_lock_irq(&tmc->lock); - trace_tmigr_cpu_online(tmc); - tmc->idle = timer_base_is_idle(); - if (!tmc->idle) - __tmigr_cpu_activate(tmc); - tmc->online = true; - raw_spin_unlock_irq(&tmc->lock); - return 0; -} - -/* - * tmigr_trigger_active() - trigger a CPU to become active again - * - * This function is executed on a CPU which is part of cpu_online_mask, when the - * last active CPU in the hierarchy is offlining. With this, it is ensured that - * the other CPU is active and takes over the migrator duty. - */ -static long tmigr_trigger_active(void *unused) +static int tmigr_cpu_prepare(unsigned int cpu) { - struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); + struct tmigr_cpu *tmc = per_cpu_ptr(&tmigr_cpu, cpu); + int ret = 0; - WARN_ON_ONCE(!tmc->online || tmc->idle); - - return 0; -} - -static int tmigr_cpu_offline(unsigned int cpu) -{ - struct tmigr_cpu *tmc = this_cpu_ptr(&tmigr_cpu); - int migrator; - u64 firstexp; + /* Not first online attempt? 
*/ + if (tmc->tmgroup) + return ret; - raw_spin_lock_irq(&tmc->lock); - tmc->online = false; + raw_spin_lock_init(&tmc->lock); + timerqueue_init(&tmc->cpuevt.nextevt); + tmc->cpuevt.nextevt.expires = KTIME_MAX; + tmc->cpuevt.ignore = true; + tmc->cpuevt.cpu = cpu; + tmc->remote = false; WRITE_ONCE(tmc->wakeup, KTIME_MAX); - /* - * CPU has to handle the local events on his own, when on the way to - * offline; Therefore nextevt value is set to KTIME_MAX - */ - firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX); - trace_tmigr_cpu_offline(tmc); - raw_spin_unlock_irq(&tmc->lock); + ret = tmigr_add_cpu(cpu); + if (ret < 0) + return ret; - if (firstexp != KTIME_MAX) { - migrator = cpumask_any_but(cpu_online_mask, cpu); - work_on_cpu(migrator, tmigr_trigger_active, NULL); - } + if (tmc->groupmask == 0) + return -EINVAL; - return 0; + return ret; } static int __init tmigr_init(void) @@ -1796,6 +1792,11 @@ static int __init tmigr_init(void) tmigr_hierarchy_levels, TMIGR_CHILDREN_PER_GROUP, tmigr_crossnode_level); + ret = cpuhp_setup_state(CPUHP_TMIGR_PREPARE, "tmigr:prepare", + tmigr_cpu_prepare, NULL); + if (ret) + goto err; + ret = cpuhp_setup_state(CPUHP_AP_TMIGR_ONLINE, "tmigr:online", tmigr_cpu_online, tmigr_cpu_offline); if (ret) @@ -1807,4 +1808,4 @@ err: pr_err("Timer migration setup failed\n"); return ret; } -late_initcall(tmigr_init); +early_initcall(tmigr_init); diff --git a/kernel/time/timer_migration.h b/kernel/time/timer_migration.h index 6c37d94a37d9..154accc7a543 100644 --- a/kernel/time/timer_migration.h +++ b/kernel/time/timer_migration.h @@ -22,7 +22,17 @@ struct tmigr_event { * struct tmigr_group - timer migration hierarchy group * @lock: Lock protecting the event information and group hierarchy * information during setup - * @parent: Pointer to the parent group + * @parent: Pointer to the parent group. Pointer is updated when a + * new hierarchy level is added because of a CPU coming + * online the first time. Once it is set, the pointer will + * not be removed or updated. When accessing parent pointer + * lock less to decide whether to abort a propagation or + * not, it is not a problem. The worst outcome is an + * unnecessary/early CPU wake up. But do not access parent + * pointer several times in the same 'action' (like + * activation, deactivation, check for remote expiry,...) + * without holding the lock as it is not ensured that value + * will not change. * @groupevt: Next event of the group which is only used when the * group is !active. The group event is then queued into * the parent timer queue. 
@@ -41,9 +51,8 @@ struct tmigr_event { * @num_children: Counter of group children to make sure the group is only * filled with TMIGR_CHILDREN_PER_GROUP; Required for setup * only - * @childmask: childmask of the group in the parent group; is set - * during setup and will never change; can be read - * lockless + * @groupmask: mask of the group in the parent group; is set during + * setup and will never change; can be read lockless * @list: List head that is added to the per level * tmigr_level_list; is required during setup when a * new group needs to be connected to the existing @@ -59,7 +68,7 @@ struct tmigr_group { unsigned int level; int numa_node; unsigned int num_children; - u8 childmask; + u8 groupmask; struct list_head list; }; @@ -79,7 +88,7 @@ struct tmigr_group { * hierarchy * @remote: Is set when timers of the CPU are expired remotely * @tmgroup: Pointer to the parent group - * @childmask: childmask of tmigr_cpu in the parent group + * @groupmask: mask of tmigr_cpu in the parent group * @wakeup: Stores the first timer when the timer migration * hierarchy is completely idle and remote expiry was done; * is returned to timer code in the idle path and is only @@ -92,7 +101,7 @@ struct tmigr_cpu { bool idle; bool remote; struct tmigr_group *tmgroup; - u8 childmask; + u8 groupmask; u64 wakeup; struct tmigr_event cpuevt; }; @@ -108,8 +117,8 @@ union tmigr_state { u32 state; /** * struct - split state of tmigr_group - * @active: Contains each childmask bit of the active children - * @migrator: Contains childmask of the child which is migrator + * @active: Contains each mask bit of the active children + * @migrator: Contains mask of the child which is migrator * @seq: Sequence counter needs to be increased when an update * to the tmigr_state is done. 
It prevents a race when * updates in the child groups are propagated in changed diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0f579430f02a..4c28dd177ca6 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -8735,7 +8735,7 @@ static bool is_permanent_ops_registered(void) } static int -ftrace_enable_sysctl(struct ctl_table *table, int write, +ftrace_enable_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = -ENODEV; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 578a49ff5c32..10cd38bce2f1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2767,7 +2767,7 @@ static void output_printk(struct trace_event_buffer *fbuffer) raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags); } -int tracepoint_printk_sysctl(struct ctl_table *table, int write, +int tracepoint_printk_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 3a2b46847c8b..42b0d998d103 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -2885,7 +2885,7 @@ err: return -ENODEV; } -static int set_max_user_events_sysctl(struct ctl_table *table, int write, +static int set_max_user_events_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 5a48dba912ea..7f9572a37333 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -514,7 +514,7 @@ static const struct file_operations stack_trace_filter_fops = { #endif /* CONFIG_DYNAMIC_FTRACE */ int -stack_trace_sysctl(struct ctl_table *table, int write, void *buffer, +stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int was_enabled; diff --git a/kernel/umh.c b/kernel/umh.c index 598b3ffe1522..ff1f13a27d29 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -495,7 +495,7 @@ int call_usermodehelper(const char *path, char **argv, char **envp, int wait) EXPORT_SYMBOL(call_usermodehelper); #if defined(CONFIG_SYSCTL) -static int proc_cap_handler(struct ctl_table *table, int write, +static int proc_cap_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index 04e4513f2985..7282f61a8650 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -30,7 +30,7 @@ static void *get_uts(const struct ctl_table *table) * Special case of dostring for the UTS structure. This has locks * to observe. Should this be in kernel/sys.c ???? 
*/ -static int proc_do_uts_string(struct ctl_table *table, int write, +static int proc_do_uts_string(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table uts_table; diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c index 1d5eadd9dd61..8b4f8cc2e0ec 100644 --- a/kernel/vmcore_info.c +++ b/kernel/vmcore_info.c @@ -216,12 +216,8 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_SYMBOL(kallsyms_num_syms); VMCOREINFO_SYMBOL(kallsyms_token_table); VMCOREINFO_SYMBOL(kallsyms_token_index); -#ifdef CONFIG_KALLSYMS_BASE_RELATIVE VMCOREINFO_SYMBOL(kallsyms_offsets); VMCOREINFO_SYMBOL(kallsyms_relative_base); -#else - VMCOREINFO_SYMBOL(kallsyms_addresses); -#endif /* CONFIG_KALLSYMS_BASE_RELATIVE */ #endif /* CONFIG_KALLSYMS */ arch_crash_save_vmcoreinfo(); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 51915b44ac73..830a83895493 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -983,7 +983,7 @@ static void proc_watchdog_update(void) * -------------------|----------------------------------|------------------------------- * proc_soft_watchdog | watchdog_softlockup_user_enabled | WATCHDOG_SOFTOCKUP_ENABLED */ -static int proc_watchdog_common(int which, struct ctl_table *table, int write, +static int proc_watchdog_common(int which, const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int err, old, *param = table->data; @@ -1010,7 +1010,7 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, /* * /proc/sys/kernel/watchdog */ -static int proc_watchdog(struct ctl_table *table, int write, +static int proc_watchdog(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return proc_watchdog_common(WATCHDOG_HARDLOCKUP_ENABLED | @@ -1021,7 +1021,7 @@ static int proc_watchdog(struct ctl_table *table, int write, /* * /proc/sys/kernel/nmi_watchdog */ -static int proc_nmi_watchdog(struct ctl_table *table, int write, +static int proc_nmi_watchdog(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { if (!watchdog_hardlockup_available && write) @@ -1034,7 +1034,7 @@ static int proc_nmi_watchdog(struct ctl_table *table, int write, /* * /proc/sys/kernel/soft_watchdog */ -static int proc_soft_watchdog(struct ctl_table *table, int write, +static int proc_soft_watchdog(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { return proc_watchdog_common(WATCHDOG_SOFTOCKUP_ENABLED, @@ -1045,7 +1045,7 @@ static int proc_soft_watchdog(struct ctl_table *table, int write, /* * /proc/sys/kernel/watchdog_thresh */ -static int proc_watchdog_thresh(struct ctl_table *table, int write, +static int proc_watchdog_thresh(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int err, old; @@ -1068,7 +1068,7 @@ static int proc_watchdog_thresh(struct ctl_table *table, int write, * user to specify a mask that will include cpus that have not yet * been brought online, if desired. */ -static int proc_watchdog_cpumask(struct ctl_table *table, int write, +static int proc_watchdog_cpumask(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int err; |
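The bulk of the hunks above switch sysctl proc handlers from `struct ctl_table *` to `const struct ctl_table *`. A minimal sketch of a handler written against the new prototype; the names `demo_value` and `demo_handler` are illustrative only and are not part of this diff:

```c
#include <linux/sysctl.h>

static int demo_value;

static int demo_handler(const struct ctl_table *table, int write,
			void *buffer, size_t *lenp, loff_t *ppos)
{
	/*
	 * The table is now const, so a handler that needs to redirect
	 * ->data (as several handlers in this diff do) works on a local
	 * copy instead of modifying the registered table.
	 */
	struct ctl_table tmp = *table;
	int val = READ_ONCE(demo_value);
	int ret;

	tmp.data = &val;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	WRITE_ONCE(demo_value, val);
	return 0;
}
```

Handlers that only read configuration, such as the bpf_stats_handler hunk, can still cast `table->data` directly: only the table structure is const-qualified, not the data it points to.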
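For completeness, a sketch of how such a handler is wired up. The use of register_sysctl() and the sentinel-free table layout are assumptions about a recent kernel, not something introduced by this diff:

```c
static struct ctl_table demo_table[] = {
	{
		.procname	= "demo_value",
		.data		= &demo_value,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= demo_handler,
	},
};

static int __init demo_sysctl_init(void)
{
	/*
	 * register_sysctl() sizes the table with ARRAY_SIZE(), so no
	 * terminating empty entry is needed on recent kernels.
	 */
	if (!register_sysctl("kernel", demo_table))
		return -ENOMEM;
	return 0;
}
late_initcall(demo_sysctl_init);
```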
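Separately, several kdb_io.c hunks replace kdb_printf(kdb_prompt_str) with kdb_printf("%s", kdb_prompt_str), closing an ordinary format-string hazard. A small user-space illustration, with plain printf standing in for kdb_printf:

```c
#include <stdio.h>

int main(void)
{
	/*
	 * A prompt string that happens to contain '%', for example one
	 * set through the kdb PROMPT environment variable.
	 */
	const char *prompt = "kdb[cpu 0, 100%]> ";

	/*
	 * printf(prompt) would parse "%]" as a conversion specifier and
	 * the output is undefined; passing the string as an argument to
	 * a "%s" format always prints it verbatim.
	 */
	printf("%s", prompt);

	return 0;
}
```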