diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/cgroup/cgroup.c | 37 | ||||
| -rw-r--r-- | kernel/exit.c | 2 | ||||
| -rw-r--r-- | kernel/module/internal.h | 13 | ||||
| -rw-r--r-- | kernel/module/kallsyms.c | 35 | ||||
| -rw-r--r-- | kernel/module/main.c | 9 | ||||
| -rw-r--r-- | kernel/time/posix-timers.c | 19 | ||||
| -rw-r--r-- | kernel/trace/Kconfig | 3 | ||||
| -rw-r--r-- | kernel/trace/trace.c | 11 | ||||
| -rw-r--r-- | kernel/trace/trace_events_hist.c | 2 |
9 files changed, 83 insertions, 48 deletions
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 1779ccddb734..13c8e91d7862 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -765,7 +765,8 @@ struct css_set init_css_set = { .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets), .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), - .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), + .mg_src_preload_node = LIST_HEAD_INIT(init_css_set.mg_src_preload_node), + .mg_dst_preload_node = LIST_HEAD_INIT(init_css_set.mg_dst_preload_node), .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), /* @@ -1240,7 +1241,8 @@ static struct css_set *find_css_set(struct css_set *old_cset, INIT_LIST_HEAD(&cset->threaded_csets); INIT_HLIST_NODE(&cset->hlist); INIT_LIST_HEAD(&cset->cgrp_links); - INIT_LIST_HEAD(&cset->mg_preload_node); + INIT_LIST_HEAD(&cset->mg_src_preload_node); + INIT_LIST_HEAD(&cset->mg_dst_preload_node); INIT_LIST_HEAD(&cset->mg_node); /* Copy the set of subsystem state objects generated in @@ -2597,21 +2599,27 @@ int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp) */ void cgroup_migrate_finish(struct cgroup_mgctx *mgctx) { - LIST_HEAD(preloaded); struct css_set *cset, *tmp_cset; lockdep_assert_held(&cgroup_mutex); spin_lock_irq(&css_set_lock); - list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded); - list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded); + list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_src_csets, + mg_src_preload_node) { + cset->mg_src_cgrp = NULL; + cset->mg_dst_cgrp = NULL; + cset->mg_dst_cset = NULL; + list_del_init(&cset->mg_src_preload_node); + put_css_set_locked(cset); + } - list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) { + list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_dst_csets, + mg_dst_preload_node) { cset->mg_src_cgrp = NULL; cset->mg_dst_cgrp = NULL; cset->mg_dst_cset = NULL; - list_del_init(&cset->mg_preload_node); + list_del_init(&cset->mg_dst_preload_node); put_css_set_locked(cset); } @@ -2651,7 +2659,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, if (src_cset->dead) return; - if (!list_empty(&src_cset->mg_preload_node)) + if (!list_empty(&src_cset->mg_src_preload_node)) return; src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); @@ -2664,7 +2672,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, src_cset->mg_src_cgrp = src_cgrp; src_cset->mg_dst_cgrp = dst_cgrp; get_css_set(src_cset); - list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets); + list_add_tail(&src_cset->mg_src_preload_node, &mgctx->preloaded_src_csets); } /** @@ -2689,7 +2697,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) /* look up the dst cset for each src cset and link it to src */ list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets, - mg_preload_node) { + mg_src_preload_node) { struct css_set *dst_cset; struct cgroup_subsys *ss; int ssid; @@ -2708,7 +2716,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) if (src_cset == dst_cset) { src_cset->mg_src_cgrp = NULL; src_cset->mg_dst_cgrp = NULL; - list_del_init(&src_cset->mg_preload_node); + list_del_init(&src_cset->mg_src_preload_node); put_css_set(src_cset); put_css_set(dst_cset); continue; @@ -2716,8 +2724,8 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) src_cset->mg_dst_cset = dst_cset; - if (list_empty(&dst_cset->mg_preload_node)) - list_add_tail(&dst_cset->mg_preload_node, + if (list_empty(&dst_cset->mg_dst_preload_node)) + list_add_tail(&dst_cset->mg_dst_preload_node, &mgctx->preloaded_dst_csets); else put_css_set(dst_cset); @@ -2963,7 +2971,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) goto out_finish; spin_lock_irq(&css_set_lock); - list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) { + list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, + mg_src_preload_node) { struct task_struct *task, *ntask; /* all tasks in src_csets need to be migrated */ diff --git a/kernel/exit.c b/kernel/exit.c index f072959fcab7..64c938ce36fe 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -766,7 +766,7 @@ void __noreturn do_exit(long code) #ifdef CONFIG_POSIX_TIMERS hrtimer_cancel(&tsk->signal->real_timer); - exit_itimers(tsk->signal); + exit_itimers(tsk); #endif if (tsk->mm) setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); diff --git a/kernel/module/internal.h b/kernel/module/internal.h index bc5507ab8450..ec104c2950c3 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -11,6 +11,7 @@ #include <linux/mutex.h> #include <linux/rculist.h> #include <linux/rcupdate.h> +#include <linux/mm.h> #ifndef ARCH_SHF_SMALL #define ARCH_SHF_SMALL 0 @@ -30,11 +31,13 @@ * to ensure complete separation of code and data, but * only when CONFIG_STRICT_MODULE_RWX=y */ -#ifdef CONFIG_STRICT_MODULE_RWX -# define strict_align(X) PAGE_ALIGN(X) -#else -# define strict_align(X) (X) -#endif +static inline unsigned int strict_align(unsigned int size) +{ + if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + return PAGE_ALIGN(size); + else + return size; +} extern struct mutex module_mutex; extern struct list_head modules; diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index 3e11523bc6f6..77e75bead569 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -137,6 +137,7 @@ void layout_symtab(struct module *mod, struct load_info *info) info->symoffs = ALIGN(mod->data_layout.size, symsect->sh_addralign ?: 1); info->stroffs = mod->data_layout.size = info->symoffs + ndst * sizeof(Elf_Sym); mod->data_layout.size += strtab_size; + /* Note add_kallsyms() computes strtab_size as core_typeoffs - stroffs */ info->core_typeoffs = mod->data_layout.size; mod->data_layout.size += ndst * sizeof(char); mod->data_layout.size = strict_align(mod->data_layout.size); @@ -169,19 +170,20 @@ void add_kallsyms(struct module *mod, const struct load_info *info) Elf_Sym *dst; char *s; Elf_Shdr *symsec = &info->sechdrs[info->index.sym]; + unsigned long strtab_size; /* Set up to point into init section. */ mod->kallsyms = (void __rcu *)mod->init_layout.base + info->mod_kallsyms_init_off; - preempt_disable(); + rcu_read_lock(); /* The following is safe since this pointer cannot change */ - rcu_dereference_sched(mod->kallsyms)->symtab = (void *)symsec->sh_addr; - rcu_dereference_sched(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym); + rcu_dereference(mod->kallsyms)->symtab = (void *)symsec->sh_addr; + rcu_dereference(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym); /* Make sure we get permanent strtab: don't use info->strtab. */ - rcu_dereference_sched(mod->kallsyms)->strtab = + rcu_dereference(mod->kallsyms)->strtab = (void *)info->sechdrs[info->index.str].sh_addr; - rcu_dereference_sched(mod->kallsyms)->typetab = mod->init_layout.base + info->init_typeoffs; + rcu_dereference(mod->kallsyms)->typetab = mod->init_layout.base + info->init_typeoffs; /* * Now populate the cut down core kallsyms for after init @@ -190,22 +192,29 @@ void add_kallsyms(struct module *mod, const struct load_info *info) mod->core_kallsyms.symtab = dst = mod->data_layout.base + info->symoffs; mod->core_kallsyms.strtab = s = mod->data_layout.base + info->stroffs; mod->core_kallsyms.typetab = mod->data_layout.base + info->core_typeoffs; - src = rcu_dereference_sched(mod->kallsyms)->symtab; - for (ndst = i = 0; i < rcu_dereference_sched(mod->kallsyms)->num_symtab; i++) { - rcu_dereference_sched(mod->kallsyms)->typetab[i] = elf_type(src + i, info); + strtab_size = info->core_typeoffs - info->stroffs; + src = rcu_dereference(mod->kallsyms)->symtab; + for (ndst = i = 0; i < rcu_dereference(mod->kallsyms)->num_symtab; i++) { + rcu_dereference(mod->kallsyms)->typetab[i] = elf_type(src + i, info); if (i == 0 || is_livepatch_module(mod) || is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum, info->index.pcpu)) { + ssize_t ret; + mod->core_kallsyms.typetab[ndst] = - rcu_dereference_sched(mod->kallsyms)->typetab[i]; + rcu_dereference(mod->kallsyms)->typetab[i]; dst[ndst] = src[i]; dst[ndst++].st_name = s - mod->core_kallsyms.strtab; - s += strscpy(s, - &rcu_dereference_sched(mod->kallsyms)->strtab[src[i].st_name], - KSYM_NAME_LEN) + 1; + ret = strscpy(s, + &rcu_dereference(mod->kallsyms)->strtab[src[i].st_name], + strtab_size); + if (ret < 0) + break; + s += ret + 1; + strtab_size -= ret + 1; } } - preempt_enable(); + rcu_read_unlock(); mod->core_kallsyms.num_symtab = ndst; } diff --git a/kernel/module/main.c b/kernel/module/main.c index fed58d30725d..0548151dd933 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2939,24 +2939,25 @@ static void cfi_init(struct module *mod) { #ifdef CONFIG_CFI_CLANG initcall_t *init; +#ifdef CONFIG_MODULE_UNLOAD exitcall_t *exit; +#endif rcu_read_lock_sched(); mod->cfi_check = (cfi_check_fn) find_kallsyms_symbol_value(mod, "__cfi_check"); init = (initcall_t *) find_kallsyms_symbol_value(mod, "__cfi_jt_init_module"); - exit = (exitcall_t *) - find_kallsyms_symbol_value(mod, "__cfi_jt_cleanup_module"); - rcu_read_unlock_sched(); - /* Fix init/exit functions to point to the CFI jump table */ if (init) mod->init = *init; #ifdef CONFIG_MODULE_UNLOAD + exit = (exitcall_t *) + find_kallsyms_symbol_value(mod, "__cfi_jt_cleanup_module"); if (exit) mod->exit = *exit; #endif + rcu_read_unlock_sched(); cfi_module_add(mod, mod_tree.addr_min); #endif diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 1cd10b102c51..5dead89308b7 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1051,15 +1051,24 @@ retry_delete: } /* - * This is called by do_exit or de_thread, only when there are no more - * references to the shared signal_struct. + * This is called by do_exit or de_thread, only when nobody else can + * modify the signal->posix_timers list. Yet we need sighand->siglock + * to prevent the race with /proc/pid/timers. */ -void exit_itimers(struct signal_struct *sig) +void exit_itimers(struct task_struct *tsk) { + struct list_head timers; struct k_itimer *tmr; - while (!list_empty(&sig->posix_timers)) { - tmr = list_entry(sig->posix_timers.next, struct k_itimer, list); + if (list_empty(&tsk->signal->posix_timers)) + return; + + spin_lock_irq(&tsk->sighand->siglock); + list_replace_init(&tsk->signal->posix_timers, &timers); + spin_unlock_irq(&tsk->sighand->siglock); + + while (!list_empty(&timers)) { + tmr = list_first_entry(&timers, struct k_itimer, list); itimer_delete(tmr); } } diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index debbbb083286..ccd6a5ade3e9 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -194,7 +194,8 @@ config FUNCTION_TRACER sequence is then dynamically patched into a tracer call when tracing is enabled by the administrator. If it's runtime disabled (the bootup default), then the overhead of the instructions is very - small and not measurable even in micro-benchmarks. + small and not measurable even in micro-benchmarks (at least on + x86, but may have impact on other architectures). config FUNCTION_GRAPH_TRACER bool "Kernel Function Graph Tracer" diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a8cfac0611bc..b8dd54627075 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9864,6 +9864,12 @@ void trace_init_global_iter(struct trace_iterator *iter) /* Output in nanoseconds only if we are using a clock in nanoseconds. */ if (trace_clocks[iter->tr->clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; + + /* Can not use kmalloc for iter.temp and iter.fmt */ + iter->temp = static_temp_buf; + iter->temp_size = STATIC_TEMP_BUF_SIZE; + iter->fmt = static_fmt_buf; + iter->fmt_size = STATIC_FMT_BUF_SIZE; } void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) @@ -9896,11 +9902,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) /* Simulate the iterator */ trace_init_global_iter(&iter); - /* Can not use kmalloc for iter.temp and iter.fmt */ - iter.temp = static_temp_buf; - iter.temp_size = STATIC_TEMP_BUF_SIZE; - iter.fmt = static_fmt_buf; - iter.fmt_size = STATIC_FMT_BUF_SIZE; for_each_tracing_cpu(cpu) { atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 48e82e141d54..e87a46794079 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -4430,6 +4430,8 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) s = kstrdup(field_str, GFP_KERNEL); if (!s) { + kfree(hist_data->attrs->var_defs.name[n_vars]); + hist_data->attrs->var_defs.name[n_vars] = NULL; ret = -ENOMEM; goto free; } |