Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/verifier.c                  5
-rw-r--r--  kernel/cgroup/cpuset.c                38
-rw-r--r--  kernel/cpu.c                          12
-rw-r--r--  kernel/crash_reserve.c                 3
-rw-r--r--  kernel/dma/debug.c                     5
-rw-r--r--  kernel/events/core.c                   3
-rw-r--r--  kernel/irq/irqdesc.c                   1
-rw-r--r--  kernel/kallsyms.c                     55
-rw-r--r--  kernel/kallsyms_selftest.c            22
-rw-r--r--  kernel/kcov.c                         15
-rw-r--r--  kernel/kprobes.c                       4
-rw-r--r--  kernel/locking/lockdep.c               6
-rw-r--r--  kernel/module/main.c                  41
-rw-r--r--  kernel/padata.c                        7
-rw-r--r--  kernel/panic.c                         8
-rw-r--r--  kernel/printk/printk.c                 2
-rw-r--r--  kernel/trace/fgraph.c                  2
-rw-r--r--  kernel/trace/ring_buffer.c            12
-rw-r--r--  kernel/trace/trace.c                   2
-rw-r--r--  kernel/trace/trace.h                  23
-rw-r--r--  kernel/trace/trace_events.c           41
-rw-r--r--  kernel/trace/trace_events_hist.c       4
-rw-r--r--  kernel/trace/trace_events_inject.c     2
-rw-r--r--  kernel/trace/trace_events_trigger.c    6
-rw-r--r--  kernel/trace/tracing_map.c             6
-rw-r--r--  kernel/workqueue.c                    50
26 files changed, 203 insertions, 172 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 4cb5441ad75f..d8520095ca03 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -16884,8 +16884,9 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
spi = i / BPF_REG_SIZE;
if (exact != NOT_EXACT &&
- old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
- cur->stack[spi].slot_type[i % BPF_REG_SIZE])
+ (i >= cur->allocated_stack ||
+ old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
+ cur->stack[spi].slot_type[i % BPF_REG_SIZE]))
return false;
if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 40ec4abaf440..4bd9e50bcc8e 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -233,6 +233,13 @@ static cpumask_var_t isolated_cpus;
static struct list_head remote_children;
/*
+ * When set, this flag forces a sched domain rebuild at the end of the
+ * current operation while inhibiting it in the intermediate stages.
+ * Currently it is only set in hotplug code.
+ */
+static bool force_sd_rebuild;
+
+/*
* Partition root states:
*
* 0 - member (not a partition root)
@@ -1475,7 +1482,7 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs)
clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
}
- if (rebuild_domains)
+ if (rebuild_domains && !force_sd_rebuild)
rebuild_sched_domains_locked();
}
@@ -1833,7 +1840,7 @@ static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask,
remote_partition_disable(child, tmp);
disable_cnt++;
}
- if (disable_cnt)
+ if (disable_cnt && !force_sd_rebuild)
rebuild_sched_domains_locked();
}
@@ -1991,6 +1998,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
part_error = PERR_CPUSEMPTY;
goto write_error;
}
+ /* Check newmask again, whether cpus are available for parent/cs */
+ nocpu |= tasks_nocpu_error(parent, cs, newmask);
/*
* partcmd_update with newmask:
@@ -2440,7 +2449,8 @@ get_css:
}
rcu_read_unlock();
- if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD))
+ if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD) &&
+ !force_sd_rebuild)
rebuild_sched_domains_locked();
}
@@ -2523,7 +2533,8 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
*/
if (!*buf) {
cpumask_clear(trialcs->cpus_allowed);
- cpumask_clear(trialcs->effective_xcpus);
+ if (cpumask_empty(trialcs->exclusive_cpus))
+ cpumask_clear(trialcs->effective_xcpus);
} else {
retval = cpulist_parse(buf, trialcs->cpus_allowed);
if (retval < 0)
@@ -3101,7 +3112,8 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
cs->flags = trialcs->flags;
spin_unlock_irq(&callback_lock);
- if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
+ if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed &&
+ !force_sd_rebuild)
rebuild_sched_domains_locked();
if (spread_flag_changed)
@@ -4498,11 +4510,9 @@ hotplug_update_tasks(struct cpuset *cs,
update_tasks_nodemask(cs);
}
-static bool force_rebuild;
-
void cpuset_force_rebuild(void)
{
- force_rebuild = true;
+ force_sd_rebuild = true;
}
/**
@@ -4650,15 +4660,9 @@ static void cpuset_handle_hotplug(void)
!cpumask_empty(subpartitions_cpus);
mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems);
- /*
- * In the rare case that hotplug removes all the cpus in
- * subpartitions_cpus, we assumed that cpus are updated.
- */
- if (!cpus_updated && !cpumask_empty(subpartitions_cpus))
- cpus_updated = true;
-
/* For v1, synchronize cpus_allowed to cpu_active_mask */
if (cpus_updated) {
+ cpuset_force_rebuild();
spin_lock_irq(&callback_lock);
if (!on_dfl)
cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
@@ -4714,8 +4718,8 @@ static void cpuset_handle_hotplug(void)
}
/* rebuild sched domains if cpus_allowed has changed */
- if (cpus_updated || force_rebuild) {
- force_rebuild = false;
+ if (force_sd_rebuild) {
+ force_sd_rebuild = false;
rebuild_sched_domains_cpuslocked();
}
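
The cpuset changes above replace the old force_rebuild flag with force_sd_rebuild and make every intermediate rebuild_sched_domains_locked() call back off while it is set, so a batched operation (currently only hotplug) rebuilds the sched domains exactly once at the end. Below is a minimal userspace sketch of that defer-and-flush pattern; the names (batch_begin, batch_end, update_step, rebuild_domains) are hypothetical stand-ins for the kernel functions, not kernel APIs.

/*
 * Minimal sketch of the deferred-rebuild pattern, assuming hypothetical names.
 */
#include <stdbool.h>
#include <stdio.h>

static bool force_rebuild;              /* analogous to force_sd_rebuild */

static void rebuild_domains(void)
{
	printf("expensive rebuild\n");
}

/* Intermediate update step: skips the rebuild while a batch is in flight. */
static void update_step(bool domains_changed)
{
	if (domains_changed && !force_rebuild)
		rebuild_domains();
}

static void batch_begin(void)
{
	force_rebuild = true;
}

static void batch_end(void)
{
	if (force_rebuild) {
		force_rebuild = false;
		rebuild_domains();      /* one rebuild for the whole batch */
	}
}

int main(void)
{
	batch_begin();
	update_step(true);              /* suppressed */
	update_step(true);              /* suppressed */
	batch_end();                    /* single rebuild happens here */
	return 0;
}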
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 1209ddaec026..b1fd2a3db91a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2689,6 +2689,16 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
return ret;
}
+/**
+ * topology_is_core_online - Check if the core a CPU belongs to is online
+ * @cpu: CPU whose core to check
+ */
+#if !defined(topology_is_core_online)
+static inline bool topology_is_core_online(unsigned int cpu)
+{
+ return true;
+}
+#endif
+
int cpuhp_smt_enable(void)
{
int cpu, ret = 0;
@@ -2699,7 +2709,7 @@ int cpuhp_smt_enable(void)
/* Skip online CPUs and CPUs on offline nodes */
if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
continue;
- if (!cpu_smt_thread_allowed(cpu))
+ if (!cpu_smt_thread_allowed(cpu) || !topology_is_core_online(cpu))
continue;
ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
if (ret)
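
The cpuhp_smt_enable() hunk above relies on topology_is_core_online(), with a generic fallback that simply reports the core as online when an architecture does not provide its own definition. The standalone sketch below illustrates that define-or-fallback pattern under the same assumption; ARCH_TOPOLOGY_HAS_CORE_ONLINE and the sample data are made up for illustration.

/*
 * Sketch of the define-or-fallback pattern; the override switch and data
 * below are hypothetical.
 */
#include <stdbool.h>
#include <stdio.h>

#ifdef ARCH_TOPOLOGY_HAS_CORE_ONLINE
/* What an architecture-provided override might look like (data is made up). */
static const bool core_online_map[4] = { true, true, false, false };
static bool topology_is_core_online(unsigned int cpu)
{
	return cpu < 4 && core_online_map[cpu];
}
#define topology_is_core_online topology_is_core_online
#endif

#ifndef topology_is_core_online
/* Generic fallback: assume the core is online, as in kernel/cpu.c above. */
static inline bool topology_is_core_online(unsigned int cpu)
{
	(void)cpu;
	return true;
}
#endif

int main(void)
{
	for (unsigned int cpu = 0; cpu < 4; cpu++)
		printf("cpu%u: core online? %d\n", cpu,
		       topology_is_core_online(cpu));
	return 0;
}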
diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c
index d3b4cd12bdd1..64d44a52c011 100644
--- a/kernel/crash_reserve.c
+++ b/kernel/crash_reserve.c
@@ -423,7 +423,8 @@ retry:
if (high && search_end == CRASH_ADDR_HIGH_MAX) {
search_end = CRASH_ADDR_LOW_MAX;
search_base = 0;
- goto retry;
+ if (search_end != CRASH_ADDR_HIGH_MAX)
+ goto retry;
}
pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
crash_size);
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index a6e3792b15f8..d570535342cb 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -416,8 +416,11 @@ static unsigned long long phys_addr(struct dma_debug_entry *entry)
* dma_active_cacheline entry to track per event. dma_map_sg(), on the
* other hand, consumes a single dma_debug_entry, but inserts 'nents'
* entries into the tree.
+ *
+ * Use __GFP_NOWARN because the printk from an OOM, to netconsole, could end
+ * up right back in the DMA debugging code, leading to a deadlock.
*/
-static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC);
+static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC | __GFP_NOWARN);
static DEFINE_SPINLOCK(radix_lock);
#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index aa3450bdc227..c973e3c11e03 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9706,7 +9706,8 @@ static int __perf_event_overflow(struct perf_event *event,
ret = __perf_event_account_interrupt(event, throttle);
- if (event->prog && !bpf_overflow_handler(event, data, regs))
+ if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
+ !bpf_overflow_handler(event, data, regs))
return ret;
/*
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 07e99c936ba5..1dee88ba0ae4 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -530,6 +530,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
flags = IRQD_AFFINITY_MANAGED |
IRQD_MANAGED_SHUTDOWN;
}
+ flags |= IRQD_AFFINITY_SET;
mask = &affinity->mask;
node = cpu_to_node(cpumask_first(mask));
affinity++;
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index fb2c77368d18..a9a0ca605d4a 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -160,38 +160,6 @@ unsigned long kallsyms_sym_address(int idx)
return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
}
-static void cleanup_symbol_name(char *s)
-{
- char *res;
-
- if (!IS_ENABLED(CONFIG_LTO_CLANG))
- return;
-
- /*
- * LLVM appends various suffixes for local functions and variables that
- * must be promoted to global scope as part of LTO. This can break
- * hooking of static functions with kprobes. '.' is not a valid
- * character in an identifier in C. Suffixes only in LLVM LTO observed:
- * - foo.llvm.[0-9a-f]+
- */
- res = strstr(s, ".llvm.");
- if (res)
- *res = '\0';
-
- return;
-}
-
-static int compare_symbol_name(const char *name, char *namebuf)
-{
- /* The kallsyms_seqs_of_names is sorted based on names after
- * cleanup_symbol_name() (see scripts/kallsyms.c) if clang lto is enabled.
- * To ensure correct bisection in kallsyms_lookup_names(), do
- * cleanup_symbol_name(namebuf) before comparing name and namebuf.
- */
- cleanup_symbol_name(namebuf);
- return strcmp(name, namebuf);
-}
-
static unsigned int get_symbol_seq(int index)
{
unsigned int i, seq = 0;
@@ -219,7 +187,7 @@ static int kallsyms_lookup_names(const char *name,
seq = get_symbol_seq(mid);
off = get_symbol_offset(seq);
kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
- ret = compare_symbol_name(name, namebuf);
+ ret = strcmp(name, namebuf);
if (ret > 0)
low = mid + 1;
else if (ret < 0)
@@ -236,7 +204,7 @@ static int kallsyms_lookup_names(const char *name,
seq = get_symbol_seq(low - 1);
off = get_symbol_offset(seq);
kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
- if (compare_symbol_name(name, namebuf))
+ if (strcmp(name, namebuf))
break;
low--;
}
@@ -248,7 +216,7 @@ static int kallsyms_lookup_names(const char *name,
seq = get_symbol_seq(high + 1);
off = get_symbol_offset(seq);
kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
- if (compare_symbol_name(name, namebuf))
+ if (strcmp(name, namebuf))
break;
high++;
}
@@ -407,8 +375,7 @@ static int kallsyms_lookup_buildid(unsigned long addr,
if (modbuildid)
*modbuildid = NULL;
- ret = strlen(namebuf);
- goto found;
+ return strlen(namebuf);
}
/* See if it's in a module or a BPF JITed image. */
@@ -422,8 +389,6 @@ static int kallsyms_lookup_buildid(unsigned long addr,
ret = ftrace_mod_address_lookup(addr, symbolsize,
offset, modname, namebuf);
-found:
- cleanup_symbol_name(namebuf);
return ret;
}
@@ -450,8 +415,6 @@ const char *kallsyms_lookup(unsigned long addr,
int lookup_symbol_name(unsigned long addr, char *symname)
{
- int res;
-
symname[0] = '\0';
symname[KSYM_NAME_LEN - 1] = '\0';
@@ -462,16 +425,10 @@ int lookup_symbol_name(unsigned long addr, char *symname)
/* Grab name */
kallsyms_expand_symbol(get_symbol_offset(pos),
symname, KSYM_NAME_LEN);
- goto found;
+ return 0;
}
/* See if it's in a module. */
- res = lookup_module_symbol_name(addr, symname);
- if (res)
- return res;
-
-found:
- cleanup_symbol_name(symname);
- return 0;
+ return lookup_module_symbol_name(addr, symname);
}
/* Look up a kernel symbol and return it in a text buffer. */
diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c
index 2f84896a7bcb..873f7c445488 100644
--- a/kernel/kallsyms_selftest.c
+++ b/kernel/kallsyms_selftest.c
@@ -187,31 +187,11 @@ static void test_perf_kallsyms_lookup_name(void)
stat.min, stat.max, div_u64(stat.sum, stat.real_cnt));
}
-static bool match_cleanup_name(const char *s, const char *name)
-{
- char *p;
- int len;
-
- if (!IS_ENABLED(CONFIG_LTO_CLANG))
- return false;
-
- p = strstr(s, ".llvm.");
- if (!p)
- return false;
-
- len = strlen(name);
- if (p - s != len)
- return false;
-
- return !strncmp(s, name, len);
-}
-
static int find_symbol(void *data, const char *name, unsigned long addr)
{
struct test_stat *stat = (struct test_stat *)data;
- if (strcmp(name, stat->name) == 0 ||
- (!stat->perf && match_cleanup_name(name, stat->name))) {
+ if (!strcmp(name, stat->name)) {
stat->real_cnt++;
stat->addr = addr;
diff --git a/kernel/kcov.c b/kernel/kcov.c
index f0a69d402066..274b6b7c718d 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -161,6 +161,15 @@ static void kcov_remote_area_put(struct kcov_remote_area *area,
kmsan_unpoison_memory(&area->list, sizeof(area->list));
}
+/*
+ * Unlike in_serving_softirq(), this function returns false when called during
+ * a hardirq or an NMI that happened in the softirq context.
+ */
+static inline bool in_softirq_really(void)
+{
+ return in_serving_softirq() && !in_hardirq() && !in_nmi();
+}
+
static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
{
unsigned int mode;
@@ -170,7 +179,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru
* so we ignore code executed in interrupts, unless we are in a remote
* coverage collection section in a softirq.
*/
- if (!in_task() && !(in_serving_softirq() && t->kcov_softirq))
+ if (!in_task() && !(in_softirq_really() && t->kcov_softirq))
return false;
mode = READ_ONCE(t->kcov_mode);
/*
@@ -849,7 +858,7 @@ void kcov_remote_start(u64 handle)
if (WARN_ON(!kcov_check_handle(handle, true, true, true)))
return;
- if (!in_task() && !in_serving_softirq())
+ if (!in_task() && !in_softirq_really())
return;
local_lock_irqsave(&kcov_percpu_data.lock, flags);
@@ -991,7 +1000,7 @@ void kcov_remote_stop(void)
int sequence;
unsigned long flags;
- if (!in_task() && !in_serving_softirq())
+ if (!in_task() && !in_softirq_really())
return;
local_lock_irqsave(&kcov_percpu_data.lock, flags);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e85de37d9e1e..da59c68df841 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1557,8 +1557,8 @@ static bool is_cfi_preamble_symbol(unsigned long addr)
if (lookup_symbol_name(addr, symbuf))
return false;
- return str_has_prefix("__cfi_", symbuf) ||
- str_has_prefix("__pfx_", symbuf);
+ return str_has_prefix(symbuf, "__cfi_") ||
+ str_has_prefix(symbuf, "__pfx_");
}
static int check_kprobe_address_safe(struct kprobe *p,
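
The kprobes fix above corrects the argument order of str_has_prefix(): the string under test comes first and the prefix second, and the helper returns the prefix length on a match (0 otherwise), so the old call effectively asked whether the literal "__cfi_" begins with the looked-up symbol name. A userspace stand-in with the same semantics, written only for illustration:

/*
 * Userspace stand-in with the same semantics as the kernel's str_has_prefix().
 */
#include <assert.h>
#include <stddef.h>
#include <string.h>

static size_t str_has_prefix(const char *str, const char *prefix)
{
	size_t len = strlen(prefix);

	return strncmp(str, prefix, len) == 0 ? len : 0;
}

int main(void)
{
	const char *symbuf = "__cfi_foo";

	/* Correct order: does the symbol start with "__cfi_"? */
	assert(str_has_prefix(symbuf, "__cfi_") == 6);

	/* The pre-fix call effectively asked the opposite question. */
	assert(str_has_prefix("__cfi_", symbuf) == 0);
	return 0;
}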
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 58c88220a478..0349f957e672 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -5936,6 +5936,9 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(!depth))
return;
+ if (unlikely(lock->key == &__lockdep_no_track__))
+ return;
+
hlock = find_held_lock(curr, lock, depth, &i);
if (!hlock) {
print_lock_contention_bug(curr, lock, ip);
@@ -5978,6 +5981,9 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(!depth))
return;
+ if (unlikely(lock->key == &__lockdep_no_track__))
+ return;
+
hlock = find_held_lock(curr, lock, depth, &i);
if (!hlock) {
print_lock_contention_bug(curr, lock, _RET_IP_);
diff --git a/kernel/module/main.c b/kernel/module/main.c
index d9592195c5bb..71396e297499 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -3104,7 +3104,7 @@ static bool idempotent(struct idempotent *u, const void *cookie)
struct idempotent *existing;
bool first;
- u->ret = 0;
+ u->ret = -EINTR;
u->cookie = cookie;
init_completion(&u->complete);
@@ -3140,7 +3140,7 @@ static int idempotent_complete(struct idempotent *u, int ret)
hlist_for_each_entry_safe(pos, next, head, entry) {
if (pos->cookie != cookie)
continue;
- hlist_del(&pos->entry);
+ hlist_del_init(&pos->entry);
pos->ret = ret;
complete(&pos->complete);
}
@@ -3148,6 +3148,28 @@ static int idempotent_complete(struct idempotent *u, int ret)
return ret;
}
+/*
+ * Wait for the idempotent worker.
+ *
+ * If we get interrupted, we need to remove ourselves from the
+ * idempotent list, and the completion may still come in.
+ *
+ * The 'idem_lock' protects against the race, and 'idem.ret' was
+ * initialized to -EINTR and is thus always the right return
+ * value even if the idempotent work then completes between
+ * the wait_for_completion and the cleanup.
+ */
+static int idempotent_wait_for_completion(struct idempotent *u)
+{
+ if (wait_for_completion_interruptible(&u->complete)) {
+ spin_lock(&idem_lock);
+ if (!hlist_unhashed(&u->entry))
+ hlist_del(&u->entry);
+ spin_unlock(&idem_lock);
+ }
+ return u->ret;
+}
+
static int init_module_from_file(struct file *f, const char __user * uargs, int flags)
{
struct load_info info = { };
@@ -3183,15 +3205,16 @@ static int idempotent_init_module(struct file *f, const char __user * uargs, int
if (!f || !(f->f_mode & FMODE_READ))
return -EBADF;
- /* See if somebody else is doing the operation? */
- if (idempotent(&idem, file_inode(f))) {
- wait_for_completion(&idem.complete);
- return idem.ret;
+ /* Are we the winners of the race and get to do this? */
+ if (!idempotent(&idem, file_inode(f))) {
+ int ret = init_module_from_file(f, uargs, flags);
+ return idempotent_complete(&idem, ret);
}
- /* Otherwise, we'll do it and complete others */
- return idempotent_complete(&idem,
- init_module_from_file(f, uargs, flags));
+ /*
+ * Somebody else won the race and is loading the module.
+ */
+ return idempotent_wait_for_completion(&idem);
}
SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
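
The restructured idempotent_init_module() above lets the first caller for a given inode perform the load while concurrent callers wait for its result, and the new idempotent_wait_for_completion() makes that wait interruptible. The pthread sketch below models only the winner-does-the-work / losers-wait part of the pattern; it omits the -EINTR pre-initialization and the unhash-on-interrupt cleanup, and all names are hypothetical rather than kernel APIs.

/*
 * Simplified userspace model of the idempotence pattern, assuming a single
 * cookie; not the kernel implementation.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct idem {
	pthread_mutex_t lock;
	pthread_cond_t  done;
	bool            running;
	bool            complete;
	int             ret;
};

static struct idem idem = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.done = PTHREAD_COND_INITIALIZER,
};

/* Returns true if another thread already owns the operation. */
static bool idempotent(struct idem *u)
{
	bool already;

	pthread_mutex_lock(&u->lock);
	already = u->running;
	u->running = true;
	pthread_mutex_unlock(&u->lock);
	return already;
}

static int idempotent_complete(struct idem *u, int ret)
{
	pthread_mutex_lock(&u->lock);
	u->ret = ret;
	u->complete = true;
	pthread_cond_broadcast(&u->done);
	pthread_mutex_unlock(&u->lock);
	return ret;
}

static int idempotent_wait(struct idem *u)
{
	pthread_mutex_lock(&u->lock);
	while (!u->complete)
		pthread_cond_wait(&u->done, &u->lock);
	pthread_mutex_unlock(&u->lock);
	return u->ret;
}

/* Stand-in for init_module_from_file(). */
static int do_expensive_load(void)
{
	return 0;
}

static void *loader(void *arg)
{
	int ret;

	if (!idempotent(&idem))		/* winner of the race does the work */
		ret = idempotent_complete(&idem, do_expensive_load());
	else				/* losers just wait for the result */
		ret = idempotent_wait(&idem);
	printf("thread %ld: ret=%d\n", (long)arg, ret);
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (long i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, loader, (void *)i);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}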
diff --git a/kernel/padata.c b/kernel/padata.c
index 53f4bc912712..0fa6c2895460 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -517,6 +517,13 @@ void __init padata_do_multithreaded(struct padata_mt_job *job)
ps.chunk_size = max(ps.chunk_size, job->min_chunk);
ps.chunk_size = roundup(ps.chunk_size, job->align);
+ /*
+ * chunk_size can be 0 if the caller sets min_chunk to 0. So force it
+ * to at least 1 to prevent divide-by-0 panic in padata_mt_helper().
+ */
+ if (!ps.chunk_size)
+ ps.chunk_size = 1U;
+
list_for_each_entry(pw, &works, pw_list)
if (job->numa_aware) {
int old_node = atomic_read(&last_used_nid);
diff --git a/kernel/panic.c b/kernel/panic.c
index f861bedc1925..2a0449144f82 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -64,6 +64,8 @@ unsigned long panic_on_taint;
bool panic_on_taint_nousertaint = false;
static unsigned int warn_limit __read_mostly;
+bool panic_triggering_all_cpu_backtrace;
+
int panic_timeout = CONFIG_PANIC_TIMEOUT;
EXPORT_SYMBOL_GPL(panic_timeout);
@@ -253,8 +255,12 @@ void check_panic_on_warn(const char *origin)
*/
static void panic_other_cpus_shutdown(bool crash_kexec)
{
- if (panic_print & PANIC_PRINT_ALL_CPU_BT)
+ if (panic_print & PANIC_PRINT_ALL_CPU_BT) {
+ /* Temporarily allow non-panic CPUs to write their backtraces. */
+ panic_triggering_all_cpu_backtrace = true;
trigger_all_cpu_backtrace();
+ panic_triggering_all_cpu_backtrace = false;
+ }
/*
* Note that smp_send_stop() is the usual SMP shutdown function,
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 054c0e7784fd..c22b07049c38 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2316,7 +2316,7 @@ asmlinkage int vprintk_emit(int facility, int level,
* non-panic CPUs are generating any messages, they will be
* silently dropped.
*/
- if (other_cpu_in_panic())
+ if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace)
return 0;
if (level == LOGLEVEL_SCHED) {
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index fc205ad167a9..d1d5ea2d0a1b 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -902,7 +902,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
i = *idx ? : task->curr_ret_stack;
while (i > 0) {
- ret_stack = get_ret_stack(current, i, &i);
+ ret_stack = get_ret_stack(task, i, &i);
if (!ret_stack)
break;
/*
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 28853966aa9a..cebd879a30cb 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -693,18 +693,6 @@ u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer,
}
/**
- * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer
- * @buffer: The ring_buffer to get the number of pages from
- * @cpu: The cpu of the ring_buffer to get the number of pages from
- *
- * Returns the number of pages used by a per_cpu buffer of the ring buffer.
- */
-size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu)
-{
- return buffer->buffers[cpu]->nr_pages;
-}
-
-/**
* ring_buffer_nr_dirty_pages - get the number of used pages in the ring buffer
* @buffer: The ring_buffer to get the number of pages from
* @cpu: The cpu of the ring_buffer to get the number of pages from
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 10cd38bce2f1..ebe7ce2f5f4a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7956,7 +7956,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
trace_access_unlock(iter->cpu_file);
if (ret < 0) {
- if (trace_empty(iter)) {
+ if (trace_empty(iter) && !iter->closed) {
if ((filp->f_flags & O_NONBLOCK))
return -EAGAIN;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8783bebd0562..bd3e3069300e 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1634,6 +1634,29 @@ static inline void *event_file_data(struct file *filp)
extern struct mutex event_mutex;
extern struct list_head ftrace_events;
+/*
+ * When the trace_event_file is the filp->i_private pointer,
+ * it must be taken under the event_mutex lock, and then checked
+ * if the EVENT_FILE_FL_FREED flag is set. If it is, then the
+ * data pointed to by the trace_event_file can not be trusted.
+ *
+ * Use the event_file_file() to access the trace_event_file from
+ * the filp the first time under the event_mutex and check for
+ * NULL. If it is needed to be retrieved again and the event_mutex
+ * is still held, then the event_file_data() can be used and it
+ * is guaranteed to be valid.
+ */
+static inline struct trace_event_file *event_file_file(struct file *filp)
+{
+ struct trace_event_file *file;
+
+ lockdep_assert_held(&event_mutex);
+ file = READ_ONCE(file_inode(filp)->i_private);
+ if (!file || file->flags & EVENT_FILE_FL_FREED)
+ return NULL;
+ return file;
+}
+
extern const struct file_operations event_trigger_fops;
extern const struct file_operations event_hist_fops;
extern const struct file_operations event_hist_debug_fops;
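
The comment added to trace.h spells out the access protocol that the trace_events.c changes below adopt: take event_mutex, fetch the trace_event_file from i_private, and treat it as gone if EVENT_FILE_FL_FREED is set. A small userspace analogue of that lookup-under-lock-and-check-freed protocol is sketched here; the structures and helper names are hypothetical, not the tracefs implementation.

/*
 * Userspace analogue of the event_file_file() protocol; all names are
 * hypothetical.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct event_file {
	bool freed;
	int  flags;
};

static pthread_mutex_t event_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct event_file *private_data;		/* plays the role of i_private */

/* Valid only while event_mutex is held, mirroring event_file_file(). */
static struct event_file *event_file_get_checked(void)
{
	struct event_file *file = private_data;

	if (!file || file->freed)
		return NULL;
	return file;
}

static int show_flags(void)
{
	struct event_file *file;
	int ret = -1;				/* stand-in for -ENODEV */

	pthread_mutex_lock(&event_mutex);
	file = event_file_get_checked();
	if (file) {
		printf("flags=%d\n", file->flags);
		ret = 0;
	}
	pthread_mutex_unlock(&event_mutex);
	return ret;
}

int main(void)
{
	struct event_file f = { .freed = false, .flags = 3 };

	private_data = &f;
	show_flags();				/* prints the flags */
	f.freed = true;
	printf("after free: %d\n", show_flags());	/* now rejected */
	return 0;
}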
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 6ef29eba90ce..7266ec2a4eea 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -992,18 +992,18 @@ static void remove_subsystem(struct trace_subsystem_dir *dir)
void event_file_get(struct trace_event_file *file)
{
- atomic_inc(&file->ref);
+ refcount_inc(&file->ref);
}
void event_file_put(struct trace_event_file *file)
{
- if (WARN_ON_ONCE(!atomic_read(&file->ref))) {
+ if (WARN_ON_ONCE(!refcount_read(&file->ref))) {
if (file->flags & EVENT_FILE_FL_FREED)
kmem_cache_free(file_cachep, file);
return;
}
- if (atomic_dec_and_test(&file->ref)) {
+ if (refcount_dec_and_test(&file->ref)) {
/* Count should only go to zero when it is freed */
if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED)))
return;
@@ -1386,12 +1386,12 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
char buf[4] = "0";
mutex_lock(&event_mutex);
- file = event_file_data(filp);
+ file = event_file_file(filp);
if (likely(file))
flags = file->flags;
mutex_unlock(&event_mutex);
- if (!file || flags & EVENT_FILE_FL_FREED)
+ if (!file)
return -ENODEV;
if (flags & EVENT_FILE_FL_ENABLED &&
@@ -1424,8 +1424,8 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
case 1:
ret = -ENODEV;
mutex_lock(&event_mutex);
- file = event_file_data(filp);
- if (likely(file && !(file->flags & EVENT_FILE_FL_FREED))) {
+ file = event_file_file(filp);
+ if (likely(file)) {
ret = tracing_update_buffers(file->tr);
if (ret < 0) {
mutex_unlock(&event_mutex);
@@ -1540,7 +1540,8 @@ enum {
static void *f_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct trace_event_call *call = event_file_data(m->private);
+ struct trace_event_file *file = event_file_data(m->private);
+ struct trace_event_call *call = file->event_call;
struct list_head *common_head = &ftrace_common_fields;
struct list_head *head = trace_get_fields(call);
struct list_head *node = v;
@@ -1572,7 +1573,8 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos)
static int f_show(struct seq_file *m, void *v)
{
- struct trace_event_call *call = event_file_data(m->private);
+ struct trace_event_file *file = event_file_data(m->private);
+ struct trace_event_call *call = file->event_call;
struct ftrace_event_field *field;
const char *array_descriptor;
@@ -1627,12 +1629,14 @@ static int f_show(struct seq_file *m, void *v)
static void *f_start(struct seq_file *m, loff_t *pos)
{
+ struct trace_event_file *file;
void *p = (void *)FORMAT_HEADER;
loff_t l = 0;
/* ->stop() is called even if ->start() fails */
mutex_lock(&event_mutex);
- if (!event_file_data(m->private))
+ file = event_file_file(m->private);
+ if (!file)
return ERR_PTR(-ENODEV);
while (l < *pos && p)
@@ -1706,8 +1710,8 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
trace_seq_init(s);
mutex_lock(&event_mutex);
- file = event_file_data(filp);
- if (file && !(file->flags & EVENT_FILE_FL_FREED))
+ file = event_file_file(filp);
+ if (file)
print_event_filter(file, s);
mutex_unlock(&event_mutex);
@@ -1736,9 +1740,13 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
return PTR_ERR(buf);
mutex_lock(&event_mutex);
- file = event_file_data(filp);
- if (file)
- err = apply_event_filter(file, buf);
+ file = event_file_file(filp);
+ if (file) {
+ if (file->flags & EVENT_FILE_FL_FREED)
+ err = -ENODEV;
+ else
+ err = apply_event_filter(file, buf);
+ }
mutex_unlock(&event_mutex);
kfree(buf);
@@ -2485,7 +2493,6 @@ static int event_callback(const char *name, umode_t *mode, void **data,
if (strcmp(name, "format") == 0) {
*mode = TRACE_MODE_READ;
*fops = &ftrace_event_format_fops;
- *data = call;
return 1;
}
@@ -2996,7 +3003,7 @@ trace_create_new_event(struct trace_event_call *call,
atomic_set(&file->tm_ref, 0);
INIT_LIST_HEAD(&file->triggers);
list_add(&file->list, &tr->events);
- event_file_get(file);
+ refcount_set(&file->ref, 1);
return file;
}
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 6ece1308d36a..5f9119eb7c67 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -5601,7 +5601,7 @@ static int hist_show(struct seq_file *m, void *v)
mutex_lock(&event_mutex);
- event_file = event_file_data(m->private);
+ event_file = event_file_file(m->private);
if (unlikely(!event_file)) {
ret = -ENODEV;
goto out_unlock;
@@ -5880,7 +5880,7 @@ static int hist_debug_show(struct seq_file *m, void *v)
mutex_lock(&event_mutex);
- event_file = event_file_data(m->private);
+ event_file = event_file_file(m->private);
if (unlikely(!event_file)) {
ret = -ENODEV;
goto out_unlock;
diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c
index 8650562bdaa9..a8f076809db4 100644
--- a/kernel/trace/trace_events_inject.c
+++ b/kernel/trace/trace_events_inject.c
@@ -299,7 +299,7 @@ event_inject_write(struct file *filp, const char __user *ubuf, size_t cnt,
strim(buf);
mutex_lock(&event_mutex);
- file = event_file_data(filp);
+ file = event_file_file(filp);
if (file) {
call = file->event_call;
size = parse_entry(buf, call, &entry);
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 4bec043c8690..a5e3d6acf1e1 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -159,7 +159,7 @@ static void *trigger_start(struct seq_file *m, loff_t *pos)
/* ->stop() is called even if ->start() fails */
mutex_lock(&event_mutex);
- event_file = event_file_data(m->private);
+ event_file = event_file_file(m->private);
if (unlikely(!event_file))
return ERR_PTR(-ENODEV);
@@ -213,7 +213,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file)
mutex_lock(&event_mutex);
- if (unlikely(!event_file_data(file))) {
+ if (unlikely(!event_file_file(file))) {
mutex_unlock(&event_mutex);
return -ENODEV;
}
@@ -293,7 +293,7 @@ static ssize_t event_trigger_regex_write(struct file *file,
strim(buf);
mutex_lock(&event_mutex);
- event_file = event_file_data(file);
+ event_file = event_file_file(file);
if (unlikely(!event_file)) {
mutex_unlock(&event_mutex);
kfree(buf);
diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
index a4dcf0f24352..3a56e7c8aa4f 100644
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -454,7 +454,7 @@ static struct tracing_map_elt *get_free_elt(struct tracing_map *map)
struct tracing_map_elt *elt = NULL;
int idx;
- idx = atomic_inc_return(&map->next_elt);
+ idx = atomic_fetch_add_unless(&map->next_elt, 1, map->max_elts);
if (idx < map->max_elts) {
elt = *(TRACING_MAP_ELT(map->elts, idx));
if (map->ops && map->ops->elt_init)
@@ -699,7 +699,7 @@ void tracing_map_clear(struct tracing_map *map)
{
unsigned int i;
- atomic_set(&map->next_elt, -1);
+ atomic_set(&map->next_elt, 0);
atomic64_set(&map->hits, 0);
atomic64_set(&map->drops, 0);
@@ -783,7 +783,7 @@ struct tracing_map *tracing_map_create(unsigned int map_bits,
map->map_bits = map_bits;
map->max_elts = (1 << map_bits);
- atomic_set(&map->next_elt, -1);
+ atomic_set(&map->next_elt, 0);
map->map_size = (1 << (map_bits + 1));
map->ops = ops;
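
The tracing_map change above starts next_elt at 0 and switches get_free_elt() to atomic_fetch_add_unless(), so once the map is full the counter saturates at max_elts instead of being incremented on every further allocation attempt until it could eventually wrap. A C11 sketch of the fetch-add-unless semantics, written only for illustration and not the kernel's implementation:

/*
 * Illustrative fetch-add-unless built on C11 atomics: add 'a' and return the
 * old value unless the counter already equals 'u'.
 */
#include <stdatomic.h>
#include <stdio.h>

static int atomic_fetch_add_unless(atomic_int *v, int a, int u)
{
	int old = atomic_load(v);

	while (old != u) {
		if (atomic_compare_exchange_weak(v, &old, old + a))
			break;
	}
	return old;
}

int main(void)
{
	atomic_int next_elt = 0;
	const int max_elts = 2;

	/* The counter saturates at max_elts instead of growing without bound. */
	for (int i = 0; i < 5; i++) {
		int idx = atomic_fetch_add_unless(&next_elt, 1, max_elts);

		printf("attempt %d: idx=%d %s\n", i, idx,
		       idx < max_elts ? "(got a slot)" : "(map full)");
	}
	return 0;
}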
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1745ca788ede..e7b005ff3750 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -377,7 +377,7 @@ struct workqueue_struct {
/* hot fields used during command issue, aligned to cacheline */
unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
- struct pool_workqueue __percpu __rcu **cpu_pwq; /* I: per-cpu pwqs */
+ struct pool_workqueue __rcu * __percpu *cpu_pwq; /* I: per-cpu pwqs */
struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */
};
@@ -897,7 +897,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work)
static unsigned long shift_and_mask(unsigned long v, u32 shift, u32 bits)
{
- return (v >> shift) & ((1 << bits) - 1);
+ return (v >> shift) & ((1U << bits) - 1);
}
static void work_offqd_unpack(struct work_offq_data *offqd, unsigned long data)
@@ -3351,7 +3351,6 @@ woke_up:
set_pf_worker(false);
ida_free(&pool->worker_ida, worker->id);
- WARN_ON_ONCE(!list_empty(&worker->entry));
return 0;
}
@@ -4167,7 +4166,6 @@ already_gone:
static bool __flush_work(struct work_struct *work, bool from_cancel)
{
struct wq_barrier barr;
- unsigned long data;
if (WARN_ON(!wq_online))
return false;
@@ -4185,29 +4183,35 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
* was queued on a BH workqueue, we also know that it was running in the
* BH context and thus can be busy-waited.
*/
- data = *work_data_bits(work);
- if (from_cancel &&
- !WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_BH)) {
- /*
- * On RT, prevent a live lock when %current preempted soft
- * interrupt processing or prevents ksoftirqd from running by
- * keeping flipping BH. If the BH work item runs on a different
- * CPU then this has no effect other than doing the BH
- * disable/enable dance for nothing. This is copied from
- * kernel/softirq.c::tasklet_unlock_spin_wait().
- */
- while (!try_wait_for_completion(&barr.done)) {
- if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
- local_bh_disable();
- local_bh_enable();
- } else {
- cpu_relax();
+ if (from_cancel) {
+ unsigned long data = *work_data_bits(work);
+
+ if (!WARN_ON_ONCE(data & WORK_STRUCT_PWQ) &&
+ (data & WORK_OFFQ_BH)) {
+ /*
+ * On RT, prevent a live lock when %current preempted
+ * soft interrupt processing or prevents ksoftirqd from
+ * running by keeping flipping BH. If the BH work item
+ * runs on a different CPU then this has no effect other
+ * than doing the BH disable/enable dance for nothing.
+ * This is copied from
+ * kernel/softirq.c::tasklet_unlock_spin_wait().
+ */
+ while (!try_wait_for_completion(&barr.done)) {
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ local_bh_disable();
+ local_bh_enable();
+ } else {
+ cpu_relax();
+ }
}
+ goto out_destroy;
}
- } else {
- wait_for_completion(&barr.done);
}
+ wait_for_completion(&barr.done);
+
+out_destroy:
destroy_work_on_stack(&barr.work);
return true;
}