aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/auditfilter.c5
-rw-r--r--kernel/bpf/arena.c16
-rw-r--r--kernel/bpf/bpf_local_storage.c4
-rw-r--r--kernel/bpf/core.c7
-rw-r--r--kernel/bpf/helpers.c99
-rw-r--r--kernel/bpf/ringbuf.c31
-rw-r--r--kernel/bpf/verifier.c86
-rw-r--r--kernel/cpu.c11
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/gcov/gcc_4_7.c4
-rw-r--r--kernel/kallsyms.c23
-rw-r--r--kernel/kcov.c1
-rw-r--r--kernel/module/kallsyms.c25
-rw-r--r--kernel/pid_namespace.c1
-rw-r--r--kernel/printk/Makefile2
-rw-r--r--kernel/printk/conopt.c146
-rw-r--r--kernel/printk/console_cmdline.h6
-rw-r--r--kernel/printk/printk.c23
-rw-r--r--kernel/sched/core.c7
-rw-r--r--kernel/sched/deadline.c7
-rw-r--r--kernel/sched/fair.c12
-rw-r--r--kernel/sched/psi.c21
-rw-r--r--kernel/sched/sched.h1
-rw-r--r--kernel/sched/stats.h11
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--kernel/time/hrtimer.c2
-rw-r--r--kernel/trace/Kconfig4
-rw-r--r--kernel/trace/ftrace.c13
-rw-r--r--kernel/workqueue.c51
29 files changed, 329 insertions, 294 deletions
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index be8c680121e4..d6ef4f4f9cba 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -529,7 +529,8 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
entry->rule.buflen += f_val;
f->lsm_str = str;
err = security_audit_rule_init(f->type, f->op, str,
- (void **)&f->lsm_rule);
+ (void **)&f->lsm_rule,
+ GFP_KERNEL);
/* Keep currently invalid fields around in case they
* become valid after a policy reload. */
if (err == -EINVAL) {
@@ -799,7 +800,7 @@ static inline int audit_dupe_lsm_field(struct audit_field *df,
/* our own (refreshed) copy of lsm_rule */
ret = security_audit_rule_init(df->type, df->op, df->lsm_str,
- (void **)&df->lsm_rule);
+ (void **)&df->lsm_rule, GFP_KERNEL);
/* Keep currently invalid fields around in case they
* become valid after a policy reload. */
if (ret == -EINVAL) {
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 583ee4fe48ef..e52b3ad231b9 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -212,6 +212,7 @@ static u64 arena_map_mem_usage(const struct bpf_map *map)
struct vma_list {
struct vm_area_struct *vma;
struct list_head head;
+ atomic_t mmap_count;
};
static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
@@ -221,20 +222,30 @@ static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
vml = kmalloc(sizeof(*vml), GFP_KERNEL);
if (!vml)
return -ENOMEM;
+ atomic_set(&vml->mmap_count, 1);
vma->vm_private_data = vml;
vml->vma = vma;
list_add(&vml->head, &arena->vma_list);
return 0;
}
+static void arena_vm_open(struct vm_area_struct *vma)
+{
+ struct vma_list *vml = vma->vm_private_data;
+
+ atomic_inc(&vml->mmap_count);
+}
+
static void arena_vm_close(struct vm_area_struct *vma)
{
struct bpf_map *map = vma->vm_file->private_data;
struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
- struct vma_list *vml;
+ struct vma_list *vml = vma->vm_private_data;
+ if (!atomic_dec_and_test(&vml->mmap_count))
+ return;
guard(mutex)(&arena->lock);
- vml = vma->vm_private_data;
+ /* update link list under lock */
list_del(&vml->head);
vma->vm_private_data = NULL;
kfree(vml);
@@ -287,6 +298,7 @@ out:
}
static const struct vm_operations_struct arena_vm_ops = {
+ .open = arena_vm_open,
.close = arena_vm_close,
.fault = arena_vm_fault,
};
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 976cb258a0ed..c938dea5ddbf 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -782,8 +782,8 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
nbuckets = max_t(u32, 2, nbuckets);
smap->bucket_log = ilog2(nbuckets);
- smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
- nbuckets, GFP_USER | __GFP_NOWARN);
+ smap->buckets = bpf_map_kvcalloc(&smap->map, nbuckets,
+ sizeof(*smap->buckets), GFP_USER | __GFP_NOWARN);
if (!smap->buckets) {
err = -ENOMEM;
goto free_smap;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1a6c3faa6e4a..695a0fb2cd4d 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -736,11 +736,11 @@ static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
}
-const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
+int __bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char *sym)
{
struct bpf_ksym *ksym;
- char *ret = NULL;
+ int ret = 0;
rcu_read_lock();
ksym = bpf_ksym_find(addr);
@@ -748,9 +748,8 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
unsigned long symbol_start = ksym->start;
unsigned long symbol_end = ksym->end;
- strscpy(sym, ksym->name, KSYM_NAME_LEN);
+ ret = strscpy(sym, ksym->name, KSYM_NAME_LEN);
- ret = sym;
if (size)
*size = symbol_end - symbol_start;
if (off)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 2a69a9a36c0f..3243c83ef3e3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1084,7 +1084,10 @@ struct bpf_async_cb {
struct bpf_prog *prog;
void __rcu *callback_fn;
void *value;
- struct rcu_head rcu;
+ union {
+ struct rcu_head rcu;
+ struct work_struct delete_work;
+ };
u64 flags;
};
@@ -1107,6 +1110,7 @@ struct bpf_async_cb {
struct bpf_hrtimer {
struct bpf_async_cb cb;
struct hrtimer timer;
+ atomic_t cancelling;
};
struct bpf_work {
@@ -1219,6 +1223,21 @@ static void bpf_wq_delete_work(struct work_struct *work)
kfree_rcu(w, cb.rcu);
}
+static void bpf_timer_delete_work(struct work_struct *work)
+{
+ struct bpf_hrtimer *t = container_of(work, struct bpf_hrtimer, cb.delete_work);
+
+ /* Cancel the timer and wait for callback to complete if it was running.
+ * If hrtimer_cancel() can be safely called it's safe to call
+ * kfree_rcu(t) right after for both preallocated and non-preallocated
+ * maps. The async->cb = NULL was already done and no code path can see
+ * address 't' anymore. Timer if armed for existing bpf_hrtimer before
+ * bpf_timer_cancel_and_free will have been cancelled.
+ */
+ hrtimer_cancel(&t->timer);
+ kfree_rcu(t, cb.rcu);
+}
+
static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
enum bpf_async_type type)
{
@@ -1262,6 +1281,8 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
clockid = flags & (MAX_CLOCKS - 1);
t = (struct bpf_hrtimer *)cb;
+ atomic_set(&t->cancelling, 0);
+ INIT_WORK(&t->cb.delete_work, bpf_timer_delete_work);
hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
t->timer.function = bpf_timer_cb;
cb->value = (void *)async - map->record->timer_off;
@@ -1440,7 +1461,8 @@ static void drop_prog_refcnt(struct bpf_async_cb *async)
BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
{
- struct bpf_hrtimer *t;
+ struct bpf_hrtimer *t, *cur_t;
+ bool inc = false;
int ret = 0;
if (in_nmi())
@@ -1452,14 +1474,41 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
ret = -EINVAL;
goto out;
}
- if (this_cpu_read(hrtimer_running) == t) {
+
+ cur_t = this_cpu_read(hrtimer_running);
+ if (cur_t == t) {
/* If bpf callback_fn is trying to bpf_timer_cancel()
* its own timer the hrtimer_cancel() will deadlock
- * since it waits for callback_fn to finish
+ * since it waits for callback_fn to finish.
*/
ret = -EDEADLK;
goto out;
}
+
+ /* Only account in-flight cancellations when invoked from a timer
+ * callback, since we want to avoid waiting only if other _callbacks_
+ * are waiting on us, to avoid introducing lockups. Non-callback paths
+ * are ok, since nobody would synchronously wait for their completion.
+ */
+ if (!cur_t)
+ goto drop;
+ atomic_inc(&t->cancelling);
+ /* Need full barrier after relaxed atomic_inc */
+ smp_mb__after_atomic();
+ inc = true;
+ if (atomic_read(&cur_t->cancelling)) {
+ /* We're cancelling timer t, while some other timer callback is
+ * attempting to cancel us. In such a case, it might be possible
+ * that timer t belongs to the other callback, or some other
+ * callback waiting upon it (creating transitive dependencies
+ * upon us), and we will enter a deadlock if we continue
+ * cancelling and waiting for it synchronously, since it might
+ * do the same. Bail!
+ */
+ ret = -EDEADLK;
+ goto out;
+ }
+drop:
drop_prog_refcnt(&t->cb);
out:
__bpf_spin_unlock_irqrestore(&timer->lock);
@@ -1467,6 +1516,8 @@ out:
* if it was running.
*/
ret = ret ?: hrtimer_cancel(&t->timer);
+ if (inc)
+ atomic_dec(&t->cancelling);
rcu_read_unlock();
return ret;
}
@@ -1512,25 +1563,39 @@ void bpf_timer_cancel_and_free(void *val)
if (!t)
return;
- /* Cancel the timer and wait for callback to complete if it was running.
- * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
- * right after for both preallocated and non-preallocated maps.
- * The async->cb = NULL was already done and no code path can
- * see address 't' anymore.
- *
- * Check that bpf_map_delete/update_elem() wasn't called from timer
- * callback_fn. In such case don't call hrtimer_cancel() (since it will
- * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
- * return -1). Though callback_fn is still running on this cpu it's
+ /* We check that bpf_map_delete/update_elem() was called from timer
+ * callback_fn. In such case we don't call hrtimer_cancel() (since it
+ * will deadlock) and don't call hrtimer_try_to_cancel() (since it will
+ * just return -1). Though callback_fn is still running on this cpu it's
* safe to do kfree(t) because bpf_timer_cb() read everything it needed
* from 't'. The bpf subprog callback_fn won't be able to access 't',
* since async->cb = NULL was already done. The timer will be
* effectively cancelled because bpf_timer_cb() will return
* HRTIMER_NORESTART.
+ *
+ * However, it is possible the timer callback_fn calling us armed the
+ * timer _before_ calling us, such that failing to cancel it here will
+ * cause it to possibly use struct hrtimer after freeing bpf_hrtimer.
+ * Therefore, we _need_ to cancel any outstanding timers before we do
+ * kfree_rcu, even though no more timers can be armed.
+ *
+ * Moreover, we need to schedule work even if timer does not belong to
+ * the calling callback_fn, as on two different CPUs, we can end up in a
+ * situation where both sides run in parallel, try to cancel one
+ * another, and we end up waiting on both sides in hrtimer_cancel
+ * without making forward progress, since timer1 depends on time2
+ * callback to finish, and vice versa.
+ *
+ * CPU 1 (timer1_cb) CPU 2 (timer2_cb)
+ * bpf_timer_cancel_and_free(timer2) bpf_timer_cancel_and_free(timer1)
+ *
+ * To avoid these issues, punt to workqueue context when we are in a
+ * timer callback.
*/
- if (this_cpu_read(hrtimer_running) != t)
- hrtimer_cancel(&t->timer);
- kfree_rcu(t, cb.rcu);
+ if (this_cpu_read(hrtimer_running))
+ queue_work(system_unbound_wq, &t->cb.delete_work);
+ else
+ bpf_timer_delete_work(&t->cb.delete_work);
}
/* This function is called by map_delete/update_elem for individual element and
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 0ee653a936ea..e20b90c36131 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -51,7 +51,8 @@ struct bpf_ringbuf {
* This prevents a user-space application from modifying the
* position and ruining in-kernel tracking. The permissions of the
* pages depend on who is producing samples: user-space or the
- * kernel.
+ * kernel. Note that the pending counter is placed in the same
+ * page as the producer, so that it shares the same cache line.
*
* Kernel-producer
* ---------------
@@ -70,6 +71,7 @@ struct bpf_ringbuf {
*/
unsigned long consumer_pos __aligned(PAGE_SIZE);
unsigned long producer_pos __aligned(PAGE_SIZE);
+ unsigned long pending_pos;
char data[] __aligned(PAGE_SIZE);
};
@@ -179,6 +181,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
rb->mask = data_sz - 1;
rb->consumer_pos = 0;
rb->producer_pos = 0;
+ rb->pending_pos = 0;
return rb;
}
@@ -404,9 +407,9 @@ bpf_ringbuf_restore_from_rec(struct bpf_ringbuf_hdr *hdr)
static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
{
- unsigned long cons_pos, prod_pos, new_prod_pos, flags;
- u32 len, pg_off;
+ unsigned long cons_pos, prod_pos, new_prod_pos, pend_pos, flags;
struct bpf_ringbuf_hdr *hdr;
+ u32 len, pg_off, tmp_size, hdr_len;
if (unlikely(size > RINGBUF_MAX_RECORD_SZ))
return NULL;
@@ -424,13 +427,29 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
spin_lock_irqsave(&rb->spinlock, flags);
}
+ pend_pos = rb->pending_pos;
prod_pos = rb->producer_pos;
new_prod_pos = prod_pos + len;
- /* check for out of ringbuf space by ensuring producer position
- * doesn't advance more than (ringbuf_size - 1) ahead
+ while (pend_pos < prod_pos) {
+ hdr = (void *)rb->data + (pend_pos & rb->mask);
+ hdr_len = READ_ONCE(hdr->len);
+ if (hdr_len & BPF_RINGBUF_BUSY_BIT)
+ break;
+ tmp_size = hdr_len & ~BPF_RINGBUF_DISCARD_BIT;
+ tmp_size = round_up(tmp_size + BPF_RINGBUF_HDR_SZ, 8);
+ pend_pos += tmp_size;
+ }
+ rb->pending_pos = pend_pos;
+
+ /* check for out of ringbuf space:
+ * - by ensuring producer position doesn't advance more than
+ * (ringbuf_size - 1) ahead
+ * - by ensuring oldest not yet committed record until newest
+ * record does not span more than (ringbuf_size - 1)
*/
- if (new_prod_pos - cons_pos > rb->mask) {
+ if (new_prod_pos - cons_pos > rb->mask ||
+ new_prod_pos - pend_pos > rb->mask) {
spin_unlock_irqrestore(&rb->spinlock, flags);
return NULL;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 36ef8e96787e..214a9fa8c6fb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4549,11 +4549,12 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
state->stack[spi].spilled_ptr.id = 0;
} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
env->bpf_capable) {
- struct bpf_reg_state fake_reg = {};
+ struct bpf_reg_state *tmp_reg = &env->fake_reg[0];
- __mark_reg_known(&fake_reg, insn->imm);
- fake_reg.type = SCALAR_VALUE;
- save_register_state(env, state, spi, &fake_reg, size);
+ memset(tmp_reg, 0, sizeof(*tmp_reg));
+ __mark_reg_known(tmp_reg, insn->imm);
+ tmp_reg->type = SCALAR_VALUE;
+ save_register_state(env, state, spi, tmp_reg, size);
} else if (reg && is_spillable_regtype(reg->type)) {
/* register containing pointer is being spilled into stack */
if (size != BPF_REG_SIZE) {
@@ -6235,6 +6236,7 @@ static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
}
reg->u32_min_value = 0;
reg->u32_max_value = U32_MAX;
+ reg->var_off = tnum_subreg(tnum_unknown);
}
static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
@@ -6279,6 +6281,7 @@ static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
reg->s32_max_value = s32_max;
reg->u32_min_value = (u32)s32_min;
reg->u32_max_value = (u32)s32_max;
+ reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
return;
}
@@ -12718,6 +12721,16 @@ static bool signed_add32_overflows(s32 a, s32 b)
return res < a;
}
+static bool signed_add16_overflows(s16 a, s16 b)
+{
+ /* Do the add in u16, where overflow is well-defined */
+ s16 res = (s16)((u16)a + (u16)b);
+
+ if (b < 0)
+ return res > a;
+ return res < a;
+}
+
static bool signed_sub_overflows(s64 a, s64 b)
{
/* Do the sub in u64, where overflow is well-defined */
@@ -15113,7 +15126,6 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
struct bpf_reg_state *eq_branch_regs;
- struct bpf_reg_state fake_reg = {};
u8 opcode = BPF_OP(insn->code);
bool is_jmp32;
int pred = -1;
@@ -15179,7 +15191,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
return -EINVAL;
}
- src_reg = &fake_reg;
+ src_reg = &env->fake_reg[0];
+ memset(src_reg, 0, sizeof(*src_reg));
src_reg->type = SCALAR_VALUE;
__mark_reg_known(src_reg, insn->imm);
}
@@ -15239,10 +15252,16 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
&other_branch_regs[insn->src_reg],
dst_reg, src_reg, opcode, is_jmp32);
} else /* BPF_SRC(insn->code) == BPF_K */ {
+ /* reg_set_min_max() can mangle the fake_reg. Make a copy
+ * so that these are two different memory locations. The
+ * src_reg is not used beyond here in context of K.
+ */
+ memcpy(&env->fake_reg[1], &env->fake_reg[0],
+ sizeof(env->fake_reg[0]));
err = reg_set_min_max(env,
&other_branch_regs[insn->dst_reg],
- src_reg /* fake one */,
- dst_reg, src_reg /* same fake one */,
+ &env->fake_reg[0],
+ dst_reg, &env->fake_reg[1],
opcode, is_jmp32);
}
if (err)
@@ -17441,11 +17460,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
goto skip_inf_loop_check;
}
if (is_may_goto_insn_at(env, insn_idx)) {
- if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
+ if (sl->state.may_goto_depth != cur->may_goto_depth &&
+ states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
update_loop_entry(cur, &sl->state);
goto hit;
}
- goto skip_inf_loop_check;
}
if (calls_callback(env, insn_idx)) {
if (states_equal(env, &sl->state, cur, RANGE_WITHIN))
@@ -18723,6 +18742,39 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
return new_prog;
}
+/*
+ * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
+ * jump offset by 'delta'.
+ */
+static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
+{
+ struct bpf_insn *insn = prog->insnsi;
+ u32 insn_cnt = prog->len, i;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ u8 code = insn->code;
+
+ if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
+ BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
+ continue;
+
+ if (insn->code == (BPF_JMP32 | BPF_JA)) {
+ if (i + 1 + insn->imm != tgt_idx)
+ continue;
+ if (signed_add32_overflows(insn->imm, delta))
+ return -ERANGE;
+ insn->imm += delta;
+ } else {
+ if (i + 1 + insn->off != tgt_idx)
+ continue;
+ if (signed_add16_overflows(insn->imm, delta))
+ return -ERANGE;
+ insn->off += delta;
+ }
+ }
+ return 0;
+}
+
static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
u32 off, u32 cnt)
{
@@ -19997,7 +20049,10 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
stack_depth_extra = 8;
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
- insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+ if (insn->off >= 0)
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+ else
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
cnt = 4;
@@ -20313,7 +20368,7 @@ patch_map_ops_generic:
goto next_insn;
}
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
/* Implement bpf_get_smp_processor_id() inline. */
if (insn->imm == BPF_FUNC_get_smp_processor_id &&
prog->jit_requested && bpf_jit_supports_percpu_insn()) {
@@ -20539,6 +20594,13 @@ next_insn:
if (!new_prog)
return -ENOMEM;
env->prog = prog = new_prog;
+ /*
+ * If may_goto is a first insn of a prog there could be a jmp
+ * insn that points to it, hence adjust all such jmps to point
+ * to insn after BPF_ST that inits may_goto count.
+ * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
+ */
+ WARN_ON(adjust_jmp_off(env->prog, subprog_start, 1));
}
/* Since poke tab is now finalized, publish aux to tracker. */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 563877d6c28b..3d2bf1d50a0c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1859,6 +1859,9 @@ static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return fals
void __init bringup_nonboot_cpus(unsigned int max_cpus)
{
+ if (!max_cpus)
+ return;
+
/* Try parallel bringup optimization if enabled */
if (cpuhp_bringup_cpus_parallel(max_cpus))
return;
@@ -2446,7 +2449,7 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
* The caller needs to hold cpus read locked while calling this function.
* Return:
* On success:
- * Positive state number if @state is CPUHP_AP_ONLINE_DYN;
+ * Positive state number if @state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN;
* 0 for all other states
* On failure: proper (negative) error code
*/
@@ -2469,7 +2472,7 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
ret = cpuhp_store_callbacks(state, name, startup, teardown,
multi_instance);
- dynstate = state == CPUHP_AP_ONLINE_DYN;
+ dynstate = state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN;
if (ret > 0 && dynstate) {
state = ret;
ret = 0;
@@ -2500,8 +2503,8 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
out:
mutex_unlock(&cpuhp_state_mutex);
/*
- * If the requested state is CPUHP_AP_ONLINE_DYN, return the
- * dynamically allocated state in case of success.
+ * If the requested state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN,
+ * return the dynamically allocated state in case of success.
*/
if (!ret && dynstate)
return state;
diff --git a/kernel/exit.c b/kernel/exit.c
index f95a2c1338a8..81fcee45d630 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -484,6 +484,8 @@ retry:
* Search through everything else, we should not get here often.
*/
for_each_process(g) {
+ if (atomic_read(&mm->mm_users) <= 1)
+ break;
if (g->flags & PF_KTHREAD)
continue;
for_each_thread(g, c) {
diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index 74a4ef1da9ad..fd75b4a484d7 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,9 @@
#include <linux/mm.h>
#include "gcov.h"
-#if (__GNUC__ >= 10)
+#if (__GNUC__ >= 14)
+#define GCOV_COUNTERS 9
+#elif (__GNUC__ >= 10)
#define GCOV_COUNTERS 8
#elif (__GNUC__ >= 7)
#define GCOV_COUNTERS 9
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 22ea19a36e6e..98b9622d372e 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -388,12 +388,12 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
!!__bpf_address_lookup(addr, symbolsize, offset, namebuf);
}
-static const char *kallsyms_lookup_buildid(unsigned long addr,
+static int kallsyms_lookup_buildid(unsigned long addr,
unsigned long *symbolsize,
unsigned long *offset, char **modname,
const unsigned char **modbuildid, char *namebuf)
{
- const char *ret;
+ int ret;
namebuf[KSYM_NAME_LEN - 1] = 0;
namebuf[0] = 0;
@@ -410,7 +410,7 @@ static const char *kallsyms_lookup_buildid(unsigned long addr,
if (modbuildid)
*modbuildid = NULL;
- ret = namebuf;
+ ret = strlen(namebuf);
goto found;
}
@@ -442,8 +442,13 @@ const char *kallsyms_lookup(unsigned long addr,
unsigned long *offset,
char **modname, char *namebuf)
{
- return kallsyms_lookup_buildid(addr, symbolsize, offset, modname,
- NULL, namebuf);
+ int ret = kallsyms_lookup_buildid(addr, symbolsize, offset, modname,
+ NULL, namebuf);
+
+ if (!ret)
+ return NULL;
+
+ return namebuf;
}
int lookup_symbol_name(unsigned long addr, char *symname)
@@ -478,19 +483,15 @@ static int __sprint_symbol(char *buffer, unsigned long address,
{
char *modname;
const unsigned char *buildid;
- const char *name;
unsigned long offset, size;
int len;
address += symbol_offset;
- name = kallsyms_lookup_buildid(address, &size, &offset, &modname, &buildid,
+ len = kallsyms_lookup_buildid(address, &size, &offset, &modname, &buildid,
buffer);
- if (!name)
+ if (!len)
return sprintf(buffer, "0x%lx", address - symbol_offset);
- if (name != buffer)
- strcpy(buffer, name);
- len = strlen(buffer);
offset -= symbol_offset;
if (add_offset)
diff --git a/kernel/kcov.c b/kernel/kcov.c
index c3124f6d5536..f0a69d402066 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -632,6 +632,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
return -EINVAL;
kcov->mode = mode;
t->kcov = kcov;
+ t->kcov_mode = KCOV_MODE_REMOTE;
kcov->t = t;
kcov->remote = true;
kcov->remote_size = remote_arg->area_size;
diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c
index 62fb57bb9f16..bf65e0c3c86f 100644
--- a/kernel/module/kallsyms.c
+++ b/kernel/module/kallsyms.c
@@ -321,14 +321,15 @@ void * __weak dereference_module_function_descriptor(struct module *mod,
* For kallsyms to ask for address resolution. NULL means not found. Careful
* not to lock to avoid deadlock on oopses, simply disable preemption.
*/
-const char *module_address_lookup(unsigned long addr,
- unsigned long *size,
- unsigned long *offset,
- char **modname,
- const unsigned char **modbuildid,
- char *namebuf)
+int module_address_lookup(unsigned long addr,
+ unsigned long *size,
+ unsigned long *offset,
+ char **modname,
+ const unsigned char **modbuildid,
+ char *namebuf)
{
- const char *ret = NULL;
+ const char *sym;
+ int ret = 0;
struct module *mod;
preempt_disable();
@@ -344,12 +345,10 @@ const char *module_address_lookup(unsigned long addr,
#endif
}
- ret = find_kallsyms_symbol(mod, addr, size, offset);
- }
- /* Make a copy in here where it's safe */
- if (ret) {
- strscpy(namebuf, ret, KSYM_NAME_LEN);
- ret = namebuf;
+ sym = find_kallsyms_symbol(mod, addr, size, offset);
+
+ if (sym)
+ ret = strscpy(namebuf, sym, KSYM_NAME_LEN);
}
preempt_enable();
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index dc48fecfa1dc..25f3cf679b35 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -218,6 +218,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
*/
do {
clear_thread_flag(TIF_SIGPENDING);
+ clear_thread_flag(TIF_NOTIFY_SIGNAL);
rc = kernel_wait4(-1, NULL, __WALL, NULL);
} while (rc != -ECHILD);
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
index 040fe7d1eda2..39a2b61c7232 100644
--- a/kernel/printk/Makefile
+++ b/kernel/printk/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y = printk.o conopt.o
+obj-y = printk.o
obj-$(CONFIG_PRINTK) += printk_safe.o nbcon.o
obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
obj-$(CONFIG_PRINTK_INDEX) += index.o
diff --git a/kernel/printk/conopt.c b/kernel/printk/conopt.c
deleted file mode 100644
index 9d507bac3657..000000000000
--- a/kernel/printk/conopt.c
+++ /dev/null
@@ -1,146 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Kernel command line console options for hardware based addressing
- *
- * Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com/
- * Author: Tony Lindgren <tony@atomide.com>
- */
-
-#include <linux/console.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/types.h>
-
-#include <asm/errno.h>
-
-#include "console_cmdline.h"
-
-/*
- * Allow longer DEVNAME:0.0 style console naming such as abcd0000.serial:0.0
- * in addition to the legacy ttyS0 style naming.
- */
-#define CONSOLE_NAME_MAX 32
-
-#define CONSOLE_OPT_MAX 16
-#define CONSOLE_BRL_OPT_MAX 16
-
-struct console_option {
- char name[CONSOLE_NAME_MAX];
- char opt[CONSOLE_OPT_MAX];
- char brl_opt[CONSOLE_BRL_OPT_MAX];
- u8 has_brl_opt:1;
-};
-
-/* Updated only at console_setup() time, no locking needed */
-static struct console_option conopt[MAX_CMDLINECONSOLES];
-
-/**
- * console_opt_save - Saves kernel command line console option for driver use
- * @str: Kernel command line console name and option
- * @brl_opt: Braille console options
- *
- * Saves a kernel command line console option for driver subsystems to use for
- * adding a preferred console during init. Called from console_setup() only.
- *
- * Return: 0 on success, negative error code on failure.
- */
-int __init console_opt_save(const char *str, const char *brl_opt)
-{
- struct console_option *con;
- size_t namelen, optlen;
- const char *opt;
- int i;
-
- namelen = strcspn(str, ",");
- if (namelen == 0 || namelen >= CONSOLE_NAME_MAX)
- return -EINVAL;
-
- opt = str + namelen;
- if (*opt == ',')
- opt++;
-
- optlen = strlen(opt);
- if (optlen >= CONSOLE_OPT_MAX)
- return -EINVAL;
-
- for (i = 0; i < MAX_CMDLINECONSOLES; i++) {
- con = &conopt[i];
-
- if (con->name[0]) {
- if (!strncmp(str, con->name, namelen))
- return 0;
- continue;
- }
-
- /*
- * The name isn't terminated, only opt is. Empty opt is fine,
- * but brl_opt can be either empty or NULL. For more info, see
- * _braille_console_setup().
- */
- strscpy(con->name, str, namelen + 1);
- strscpy(con->opt, opt, CONSOLE_OPT_MAX);
- if (brl_opt) {
- strscpy(con->brl_opt, brl_opt, CONSOLE_BRL_OPT_MAX);
- con->has_brl_opt = 1;
- }
-
- return 0;
- }
-
- return -ENOMEM;
-}
-
-static struct console_option *console_opt_find(const char *name)
-{
- struct console_option *con;
- int i;
-
- for (i = 0; i < MAX_CMDLINECONSOLES; i++) {
- con = &conopt[i];
- if (!strcmp(name, con->name))
- return con;
- }
-
- return NULL;
-}
-
-/**
- * add_preferred_console_match - Adds a preferred console if a match is found
- * @match: Expected console on kernel command line, such as console=DEVNAME:0.0
- * @name: Name of the console character device to add such as ttyS
- * @idx: Index for the console
- *
- * Allows driver subsystems to add a console after translating the command
- * line name to the character device name used for the console. Options are
- * added automatically based on the kernel command line. Duplicate preferred
- * consoles are ignored by __add_preferred_console().
- *
- * Return: 0 on success, negative error code on failure.
- */
-int add_preferred_console_match(const char *match, const char *name,
- const short idx)
-{
- struct console_option *con;
- char *brl_opt = NULL;
-
- if (!match || !strlen(match) || !name || !strlen(name) ||
- idx < 0)
- return -EINVAL;
-
- con = console_opt_find(match);
- if (!con)
- return -ENOENT;
-
- /*
- * See __add_preferred_console(). It checks for NULL brl_options to set
- * the preferred_console flag. Empty brl_opt instead of NULL leads into
- * the preferred_console flag not set, and CON_CONSDEV not being set,
- * and the boot console won't get disabled at the end of console_setup().
- */
- if (con->has_brl_opt)
- brl_opt = con->brl_opt;
-
- console_opt_add_preferred_console(name, idx, con->opt, brl_opt);
-
- return 0;
-}
diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h
index a125e0235589..3ca74ad391d6 100644
--- a/kernel/printk/console_cmdline.h
+++ b/kernel/printk/console_cmdline.h
@@ -2,12 +2,6 @@
#ifndef _CONSOLE_CMDLINE_H
#define _CONSOLE_CMDLINE_H
-#define MAX_CMDLINECONSOLES 8
-
-int console_opt_save(const char *str, const char *brl_opt);
-int console_opt_add_preferred_console(const char *name, const short idx,
- char *options, char *brl_options);
-
struct console_cmdline
{
char name[16]; /* Name of the driver */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 420fd310129d..dddb15f48d59 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -383,6 +383,9 @@ static int console_locked;
/*
* Array of consoles built from command line options (console=)
*/
+
+#define MAX_CMDLINECONSOLES 8
+
static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
static int preferred_console = -1;
@@ -2500,17 +2503,6 @@ static int __init console_setup(char *str)
if (_braille_console_setup(&str, &brl_options))
return 1;
- /* Save the console for driver subsystem use */
- if (console_opt_save(str, brl_options))
- return 1;
-
- /* Flag register_console() to not call try_enable_default_console() */
- console_set_on_cmdline = 1;
-
- /* Don't attempt to parse a DEVNAME:0.0 style console */
- if (strchr(str, ':'))
- return 1;
-
/*
* Decode str into name, index, options.
*/
@@ -2541,13 +2533,6 @@ static int __init console_setup(char *str)
}
__setup("console=", console_setup);
-/* Only called from add_preferred_console_match() */
-int console_opt_add_preferred_console(const char *name, const short idx,
- char *options, char *brl_options)
-{
- return __add_preferred_console(name, idx, options, brl_options, true);
-}
-
/**
* add_preferred_console - add a device to the list of preferred consoles.
* @name: device name
@@ -3522,7 +3507,7 @@ void register_console(struct console *newcon)
* Note that a console with tty binding will have CON_CONSDEV
* flag set and will be first in the list.
*/
- if (preferred_console < 0 && !console_set_on_cmdline) {
+ if (preferred_console < 0) {
if (hlist_empty(&console_list) || !console_first()->device ||
console_first()->flags & CON_BOOT) {
try_enable_default_console(newcon);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bcf2c4cc0522..59ce0841eb1f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -723,7 +723,6 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
rq->prev_irq_time += irq_delta;
delta -= irq_delta;
- psi_account_irqtime(rq->curr, irq_delta);
delayacct_irq(rq->curr, irq_delta);
#endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
@@ -5665,7 +5664,7 @@ void sched_tick(void)
{
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
- struct task_struct *curr = rq->curr;
+ struct task_struct *curr;
struct rq_flags rf;
unsigned long hw_pressure;
u64 resched_latency;
@@ -5677,6 +5676,9 @@ void sched_tick(void)
rq_lock(rq, &rf);
+ curr = rq->curr;
+ psi_account_irqtime(rq, curr, NULL);
+
update_rq_clock(rq);
hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);
@@ -6737,6 +6739,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
++*switch_count;
migrate_disable_switch(rq, prev);
+ psi_account_irqtime(rq, prev, next);
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index c75d1307d86d..9bedd148f007 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1804,8 +1804,13 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
* The replenish timer needs to be canceled. No
* problem if it fires concurrently: boosted threads
* are ignored in dl_task_timer().
+ *
+ * If the timer callback was running (hrtimer_try_to_cancel == -1),
+ * it will eventually call put_task_struct().
*/
- hrtimer_try_to_cancel(&p->dl.dl_timer);
+ if (hrtimer_try_to_cancel(&p->dl.dl_timer) == 1 &&
+ !dl_server(&p->dl))
+ put_task_struct(p);
p->dl.dl_throttled = 0;
}
} else if (!dl_prio(p->normal_prio)) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8a5b1ae0aa55..24dda708b699 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9149,12 +9149,8 @@ static int detach_tasks(struct lb_env *env)
break;
env->loop++;
- /*
- * We've more or less seen every task there is, call it quits
- * unless we haven't found any movable task yet.
- */
- if (env->loop > env->loop_max &&
- !(env->flags & LBF_ALL_PINNED))
+ /* We've more or less seen every task there is, call it quits */
+ if (env->loop > env->loop_max)
break;
/* take a breather every nr_migrate tasks */
@@ -11393,9 +11389,7 @@ more_balance:
if (env.flags & LBF_NEED_BREAK) {
env.flags &= ~LBF_NEED_BREAK;
- /* Stop if we tried all running tasks */
- if (env.loop < busiest->nr_running)
- goto more_balance;
+ goto more_balance;
}
/*
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 7b4aa5809c0f..507d7b8d79af 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -773,6 +773,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
enum psi_states s;
u32 state_mask;
+ lockdep_assert_rq_held(cpu_rq(cpu));
groupc = per_cpu_ptr(group->pcpu, cpu);
/*
@@ -991,22 +992,32 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
}
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-void psi_account_irqtime(struct task_struct *task, u32 delta)
+void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev)
{
- int cpu = task_cpu(task);
+ int cpu = task_cpu(curr);
struct psi_group *group;
struct psi_group_cpu *groupc;
- u64 now;
+ u64 now, irq;
+ s64 delta;
if (static_branch_likely(&psi_disabled))
return;
- if (!task->pid)
+ if (!curr->pid)
+ return;
+
+ lockdep_assert_rq_held(rq);
+ group = task_psi_group(curr);
+ if (prev && task_psi_group(prev) == group)
return;
now = cpu_clock(cpu);
+ irq = irq_time_read(cpu);
+ delta = (s64)(irq - rq->psi_irq_time);
+ if (delta < 0)
+ return;
+ rq->psi_irq_time = irq;
- group = task_psi_group(task);
do {
if (!group->enabled)
continue;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a831af102070..ef20c61004eb 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1126,6 +1126,7 @@ struct rq {
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
u64 prev_irq_time;
+ u64 psi_irq_time;
#endif
#ifdef CONFIG_PARAVIRT
u64 prev_steal_time;
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 38f3698f5e5b..b02dfc322951 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -110,8 +110,12 @@ __schedstats_from_se(struct sched_entity *se)
void psi_task_change(struct task_struct *task, int clear, int set);
void psi_task_switch(struct task_struct *prev, struct task_struct *next,
bool sleep);
-void psi_account_irqtime(struct task_struct *task, u32 delta);
-
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev);
+#else
+static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
+ struct task_struct *prev) {}
+#endif /*CONFIG_IRQ_TIME_ACCOUNTING */
/*
* PSI tracks state that persists across sleeps, such as iowaits and
* memory stalls. As a result, it has to distinguish between sleeps,
@@ -192,7 +196,8 @@ static inline void psi_ttwu_dequeue(struct task_struct *p) {}
static inline void psi_sched_switch(struct task_struct *prev,
struct task_struct *next,
bool sleep) {}
-static inline void psi_account_irqtime(struct task_struct *task, u32 delta) {}
+static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr,
+ struct task_struct *prev) {}
#endif /* CONFIG_PSI */
#ifdef CONFIG_SCHED_INFO
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index d7eee421d4bc..b696b85ac63e 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -46,8 +46,8 @@ COND_SYSCALL(io_getevents_time32);
COND_SYSCALL(io_getevents);
COND_SYSCALL(io_pgetevents_time32);
COND_SYSCALL(io_pgetevents);
-COND_SYSCALL_COMPAT(io_pgetevents_time32);
COND_SYSCALL_COMPAT(io_pgetevents);
+COND_SYSCALL_COMPAT(io_pgetevents_time64);
COND_SYSCALL(io_uring_setup);
COND_SYSCALL(io_uring_enter);
COND_SYSCALL(io_uring_register);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 492c14aac642..b8ee320208d4 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1285,6 +1285,8 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
struct hrtimer_clock_base *base;
unsigned long flags;
+ if (WARN_ON_ONCE(!timer->function))
+ return;
/*
* Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
* match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 166ad5444eea..721c3b221048 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1136,7 +1136,7 @@ config PREEMPTIRQ_DELAY_TEST
config SYNTH_EVENT_GEN_TEST
tristate "Test module for in-kernel synthetic event generation"
- depends on SYNTH_EVENTS
+ depends on SYNTH_EVENTS && m
help
This option creates a test module to check the base
functionality of in-kernel synthetic event definition and
@@ -1149,7 +1149,7 @@ config SYNTH_EVENT_GEN_TEST
config KPROBE_EVENT_GEN_TEST
tristate "Test module for in-kernel kprobe event generation"
- depends on KPROBE_EVENTS
+ depends on KPROBE_EVENTS && m
help
This option creates a test module to check the base
functionality of in-kernel kprobe event definition.
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 65208d3b5ed9..eacab4020508 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6969,7 +6969,7 @@ allocate_ftrace_mod_map(struct module *mod,
return mod_map;
}
-static const char *
+static int
ftrace_func_address_lookup(struct ftrace_mod_map *mod_map,
unsigned long addr, unsigned long *size,
unsigned long *off, char *sym)
@@ -6990,21 +6990,18 @@ ftrace_func_address_lookup(struct ftrace_mod_map *mod_map,
*size = found_func->size;
if (off)
*off = addr - found_func->ip;
- if (sym)
- strscpy(sym, found_func->name, KSYM_NAME_LEN);
-
- return found_func->name;
+ return strscpy(sym, found_func->name, KSYM_NAME_LEN);
}
- return NULL;
+ return 0;
}
-const char *
+int
ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
unsigned long *off, char **modname, char *sym)
{
struct ftrace_mod_map *mod_map;
- const char *ret = NULL;
+ int ret = 0;
/* mod_map is freed via call_rcu() */
preempt_disable();
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 003474c9a77d..3fbaecfc88c2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -125,6 +125,7 @@ enum wq_internal_consts {
HIGHPRI_NICE_LEVEL = MIN_NICE,
WQ_NAME_LEN = 32,
+ WORKER_ID_LEN = 10 + WQ_NAME_LEN, /* "kworker/R-" + WQ_NAME_LEN */
};
/*
@@ -2742,6 +2743,26 @@ static void worker_detach_from_pool(struct worker *worker)
complete(detach_completion);
}
+static int format_worker_id(char *buf, size_t size, struct worker *worker,
+ struct worker_pool *pool)
+{
+ if (worker->rescue_wq)
+ return scnprintf(buf, size, "kworker/R-%s",
+ worker->rescue_wq->name);
+
+ if (pool) {
+ if (pool->cpu >= 0)
+ return scnprintf(buf, size, "kworker/%d:%d%s",
+ pool->cpu, worker->id,
+ pool->attrs->nice < 0 ? "H" : "");
+ else
+ return scnprintf(buf, size, "kworker/u%d:%d",
+ pool->id, worker->id);
+ } else {
+ return scnprintf(buf, size, "kworker/dying");
+ }
+}
+
/**
* create_worker - create a new workqueue worker
* @pool: pool the new worker will belong to
@@ -2758,7 +2779,6 @@ static struct worker *create_worker(struct worker_pool *pool)
{
struct worker *worker;
int id;
- char id_buf[23];
/* ID is needed to determine kthread name */
id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
@@ -2777,17 +2797,14 @@ static struct worker *create_worker(struct worker_pool *pool)
worker->id = id;
if (!(pool->flags & POOL_BH)) {
- if (pool->cpu >= 0)
- snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
- pool->attrs->nice < 0 ? "H" : "");
- else
- snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
+ char id_buf[WORKER_ID_LEN];
+ format_worker_id(id_buf, sizeof(id_buf), worker, pool);
worker->task = kthread_create_on_node(worker_thread, worker,
- pool->node, "kworker/%s", id_buf);
+ pool->node, "%s", id_buf);
if (IS_ERR(worker->task)) {
if (PTR_ERR(worker->task) == -EINTR) {
- pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n",
+ pr_err("workqueue: Interrupted when creating a worker thread \"%s\"\n",
id_buf);
} else {
pr_err_once("workqueue: Failed to create a worker thread: %pe",
@@ -3350,7 +3367,6 @@ woke_up:
raw_spin_unlock_irq(&pool->lock);
set_pf_worker(false);
- set_task_comm(worker->task, "kworker/dying");
ida_free(&pool->worker_ida, worker->id);
worker_detach_from_pool(worker);
WARN_ON_ONCE(!list_empty(&worker->entry));
@@ -5542,6 +5558,7 @@ static int wq_clamp_max_active(int max_active, unsigned int flags,
static int init_rescuer(struct workqueue_struct *wq)
{
struct worker *rescuer;
+ char id_buf[WORKER_ID_LEN];
int ret;
if (!(wq->flags & WQ_MEM_RECLAIM))
@@ -5555,7 +5572,9 @@ static int init_rescuer(struct workqueue_struct *wq)
}
rescuer->rescue_wq = wq;
- rescuer->task = kthread_create(rescuer_thread, rescuer, "kworker/R-%s", wq->name);
+ format_worker_id(id_buf, sizeof(id_buf), rescuer, NULL);
+
+ rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", id_buf);
if (IS_ERR(rescuer->task)) {
ret = PTR_ERR(rescuer->task);
pr_err("workqueue: Failed to create a rescuer kthread for wq \"%s\": %pe",
@@ -6384,19 +6403,15 @@ void show_freezable_workqueues(void)
/* used to show worker information through /proc/PID/{comm,stat,status} */
void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
{
- int off;
-
- /* always show the actual comm */
- off = strscpy(buf, task->comm, size);
- if (off < 0)
- return;
-
/* stabilize PF_WQ_WORKER and worker pool association */
mutex_lock(&wq_pool_attach_mutex);
if (task->flags & PF_WQ_WORKER) {
struct worker *worker = kthread_data(task);
struct worker_pool *pool = worker->pool;
+ int off;
+
+ off = format_worker_id(buf, size, worker, pool);
if (pool) {
raw_spin_lock_irq(&pool->lock);
@@ -6415,6 +6430,8 @@ void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
}
raw_spin_unlock_irq(&pool->lock);
}
+ } else {
+ strscpy(buf, task->comm, size);
}
mutex_unlock(&wq_pool_attach_mutex);