Diffstat (limited to 'kernel')
-rw-r--r--	kernel/auditsc.c			|  8
-rw-r--r--	kernel/bpf/memalloc.c			| 44
-rw-r--r--	kernel/bpf/mprog.c			| 13
-rw-r--r--	kernel/bpf/syscall.c			| 21
-rw-r--r--	kernel/bpf/tcx.c			|  8
-rw-r--r--	kernel/bpf/verifier.c			| 14
-rw-r--r--	kernel/cgroup/cgroup-v1.c		|  5
-rw-r--r--	kernel/events/core.c			| 39
-rw-r--r--	kernel/power/snapshot.c			|  4
-rw-r--r--	kernel/printk/printk.c			|  8
-rw-r--r--	kernel/sched/cpufreq_schedutil.c	|  3
-rw-r--r--	kernel/sched/fair.c			| 90
-rw-r--r--	kernel/trace/fprobe.c			|  6
-rw-r--r--	kernel/trace/trace_kprobe.c		| 63
-rw-r--r--	kernel/trace/trace_probe.h		|  1
-rw-r--r--	kernel/workqueue.c			| 24
16 files changed, 259 insertions(+), 92 deletions(-)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 21d2fa815e78..6f0d6fb6523f 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2212,7 +2212,7 @@ __audit_reusename(const __user char *uptr)
 		if (!n->name)
 			continue;
 		if (n->name->uptr == uptr) {
-			n->name->refcnt++;
+			atomic_inc(&n->name->refcnt);
 			return n->name;
 		}
 	}
@@ -2241,7 +2241,7 @@ void __audit_getname(struct filename *name)
 	n->name = name;
 	n->name_len = AUDIT_NAME_FULL;
 	name->aname = n;
-	name->refcnt++;
+	atomic_inc(&name->refcnt);
 }
 
 static inline int audit_copy_fcaps(struct audit_names *name,
@@ -2373,7 +2373,7 @@ out_alloc:
 		return;
 	if (name) {
 		n->name = name;
-		name->refcnt++;
+		atomic_inc(&name->refcnt);
 	}
 
 out:
@@ -2500,7 +2500,7 @@ void __audit_inode_child(struct inode *parent,
 	if (found_parent) {
 		found_child->name = found_parent->name;
 		found_child->name_len = AUDIT_NAME_FULL;
-		found_child->name->refcnt++;
+		atomic_inc(&found_child->name->refcnt);
 	}
 }
 
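The auditsc.c hunks above turn a plain refcnt++ on a shared struct filename into atomic_inc(), so concurrent audit paths cannot lose an increment. Below is a minimal userspace sketch of the same refcounting pattern using C11 atomics; it is illustrative only and does not use the kernel's atomic_t API, and the struct/function names are made up for the example.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct filename_like {
	atomic_uint refcnt;	/* was a plain integer before the fix */
	char name[64];
};

static void get_name(struct filename_like *n)
{
	/* atomic read-modify-write: safe even if two paths grab it at once */
	atomic_fetch_add_explicit(&n->refcnt, 1, memory_order_relaxed);
}

static void put_name(struct filename_like *n)
{
	/* release/acquire pairing so the final put sees all prior writes */
	if (atomic_fetch_sub_explicit(&n->refcnt, 1, memory_order_acq_rel) == 1)
		free(n);
}

int main(void)
{
	struct filename_like *n = calloc(1, sizeof(*n));

	atomic_init(&n->refcnt, 1);	/* creator holds the first reference */
	get_name(n);			/* e.g. an audit record taking a reference */
	put_name(n);
	put_name(n);			/* last reference frees the object */
	printf("done\n");
	return 0;
}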
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index cf1941516643..d93ddac283d4 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -965,37 +965,31 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
 	return !ret ? NULL : ret + LLIST_NODE_SZ;
 }
 
-/* Most of the logic is taken from setup_kmalloc_cache_index_table() */
 static __init int bpf_mem_cache_adjust_size(void)
 {
-	unsigned int size, index;
+	unsigned int size;
 
-	/* Normally KMALLOC_MIN_SIZE is 8-bytes, but it can be
-	 * up-to 256-bytes.
+	/* Adjusting the indexes in size_index() according to the object_size
+	 * of underlying slab cache, so bpf_mem_alloc() will select a
+	 * bpf_mem_cache with unit_size equal to the object_size of
+	 * the underlying slab cache.
+	 *
+	 * The maximal value of KMALLOC_MIN_SIZE and __kmalloc_minalign() is
+	 * 256-bytes, so only do adjustment for [8-bytes, 192-bytes].
 	 */
-	size = KMALLOC_MIN_SIZE;
-	if (size <= 192)
-		index = size_index[(size - 1) / 8];
-	else
-		index = fls(size - 1) - 1;
-	for (size = 8; size < KMALLOC_MIN_SIZE && size <= 192; size += 8)
-		size_index[(size - 1) / 8] = index;
+	for (size = 192; size >= 8; size -= 8) {
+		unsigned int kmalloc_size, index;
 
-	/* The minimal alignment is 64-bytes, so disable 96-bytes cache and
-	 * use 128-bytes cache instead.
-	 */
-	if (KMALLOC_MIN_SIZE >= 64) {
-		index = size_index[(128 - 1) / 8];
-		for (size = 64 + 8; size <= 96; size += 8)
-			size_index[(size - 1) / 8] = index;
-	}
+		kmalloc_size = kmalloc_size_roundup(size);
+		if (kmalloc_size == size)
+			continue;
 
-	/* The minimal alignment is 128-bytes, so disable 192-bytes cache and
-	 * use 256-bytes cache instead.
-	 */
-	if (KMALLOC_MIN_SIZE >= 128) {
-		index = fls(256 - 1) - 1;
-		for (size = 128 + 8; size <= 192; size += 8)
+		if (kmalloc_size <= 192)
+			index = size_index[(kmalloc_size - 1) / 8];
+		else
+			index = fls(kmalloc_size - 1) - 1;
+		/* Only overwrite if necessary */
+		if (size_index[(size - 1) / 8] != index)
 			size_index[(size - 1) / 8] = index;
 	}
 
diff --git a/kernel/bpf/mprog.c b/kernel/bpf/mprog.c
index 32d2c4829eb8..1394168062e8 100644
--- a/kernel/bpf/mprog.c
+++ b/kernel/bpf/mprog.c
@@ -253,6 +253,9 @@ int bpf_mprog_attach(struct bpf_mprog_entry *entry,
 			goto out;
 		}
 		idx = tidx;
+	} else if (bpf_mprog_total(entry) == bpf_mprog_max()) {
+		ret = -ERANGE;
+		goto out;
 	}
 	if (flags & BPF_F_BEFORE) {
 		tidx = bpf_mprog_pos_before(entry, &rtuple);
@@ -398,14 +401,16 @@ int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr,
 	struct bpf_mprog_cp *cp;
 	struct bpf_prog *prog;
 	const u32 flags = 0;
+	u32 id, count = 0;
+	u64 revision = 1;
 	int i, ret = 0;
-	u32 id, count;
-	u64 revision;
 
 	if (attr->query.query_flags || attr->query.attach_flags)
 		return -EINVAL;
-	revision = bpf_mprog_revision(entry);
-	count = bpf_mprog_total(entry);
+	if (entry) {
+		revision = bpf_mprog_revision(entry);
+		count = bpf_mprog_total(entry);
+	}
 	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
 		return -EFAULT;
 	if (copy_to_user(&uattr->query.revision, &revision, sizeof(revision)))
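The bpf/memalloc.c rewrite above walks every size from 8 to 192 bytes, asks the slab layer what it would actually hand out for that request (kmalloc_size_roundup()), and repoints the size_index table at the matching class. The toy userspace program below mirrors only that idea; the class table and roundup helper are stand-in assumptions, not the kernel's kmalloc caches.

#include <stdio.h>

/* toy allocator classes standing in for kmalloc caches */
static const unsigned int classes[] = { 8, 16, 32, 64, 96, 128, 192, 256 };

static unsigned int roundup_size(unsigned int size)
{
	for (unsigned int i = 0; i < sizeof(classes) / sizeof(classes[0]); i++)
		if (classes[i] >= size)
			return classes[i];
	return 0;	/* too large for the toy table */
}

int main(void)
{
	/* If a request is really backed by a bigger class, point the request
	 * size at that class so per-class bookkeeping matches what is handed out. */
	for (unsigned int size = 8; size <= 192; size += 8) {
		unsigned int rounded = roundup_size(size);

		if (rounded != size)
			printf("size %3u -> backing class %3u\n", size, rounded);
	}
	return 0;
}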
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index eb01c31ed591..d77b2f8b9364 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3796,7 +3796,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 {
 	enum bpf_prog_type ptype;
 	struct bpf_prog *prog;
-	u32 mask;
 	int ret;
 
 	if (CHECK_ATTR(BPF_PROG_ATTACH))
@@ -3805,10 +3804,16 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	ptype = attach_type_to_prog_type(attr->attach_type);
 	if (ptype == BPF_PROG_TYPE_UNSPEC)
 		return -EINVAL;
-	mask = bpf_mprog_supported(ptype) ?
-	       BPF_F_ATTACH_MASK_MPROG : BPF_F_ATTACH_MASK_BASE;
-	if (attr->attach_flags & ~mask)
-		return -EINVAL;
+	if (bpf_mprog_supported(ptype)) {
+		if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG)
+			return -EINVAL;
+	} else {
+		if (attr->attach_flags & ~BPF_F_ATTACH_MASK_BASE)
+			return -EINVAL;
+		if (attr->relative_fd ||
+		    attr->expected_revision)
+			return -EINVAL;
+	}
 
 	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
 	if (IS_ERR(prog))
@@ -3878,6 +3883,10 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 			if (IS_ERR(prog))
 				return PTR_ERR(prog);
 		}
+	} else if (attr->attach_flags ||
+		   attr->relative_fd ||
+		   attr->expected_revision) {
+		return -EINVAL;
 	}
 
 	switch (ptype) {
@@ -3913,7 +3922,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 	return ret;
 }
 
-#define BPF_PROG_QUERY_LAST_FIELD query.link_attach_flags
+#define BPF_PROG_QUERY_LAST_FIELD query.revision
 
 static int bpf_prog_query(const union bpf_attr *attr,
 			  union bpf_attr __user *uattr)
diff --git a/kernel/bpf/tcx.c b/kernel/bpf/tcx.c
index 13f0b5dc8262..1338a13a8b64 100644
--- a/kernel/bpf/tcx.c
+++ b/kernel/bpf/tcx.c
@@ -123,7 +123,6 @@ int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
 {
 	bool ingress = attr->query.attach_type == BPF_TCX_INGRESS;
 	struct net *net = current->nsproxy->net_ns;
-	struct bpf_mprog_entry *entry;
 	struct net_device *dev;
 	int ret;
 
@@ -133,12 +132,7 @@ int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
 		ret = -ENODEV;
 		goto out;
 	}
-	entry = tcx_entry_fetch(dev, ingress);
-	if (!entry) {
-		ret = -ENOENT;
-		goto out;
-	}
-	ret = bpf_mprog_query(attr, uattr, entry);
+	ret = bpf_mprog_query(attr, uattr, tcx_entry_fetch(dev, ingress));
 out:
 	rtnl_unlock();
 	return ret;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bb78212fa5b2..873ade146f3d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4047,11 +4047,9 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
 		bitmap_from_u64(mask, bt_reg_mask(bt));
 		for_each_set_bit(i, mask, 32) {
 			reg = &st->frame[0]->regs[i];
-			if (reg->type != SCALAR_VALUE) {
-				bt_clear_reg(bt, i);
-				continue;
-			}
-			reg->precise = true;
+			bt_clear_reg(bt, i);
+			if (reg->type == SCALAR_VALUE)
+				reg->precise = true;
 		}
 		return 0;
 	}
@@ -14481,7 +14479,7 @@ static int check_return_code(struct bpf_verifier_env *env)
 	struct tnum enforce_attach_type_range = tnum_unknown;
 	const struct bpf_prog *prog = env->prog;
 	struct bpf_reg_state *reg;
-	struct tnum range = tnum_range(0, 1);
+	struct tnum range = tnum_range(0, 1), const_0 = tnum_const(0);
 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
 	int err;
 	struct bpf_func_state *frame = env->cur_state->frame[0];
@@ -14529,8 +14527,8 @@ static int check_return_code(struct bpf_verifier_env *env)
 			return -EINVAL;
 		}
 
-		if (!tnum_in(tnum_const(0), reg->var_off)) {
-			verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
+		if (!tnum_in(const_0, reg->var_off)) {
+			verbose_invalid_scalar(env, reg, &const_0, "async callback", "R0");
 			return -EINVAL;
 		}
 		return 0;
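In the verifier.c hunks above, the async-callback return check keeps requiring exactly 0 but now also reports that exact expectation (const_0) instead of the [0, 1] range. The containment test is tnum_in() on "tracked numbers" (value plus a mask of unknown bits). The sketch below mimics that semantics in plain userspace C so the check is easy to poke at; it is an illustration under the assumption that it mirrors the kernel's tnum behaviour, not the kernel code itself.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct tnum { uint64_t value; uint64_t mask; };	/* mask bits are "unknown" */

static struct tnum tnum_const(uint64_t v) { return (struct tnum){ v, 0 }; }

/* Is every concrete value representable by b also representable by a? */
static bool tnum_in(struct tnum a, struct tnum b)
{
	if (b.mask & ~a.mask)	/* b is unknown where a is known: reject */
		return false;
	b.value &= ~a.mask;	/* ignore bits a does not constrain */
	return a.value == b.value;
}

int main(void)
{
	struct tnum range01 = { 0, 1 };		/* {0,1}, i.e. tnum_range(0, 1) */
	struct tnum const0 = tnum_const(0);	/* exactly 0 */
	struct tnum const2 = tnum_const(2);

	printf("0 in [0,1]: %d\n", tnum_in(range01, const0));	/* 1 */
	printf("2 in [0,1]: %d\n", tnum_in(range01, const2));	/* 0 */
	printf("2 in {0}:   %d\n", tnum_in(const0, const2));	/* 0 */
	return 0;
}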
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index c487ffef6652..76db6c67e39a 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -360,10 +360,9 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
 	}
 	css_task_iter_end(&it);
 	length = n;
-	/* now sort & (if procs) strip out duplicates */
+	/* now sort & strip out duplicates (tgids or recycled thread PIDs) */
 	sort(array, length, sizeof(pid_t), cmppid, NULL);
-	if (type == CGROUP_FILE_PROCS)
-		length = pidlist_uniq(array, length);
+	length = pidlist_uniq(array, length);
 
 	l = cgroup_pidlist_find_create(cgrp, type);
 	if (!l) {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4c72a41f11af..d0663b9324e7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1954,6 +1954,7 @@ static void perf_group_attach(struct perf_event *event)
 
 	list_add_tail(&event->sibling_list, &group_leader->sibling_list);
 	group_leader->nr_siblings++;
+	group_leader->group_generation++;
 
 	perf_event__header_size(group_leader);
 
@@ -2144,6 +2145,7 @@ static void perf_group_detach(struct perf_event *event)
 	if (leader != event) {
 		list_del_init(&event->sibling_list);
 		event->group_leader->nr_siblings--;
+		event->group_leader->group_generation++;
 		goto out;
 	}
 
@@ -5440,7 +5442,7 @@ static int __perf_read_group_add(struct perf_event *leader,
 					u64 read_format, u64 *values)
 {
 	struct perf_event_context *ctx = leader->ctx;
-	struct perf_event *sub;
+	struct perf_event *sub, *parent;
 	unsigned long flags;
 	int n = 1; /* skip @nr */
 	int ret;
@@ -5450,6 +5452,33 @@ static int __perf_read_group_add(struct perf_event *leader,
 		return ret;
 
 	raw_spin_lock_irqsave(&ctx->lock, flags);
+	/*
+	 * Verify the grouping between the parent and child (inherited)
+	 * events is still in tact.
+	 *
+	 * Specifically:
+	 *  - leader->ctx->lock pins leader->sibling_list
+	 *  - parent->child_mutex pins parent->child_list
+	 *  - parent->ctx->mutex pins parent->sibling_list
+	 *
+	 * Because parent->ctx != leader->ctx (and child_list nests inside
+	 * ctx->mutex), group destruction is not atomic between children, also
+	 * see perf_event_release_kernel(). Additionally, parent can grow the
+	 * group.
+	 *
+	 * Therefore it is possible to have parent and child groups in a
+	 * different configuration and summing over such a beast makes no sense
+	 * what so ever.
+	 *
+	 * Reject this.
+	 */
+	parent = leader->parent;
+	if (parent &&
+	    (parent->group_generation != leader->group_generation ||
+	     parent->nr_siblings != leader->nr_siblings)) {
+		ret = -ECHILD;
+		goto unlock;
+	}
 
 	/*
 	 * Since we co-schedule groups, {enabled,running} times of siblings
@@ -5483,8 +5512,9 @@ static int __perf_read_group_add(struct perf_event *leader,
 		values[n++] = atomic64_read(&sub->lost_samples);
 	}
 
+unlock:
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
-	return 0;
+	return ret;
 }
 
 static int perf_read_group(struct perf_event *event,
@@ -5503,10 +5533,6 @@ static int perf_read_group(struct perf_event *event,
 
 	values[0] = 1 + leader->nr_siblings;
 
-	/*
-	 * By locking the child_mutex of the leader we effectively
-	 * lock the child list of all siblings.. XXX explain how.
-	 */
 	mutex_lock(&leader->child_mutex);
 
 	ret = __perf_read_group_add(leader, read_format, values);
@@ -13346,6 +13372,7 @@ static int inherit_group(struct perf_event *parent_event,
 		    !perf_get_aux_event(child_ctr, leader))
 			return -EINVAL;
 	}
+	leader->group_generation = parent_event->group_generation;
 	return 0;
 }
 
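The events/core.c changes above add a group_generation counter that is bumped on every sibling attach/detach and copied to inherited children, so __perf_read_group_add() can detect that the parent group changed shape underneath a child and return -ECHILD instead of summing mismatched groups. Below is a stripped-down userspace model of that consistency check; the types and helper names are hypothetical and only illustrate the generation-counter pattern.

#include <errno.h>
#include <stdio.h>

struct group {
	unsigned int nr_siblings;
	unsigned int generation;	/* bumped on every attach/detach */
};

static void group_attach(struct group *g) { g->nr_siblings++; g->generation++; }
static void group_detach(struct group *g) { g->nr_siblings--; g->generation++; }

/* Summing parent and child counters only makes sense if both groups still
 * have the same shape; otherwise bail out, as __perf_read_group_add() does. */
static int check_groups_match(const struct group *parent, const struct group *child)
{
	if (parent->generation != child->generation ||
	    parent->nr_siblings != child->nr_siblings)
		return -ECHILD;
	return 0;
}

int main(void)
{
	struct group parent = { 0, 0 }, child = { 0, 0 };

	group_attach(&parent);
	group_attach(&child);		/* parent and child evolve in lockstep */
	printf("match: %d\n", check_groups_match(&parent, &child));	/* 0 */

	group_attach(&parent);		/* parent grew the group on its own */
	printf("match: %d\n", check_groups_match(&parent, &child));	/* -ECHILD */
	return 0;
}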
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 87e9f7e2bdc0..0f12e0a97e43 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -2647,7 +2647,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm,
 	memory_bm_free(bm, PG_UNSAFE_KEEP);
 
 	/* Make a copy of zero_bm so it can be created in safe pages */
-	error = memory_bm_create(&tmp, GFP_ATOMIC, PG_ANY);
+	error = memory_bm_create(&tmp, GFP_ATOMIC, PG_SAFE);
 	if (error)
 		goto Free;
 
@@ -2660,7 +2660,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm,
 		goto Free;
 
 	duplicate_memory_bitmap(zero_bm, &tmp);
-	memory_bm_free(&tmp, PG_UNSAFE_KEEP);
+	memory_bm_free(&tmp, PG_UNSAFE_CLEAR);
 
 	/* At this point zero_bm is in safe pages and it can be used for restoring. */
 	if (nr_highmem > 0) {
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 7e0b4dd02398..0b3af1529778 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3740,12 +3740,18 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
 
 	seq = prb_next_seq(prb);
 
+	/* Flush the consoles so that records up to @seq are printed. */
+	console_lock();
+	console_unlock();
+
 	for (;;) {
 		diff = 0;
 
 		/*
 		 * Hold the console_lock to guarantee safe access to
-		 * console->seq.
+		 * console->seq. Releasing console_lock flushes more
+		 * records in case @seq is still not printed on all
+		 * usable consoles.
 		 */
 		console_lock();
 
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 4492608b7d7f..458d359f5991 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -350,7 +350,8 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
 	 * Except when the rq is capped by uclamp_max.
 	 */
 	if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
-	    sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
+	    sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq &&
+	    !sg_policy->need_freq_update) {
 		next_f = sg_policy->next_freq;
 
 		/* Restore cached freq as next_freq has changed */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cb225921bbca..df348aa55d3c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -664,6 +664,10 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
 	cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
 }
 
+/*
+ * Specifically: avg_runtime() + 0 must result in entity_eligible() := true
+ * For this to be so, the result of this function must have a left bias.
+ */
 u64 avg_vruntime(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq->curr;
@@ -677,8 +681,12 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
 		load += weight;
 	}
 
-	if (load)
+	if (load) {
+		/* sign flips effective floor / ceil */
+		if (avg < 0)
+			avg -= (load - 1);
 		avg = div_s64(avg, load);
+	}
 
 	return cfs_rq->min_vruntime + avg;
 }
@@ -864,14 +872,16 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
 *
 * Which allows an EDF like search on (sub)trees.
 */
-static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
+static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq)
 {
 	struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node;
 	struct sched_entity *curr = cfs_rq->curr;
 	struct sched_entity *best = NULL;
+	struct sched_entity *best_left = NULL;
 
 	if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
 		curr = NULL;
+	best = curr;
 
 	/*
 	 * Once selected, run a task until it either becomes non-eligible or
@@ -892,33 +902,75 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
 		}
 
 		/*
-		 * If this entity has an earlier deadline than the previous
-		 * best, take this one. If it also has the earliest deadline
-		 * of its subtree, we're done.
+		 * Now we heap search eligible trees for the best (min_)deadline
 		 */
-		if (!best || deadline_gt(deadline, best, se)) {
+		if (!best || deadline_gt(deadline, best, se))
 			best = se;
-			if (best->deadline == best->min_deadline)
-				break;
-		}
 
 		/*
-		 * If the earlest deadline in this subtree is in the fully
-		 * eligible left half of our space, go there.
+		 * Every se in a left branch is eligible, keep track of the
+		 * branch with the best min_deadline
 		 */
+		if (node->rb_left) {
+			struct sched_entity *left = __node_2_se(node->rb_left);
+
+			if (!best_left || deadline_gt(min_deadline, best_left, left))
+				best_left = left;
+
+			/*
+			 * min_deadline is in the left branch. rb_left and all
+			 * descendants are eligible, so immediately switch to the second
+			 * loop.
+			 */
+			if (left->min_deadline == se->min_deadline)
+				break;
+		}
+
+		/* min_deadline is at this node, no need to look right */
+		if (se->deadline == se->min_deadline)
+			break;
+
+		/* else min_deadline is in the right branch. */
+		node = node->rb_right;
+	}
+
+	/*
+	 * We ran into an eligible node which is itself the best.
+	 * (Or nr_running == 0 and both are NULL)
+	 */
+	if (!best_left || (s64)(best_left->min_deadline - best->deadline) > 0)
+		return best;
+
+	/*
+	 * Now best_left and all of its children are eligible, and we are just
+	 * looking for deadline == min_deadline
+	 */
+	node = &best_left->run_node;
+	while (node) {
+		struct sched_entity *se = __node_2_se(node);
+
+		/* min_deadline is the current node */
+		if (se->deadline == se->min_deadline)
+			return se;
+
+		/* min_deadline is in the left branch */
 		if (node->rb_left &&
 		    __node_2_se(node->rb_left)->min_deadline == se->min_deadline) {
 			node = node->rb_left;
 			continue;
 		}
 
+		/* else min_deadline is in the right branch */
 		node = node->rb_right;
 	}
+	return NULL;
+}
 
-	if (!best || (curr && deadline_gt(deadline, best, curr)))
-		best = curr;
+static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
+{
+	struct sched_entity *se = __pick_eevdf(cfs_rq);
 
-	if (unlikely(!best)) {
+	if (!se) {
 		struct sched_entity *left = __pick_first_entity(cfs_rq);
 		if (left) {
 			pr_err("EEVDF scheduling fail, picking leftmost\n");
@@ -926,7 +978,7 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
 		}
 	}
 
-	return best;
+	return se;
 }
 
 #ifdef CONFIG_SCHED_DEBUG
@@ -3605,6 +3657,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		 */
 		deadline = div_s64(deadline * old_weight, weight);
 		se->deadline = se->vruntime + deadline;
+		if (se != cfs_rq->curr)
+			min_deadline_cb_propagate(&se->run_node, NULL);
 	}
 
 #ifdef CONFIG_SMP
@@ -4919,10 +4973,12 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-	u64 vslice = calc_delta_fair(se->slice, se);
-	u64 vruntime = avg_vruntime(cfs_rq);
+	u64 vslice, vruntime = avg_vruntime(cfs_rq);
 	s64 lag = 0;
 
+	se->slice = sysctl_sched_base_slice;
+	vslice = calc_delta_fair(se->slice, se);
+
 	/*
 	 * Due to how V is constructed as the weighted average of entities,
 	 * adding tasks with positive lag, or removing tasks with negative lag
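The avg_vruntime() hunk at the top of the fair.c diff above only matters when the weighted key sum is negative: div_s64() truncates toward zero, which would round the average up and could make an entity sitting exactly at the average appear ineligible. Subtracting (load - 1) before dividing turns truncation into a floor, giving the required left bias. A small userspace demonstration of that arithmetic (a sketch, not the kernel code):

#include <stdint.h>
#include <stdio.h>

static int64_t div_trunc(int64_t a, int64_t b) { return a / b; }	/* C semantics */

static int64_t div_floor(int64_t a, int64_t b)
{
	if (a < 0)
		a -= (b - 1);	/* same trick as the fair.c hunk; assumes b > 0 */
	return a / b;
}

int main(void)
{
	int64_t avg = -7, load = 4;

	printf("truncated: %lld\n", (long long)div_trunc(avg, load));	/* -1 */
	printf("floored:   %lld\n", (long long)div_floor(avg, load));	/* -2 */
	return 0;
}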
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 3b21f4063258..881f90f0cbcf 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -189,7 +189,7 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
 {
 	int i, size;
 
-	if (num < 0)
+	if (num <= 0)
 		return -EINVAL;
 
 	if (!fp->exit_handler) {
@@ -202,8 +202,8 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
 		size = fp->nr_maxactive;
 	else
 		size = num * num_possible_cpus() * 2;
-	if (size < 0)
-		return -E2BIG;
+	if (size <= 0)
+		return -EINVAL;
 
 	fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler);
 	if (!fp->rethook)
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 3d7a180a8427..a8fef6ab0872 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -705,6 +705,25 @@ static struct notifier_block trace_kprobe_module_nb = {
 	.priority = 1	/* Invoked after kprobe module callback */
 };
 
+static int count_symbols(void *data, unsigned long unused)
+{
+	unsigned int *count = data;
+
+	(*count)++;
+
+	return 0;
+}
+
+static unsigned int number_of_same_symbols(char *func_name)
+{
+	unsigned int count;
+
+	count = 0;
+	kallsyms_on_each_match_symbol(count_symbols, func_name, &count);
+
+	return count;
+}
+
 static int __trace_kprobe_create(int argc, const char *argv[])
 {
 	/*
@@ -836,6 +855,31 @@ static int __trace_kprobe_create(int argc, const char *argv[])
 		}
 	}
 
+	if (symbol && !strchr(symbol, ':')) {
+		unsigned int count;
+
+		count = number_of_same_symbols(symbol);
+		if (count > 1) {
+			/*
+			 * Users should use ADDR to remove the ambiguity of
+			 * using KSYM only.
+			 */
+			trace_probe_log_err(0, NON_UNIQ_SYMBOL);
+			ret = -EADDRNOTAVAIL;
+
+			goto error;
+		} else if (count == 0) {
+			/*
+			 * We can return ENOENT earlier than when register the
+			 * kprobe.
+			 */
+			trace_probe_log_err(0, BAD_PROBE_ADDR);
+			ret = -ENOENT;
+
+			goto error;
+		}
+	}
+
 	trace_probe_log_set_index(0);
 	if (event) {
 		ret = traceprobe_parse_event_name(&event, &group, gbuf,
@@ -1695,6 +1739,7 @@ static int unregister_kprobe_event(struct trace_kprobe *tk)
 }
 
 #ifdef CONFIG_PERF_EVENTS
+
 /* create a trace_kprobe, but don't add it to global lists */
 struct trace_event_call *
 create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
@@ -1705,6 +1750,24 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
 	int ret;
 	char *event;
 
+	if (func) {
+		unsigned int count;
+
+		count = number_of_same_symbols(func);
+		if (count > 1)
+			/*
+			 * Users should use addr to remove the ambiguity of
+			 * using func only.
+			 */
+			return ERR_PTR(-EADDRNOTAVAIL);
+		else if (count == 0)
+			/*
+			 * We can return ENOENT earlier than when register the
+			 * kprobe.
+			 */
+			return ERR_PTR(-ENOENT);
+	}
+
 	/*
 	 * local trace_kprobes are not added to dyn_event, so they are never
 	 * searched in find_trace_kprobe(). Therefore, there is no concern of
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 02b432ae7513..850d9ecb6765 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -450,6 +450,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
 	C(BAD_MAXACT,		"Invalid maxactive number"),		\
 	C(MAXACT_TOO_BIG,	"Maxactive is too big"),		\
 	C(BAD_PROBE_ADDR,	"Invalid probed address or symbol"),	\
+	C(NON_UNIQ_SYMBOL,	"The symbol is not unique"),		\
 	C(BAD_RETPROBE,		"Retprobe address must be an function entry"), \
 	C(NO_TRACEPOINT,	"Tracepoint is not found"),		\
 	C(BAD_ADDR_SUFFIX,	"Invalid probed address suffix"),	\
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b9f053a5a5f0..a3522b70218d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2166,7 +2166,7 @@ static struct worker *create_worker(struct worker_pool *pool)
 {
 	struct worker *worker;
 	int id;
-	char id_buf[16];
+	char id_buf[23];
 
 	/* ID is needed to determine kthread name */
 	id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
@@ -4600,12 +4600,22 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
 	}
 	cpus_read_unlock();
 
+	/* for unbound pwq, flush the pwq_release_worker ensures that the
+	 * pwq_release_workfn() completes before calling kfree(wq).
+	 */
+	if (ret)
+		kthread_flush_worker(pwq_release_worker);
+
 	return ret;
 
 enomem:
 	if (wq->cpu_pwq) {
-		for_each_possible_cpu(cpu)
-			kfree(*per_cpu_ptr(wq->cpu_pwq, cpu));
+		for_each_possible_cpu(cpu) {
+			struct pool_workqueue *pwq = *per_cpu_ptr(wq->cpu_pwq, cpu);
+
+			if (pwq)
+				kmem_cache_free(pwq_cache, pwq);
+		}
 		free_percpu(wq->cpu_pwq);
 		wq->cpu_pwq = NULL;
 	}
@@ -5782,9 +5792,13 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask)
 	list_for_each_entry(wq, &workqueues, list) {
 		if (!(wq->flags & WQ_UNBOUND))
 			continue;
+
 		/* creating multiple pwqs breaks ordering guarantee */
-		if (wq->flags & __WQ_ORDERED)
-			continue;
+		if (!list_empty(&wq->pwqs)) {
+			if (wq->flags & __WQ_ORDERED_EXPLICIT)
+				continue;
+			wq->flags &= ~__WQ_ORDERED;
+		}
 
 		ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask);
 		if (IS_ERR(ctx)) {