Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c              | 47
-rw-r--r--  kernel/compat.c              | 24
-rw-r--r--  kernel/irq/chip.c            |  1
-rw-r--r--  kernel/kcmp.c                |  7
-rw-r--r--  kernel/kexec.c               | 11
-rw-r--r--  kernel/power/power.h         |  1
-rw-r--r--  kernel/power/suspend.c       |  2
-rw-r--r--  kernel/power/suspend_test.c  | 31
-rw-r--r--  kernel/printk/printk.c       |  6
-rw-r--r--  kernel/rcu/tree.h            |  2
-rw-r--r--  kernel/rcu/tree_plugin.h     | 22
-rw-r--r--  kernel/resource.c            | 11
-rw-r--r--  kernel/time/tick-sched.c     | 14
-rw-r--r--  kernel/time/timekeeping.c    |  5
-rw-r--r--  kernel/trace/ring_buffer.c   | 16
15 files changed, 142 insertions, 58 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7dc8788cfd52..940aced4ed00 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1035,6 +1035,11 @@ static void cgroup_get(struct cgroup *cgrp)
 	css_get(&cgrp->self);
 }
 
+static bool cgroup_tryget(struct cgroup *cgrp)
+{
+	return css_tryget(&cgrp->self);
+}
+
 static void cgroup_put(struct cgroup *cgrp)
 {
 	css_put(&cgrp->self);
@@ -1147,7 +1152,8 @@ static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn)
 	 * protection against removal.  Ensure @cgrp stays accessible and
 	 * break the active_ref protection.
 	 */
-	cgroup_get(cgrp);
+	if (!cgroup_tryget(cgrp))
+		return NULL;
 	kernfs_break_active_protection(kn);
 
 	mutex_lock(&cgroup_mutex);
@@ -3271,8 +3277,17 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
 {
 	struct cftype *cft;
 
-	for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
-		cft->flags |= __CFTYPE_NOT_ON_DFL;
+	/*
+	 * If legacy_files_on_dfl, we want to show the legacy files on the
+	 * dfl hierarchy, but only if the target subsystem hasn't been
+	 * updated for the dfl hierarchy yet.
+	 */
+	if (!cgroup_legacy_files_on_dfl ||
+	    ss->dfl_cftypes != ss->legacy_cftypes) {
+		for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
+			cft->flags |= __CFTYPE_NOT_ON_DFL;
+	}
+
 	return cgroup_add_cftypes(ss, cfts);
 }
 
@@ -4387,6 +4402,15 @@ static void css_release_work_fn(struct work_struct *work)
 		/* cgroup release path */
 		cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
 		cgrp->id = -1;
+
+		/*
+		 * There are two control paths which try to determine
+		 * cgroup from dentry without going through kernfs -
+		 * cgroupstats_build() and css_tryget_online_from_dir().
+		 * Those are supported by RCU protecting clearing of
+		 * cgrp->kn->priv backpointer.
+		 */
+		RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, NULL);
 	}
 
 	mutex_unlock(&cgroup_mutex);
@@ -4543,6 +4567,11 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	struct cftype *base_files;
 	int ssid, ret;
 
+	/* Do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable.
+	 */
+	if (strchr(name, '\n'))
+		return -EINVAL;
+
 	parent = cgroup_kn_lock_live(parent_kn);
 	if (!parent)
 		return -ENODEV;
@@ -4820,16 +4849,6 @@ static int cgroup_rmdir(struct kernfs_node *kn)
 
 	cgroup_kn_unlock(kn);
 
-	/*
-	 * There are two control paths which try to determine cgroup from
-	 * dentry without going through kernfs - cgroupstats_build() and
-	 * css_tryget_online_from_dir().  Those are supported by RCU
-	 * protecting clearing of cgrp->kn->priv backpointer, which should
-	 * happen after all files under it have been removed.
-	 */
-	if (!ret)
-		RCU_INIT_POINTER(*(void __rcu __force **)&kn->priv, NULL);
-
 	cgroup_put(cgrp);
 	return ret;
 }
@@ -5416,7 +5435,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
 	/*
 	 * This path doesn't originate from kernfs and @kn could already
 	 * have been or be removed at any point.  @kn->priv is RCU
-	 * protected for this access.  See cgroup_rmdir() for details.
+	 * protected for this access.  See css_release_work_fn() for details.
 	 */
 	cgrp = rcu_dereference(kn->priv);
 	if (cgrp)
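The cgroup_kn_lock_live() fix above is an instance of the classic "tryget" refcount pattern: once an object's count can hit zero concurrently, a caller that cannot prove the object is alive must use a conditional increment that fails on a dead object, rather than an unconditional one that would resurrect it. A minimal user-space sketch of the idea, using C11 atomics instead of the kernel's percpu-ref based css_tryget(); obj, obj_get, and obj_tryget are illustrative names, not kernel API:

#include <stdatomic.h>
#include <stdbool.h>

struct obj {
	atomic_int refcnt;		/* 0 means dead, awaiting free */
};

/* Unconditional get: only legal while the caller already holds a ref. */
static void obj_get(struct obj *o)
{
	atomic_fetch_add(&o->refcnt, 1);
}

/*
 * Conditional get: increment only while the count is still non-zero, so
 * a racing final put cannot be "resurrected".  Returns false when the
 * object is already on its way to destruction, mirroring how
 * cgroup_kn_lock_live() now returns NULL for a dying cgroup.
 */
static bool obj_tryget(struct obj *o)
{
	int old = atomic_load(&o->refcnt);

	while (old != 0)
		if (atomic_compare_exchange_weak(&o->refcnt, &old, old + 1))
			return true;	/* on failure, old is reloaded */
	return false;
}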
diff --git a/kernel/compat.c b/kernel/compat.c
index 633394f442f8..ebb3c369d03d 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -226,7 +226,7 @@ static long compat_nanosleep_restart(struct restart_block *restart)
 	ret = hrtimer_nanosleep_restart(restart);
 	set_fs(oldfs);
 
-	if (ret) {
+	if (ret == -ERESTART_RESTARTBLOCK) {
 		rmtp = restart->nanosleep.compat_rmtp;
 
 		if (rmtp && compat_put_timespec(&rmt, rmtp))
@@ -256,7 +256,26 @@ COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp,
 				HRTIMER_MODE_REL, CLOCK_MONOTONIC);
 	set_fs(oldfs);
 
-	if (ret) {
+	/*
+	 * hrtimer_nanosleep() can only return 0 or
+	 * -ERESTART_RESTARTBLOCK here because:
+	 *
+	 * - we call it with HRTIMER_MODE_REL and therefore exclude the
+	 *   -ERESTARTNOHAND return path.
+	 *
+	 * - we supply the rmtp argument from the task stack (due to
+	 *   the necessary compat conversion), so the update cannot
+	 *   fail, which excludes the -EFAULT return path as well.  If
+	 *   it fails nevertheless we have a bigger problem and won't
+	 *   reach this place anymore.
+	 *
+	 * - if the return value is 0, we do not have to update rmtp
+	 *   because there is no remaining time.
+	 *
+	 * We check for -ERESTART_RESTARTBLOCK nevertheless in case the
+	 * core implementation decides to return random nonsense.
+	 */
+	if (ret == -ERESTART_RESTARTBLOCK) {
 		struct restart_block *restart
 			= &current_thread_info()->restart_block;
 
@@ -266,7 +285,6 @@ COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp,
 		if (rmtp && compat_put_timespec(&rmt, rmtp))
 			return -EFAULT;
 	}
-
 	return ret;
 }
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index a2b28a2fd7b1..6223fab9a9d2 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -517,6 +517,7 @@ out:
 		chip->irq_eoi(&desc->irq_data);
 	raw_spin_unlock(&desc->lock);
 }
+EXPORT_SYMBOL_GPL(handle_fasteoi_irq);
 
 /**
  *	handle_edge_irq - edge type IRQ handler
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index e30ac0fe61c3..0aa69ea1d8fd 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -44,11 +44,12 @@ static long kptr_obfuscate(long v, int type)
  */
 static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type)
 {
-	long ret;
+	long t1, t2;
 
-	ret = kptr_obfuscate((long)v1, type) - kptr_obfuscate((long)v2, type);
+	t1 = kptr_obfuscate((long)v1, type);
+	t2 = kptr_obfuscate((long)v2, type);
 
-	return (ret < 0) | ((ret > 0) << 1);
+	return (t1 < t2) | ((t1 > t2) << 1);
 }
 
 /* The caller must have pinned the task */
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 0b49a0a58102..2bee072268d9 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -64,7 +64,9 @@ bool kexec_in_progress = false;
 char __weak kexec_purgatory[0];
 size_t __weak kexec_purgatory_size = 0;
 
+#ifdef CONFIG_KEXEC_FILE
 static int kexec_calculate_store_digests(struct kimage *image);
+#endif
 
 /* Location of the reserved area for the crash kernel */
 struct resource crashk_res = {
@@ -341,6 +343,7 @@ out_free_image:
 	return ret;
 }
 
+#ifdef CONFIG_KEXEC_FILE
 static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len)
 {
 	struct fd f = fdget(fd);
@@ -612,6 +615,9 @@ out_free_image:
 	kfree(image);
 	return ret;
 }
+#else /* CONFIG_KEXEC_FILE */
+static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
+#endif /* CONFIG_KEXEC_FILE */
 
 static int kimage_is_destination_range(struct kimage *image,
 					unsigned long start,
@@ -1375,6 +1381,7 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
 }
 #endif
 
+#ifdef CONFIG_KEXEC_FILE
 SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, unsigned long,
 		cmdline_len, const char __user *, cmdline_ptr, unsigned long, flags)
@@ -1451,6 +1458,8 @@ out:
 	return ret;
 }
 
+#endif /* CONFIG_KEXEC_FILE */
+
 void crash_kexec(struct pt_regs *regs)
 {
 	/* Take the kexec_mutex here to prevent sys_kexec_load
@@ -2006,6 +2015,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 
 subsys_initcall(crash_save_vmcoreinfo_init);
 
+#ifdef CONFIG_KEXEC_FILE
 static int __kexec_add_segment(struct kimage *image, char *buf,
 			       unsigned long bufsz, unsigned long mem,
 			       unsigned long memsz)
@@ -2682,6 +2692,7 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
 
 	return 0;
 }
+#endif /* CONFIG_KEXEC_FILE */
 
 /*
  * Move into place and start executing a preloaded standalone
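The kcmp.c hunk above fixes a classic comparison bug: computing t1 - t2 and testing the sign is wrong whenever the difference overflows a signed long (and signed overflow is undefined behaviour in C anyway), so the fix performs two direct comparisons and folds them into kcmp's 0/1/2 result encoding. A small self-contained demonstration of that encoding; kcmp_encode is an illustrative stand-in for the kernel's kcmp_ptr():

#include <assert.h>
#include <limits.h>

/*
 * Encode an ordering the way the fixed kcmp_ptr() does:
 * 0 = equal, 1 = less than, 2 = greater than.
 */
static int kcmp_encode(long t1, long t2)
{
	return (t1 < t2) | ((t1 > t2) << 1);
}

int main(void)
{
	/*
	 * The removed "t1 - t2" variant misorders values this far
	 * apart: the subtraction overflows, so its sign says nothing
	 * about the real ordering.  Direct comparison cannot fail.
	 */
	assert(kcmp_encode(LONG_MIN, LONG_MAX) == 1);	/* less */
	assert(kcmp_encode(LONG_MAX, LONG_MIN) == 2);	/* greater */
	assert(kcmp_encode(42, 42) == 0);		/* equal */
	return 0;
}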
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 5d49dcac2537..2df883a9d3cb 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -179,6 +179,7 @@ extern void swsusp_show_speed(struct timeval *, struct timeval *,
 
 #ifdef CONFIG_SUSPEND
 /* kernel/power/suspend.c */
+extern const char *pm_labels[];
 extern const char *pm_states[];
 
 extern int suspend_devices_and_enter(suspend_state_t state);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 6dadb25cb0d8..18c62195660f 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -31,7 +31,7 @@
 
 #include "power.h"
 
-static const char *pm_labels[] = { "mem", "standby", "freeze", };
+const char *pm_labels[] = { "mem", "standby", "freeze", NULL };
 const char *pm_states[PM_SUSPEND_MAX];
 
 static const struct platform_suspend_ops *suspend_ops;
diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
index 2f524928b6aa..bd91bc177c93 100644
--- a/kernel/power/suspend_test.c
+++ b/kernel/power/suspend_test.c
@@ -129,20 +129,20 @@ static int __init has_wakealarm(struct device *dev, const void *data)
  * at startup time.  They're normally disabled, for faster boot and because
  * we can't know which states really work on this particular system.
  */
-static suspend_state_t test_state __initdata = PM_SUSPEND_ON;
+static const char *test_state_label __initdata;
 
 static char warn_bad_state[] __initdata =
 	KERN_WARNING "PM: can't test '%s' suspend state\n";
 
 static int __init setup_test_suspend(char *value)
 {
-	suspend_state_t i;
+	int i;
 
 	/* "=mem" ==> "mem" */
 	value++;
-	for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++)
-		if (!strcmp(pm_states[i], value)) {
-			test_state = i;
+	for (i = 0; pm_labels[i]; i++)
+		if (!strcmp(pm_labels[i], value)) {
+			test_state_label = pm_labels[i];
 			return 0;
 		}
 
@@ -158,13 +158,21 @@ static int __init test_suspend(void)
 
 	struct rtc_device *rtc = NULL;
 	struct device *dev;
+	suspend_state_t test_state;
 
 	/* PM is initialized by now; is that state testable? */
-	if (test_state == PM_SUSPEND_ON)
-		goto done;
-	if (!pm_states[test_state]) {
-		printk(warn_bad_state, pm_states[test_state]);
-		goto done;
+	if (!test_state_label)
+		return 0;
+
+	for (test_state = PM_SUSPEND_MIN; test_state < PM_SUSPEND_MAX; test_state++) {
+		const char *state_label = pm_states[test_state];
+
+		if (state_label && !strcmp(test_state_label, state_label))
+			break;
+	}
+	if (test_state == PM_SUSPEND_MAX) {
+		printk(warn_bad_state, test_state_label);
+		return 0;
 	}
 
 	/* RTCs have initialized by now too ... can we use one? */
@@ -173,13 +181,12 @@ static int __init test_suspend(void)
 	rtc = rtc_class_open(dev_name(dev));
 	if (!rtc) {
 		printk(warn_no_rtc);
-		goto done;
+		return 0;
 	}
 
 	/* go for it */
 	test_wakealarm(rtc, test_state);
 	rtc_class_close(rtc);
-done:
 	return 0;
 }
 late_initcall(test_suspend);
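The suspend_test.c rework above keys off the trailing NULL that the suspend.c hunk adds to pm_labels[]: a NULL-terminated string table can be scanned without a separate length constant, and resolving the label to a suspend state is deferred until pm_states[] has actually been populated. A rough user-space sketch of that two-phase lookup shape; the enum, tables, and function names are illustrative stand-ins for suspend_state_t, pm_labels[], and pm_states[]:

#include <stdio.h>
#include <string.h>

enum state { STATE_MIN, STATE_MEM = STATE_MIN, STATE_STANDBY, STATE_FREEZE, STATE_MAX };

static const char *labels[] = { "mem", "standby", "freeze", NULL };
static const char *states[STATE_MAX] = { "mem", NULL, "freeze" }; /* "standby" unsupported here */

/* Boot-time half: just remember the label, as setup_test_suspend() now does. */
static const char *find_label(const char *value)
{
	int i;

	for (i = 0; labels[i]; i++)		/* the NULL sentinel ends the scan */
		if (!strcmp(labels[i], value))
			return labels[i];
	return NULL;
}

/* Late half: resolve the label to a state once states[] is populated. */
static enum state resolve(const char *label)
{
	enum state s;

	if (!label)
		return STATE_MAX;
	for (s = STATE_MIN; s < STATE_MAX; s++)
		if (states[s] && !strcmp(states[s], label))
			return s;
	return STATE_MAX;			/* known label, unsupported state */
}

int main(void)
{
	printf("mem     -> %d\n", resolve(find_label("mem")));		/* 0 */
	printf("standby -> %d\n", resolve(find_label("standby")));	/* 3 = STATE_MAX */
	return 0;
}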
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index e04c455a0e38..1ce770687ea8 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1665,15 +1665,15 @@ asmlinkage int vprintk_emit(int facility, int level,
 	raw_spin_lock(&logbuf_lock);
 	logbuf_cpu = this_cpu;
 
-	if (recursion_bug) {
+	if (unlikely(recursion_bug)) {
 		static const char recursion_msg[] =
 			"BUG: recent printk recursion!";
 
 		recursion_bug = 0;
-		text_len = strlen(recursion_msg);
 		/* emit KERN_CRIT message */
 		printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
-					 NULL, 0, recursion_msg, text_len);
+					 NULL, 0, recursion_msg,
+					 strlen(recursion_msg));
 	}
 
 	/*
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 71e64c718f75..6a86eb7bac45 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -358,7 +358,7 @@ struct rcu_data {
 	struct rcu_head **nocb_gp_tail;
 	long nocb_gp_count;
 	long nocb_gp_count_lazy;
-	bool nocb_leader_wake;		/* Is the nocb leader thread awake? */
+	bool nocb_leader_sleep;		/* Is the nocb leader thread asleep? */
 	struct rcu_data *nocb_next_follower;
 					/* Next follower in wakeup chain. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 00dc411e9676..a7997e272564 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2074,9 +2074,9 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
 
 	if (!ACCESS_ONCE(rdp_leader->nocb_kthread))
 		return;
-	if (!ACCESS_ONCE(rdp_leader->nocb_leader_wake) || force) {
+	if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) {
 		/* Prior xchg orders against prior callback enqueue. */
-		ACCESS_ONCE(rdp_leader->nocb_leader_wake) = true;
+		ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false;
 		wake_up(&rdp_leader->nocb_wq);
 	}
 }
@@ -2253,7 +2253,7 @@ wait_again:
 	if (!rcu_nocb_poll) {
 		trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
 		wait_event_interruptible(my_rdp->nocb_wq,
-				ACCESS_ONCE(my_rdp->nocb_leader_wake));
+				!ACCESS_ONCE(my_rdp->nocb_leader_sleep));
 		/* Memory barrier handled by smp_mb() calls below and repoll. */
 	} else if (firsttime) {
 		firsttime = false; /* Don't drown trace log with "Poll"! */
@@ -2292,12 +2292,12 @@ wait_again:
 		schedule_timeout_interruptible(1);
 
 		/* Rescan in case we were a victim of memory ordering. */
-		my_rdp->nocb_leader_wake = false;
-		smp_mb();  /* Ensure _wake false before scan. */
+		my_rdp->nocb_leader_sleep = true;
+		smp_mb();  /* Ensure _sleep true before scan. */
 		for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower)
 			if (ACCESS_ONCE(rdp->nocb_head)) {
 				/* Found CB, so short-circuit next wait. */
-				my_rdp->nocb_leader_wake = true;
+				my_rdp->nocb_leader_sleep = false;
 				break;
 			}
 		goto wait_again;
@@ -2307,17 +2307,17 @@ wait_again:
 	rcu_nocb_wait_gp(my_rdp);
 
 	/*
-	 * We left ->nocb_leader_wake set to reduce cache thrashing.
-	 * We clear it now, but recheck for new callbacks while
+	 * We left ->nocb_leader_sleep unset to reduce cache thrashing.
+	 * We set it now, but recheck for new callbacks while
 	 * traversing our follower list.
 	 */
-	my_rdp->nocb_leader_wake = false;
-	smp_mb(); /* Ensure _wake false before scan of ->nocb_head. */
+	my_rdp->nocb_leader_sleep = true;
+	smp_mb(); /* Ensure _sleep true before scan of ->nocb_head. */
 
 	/* Each pass through the following loop wakes a follower, if needed. */
 	for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
 		if (ACCESS_ONCE(rdp->nocb_head))
-			my_rdp->nocb_leader_wake = true; /* No need to wait. */
+			my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/
 		if (!rdp->nocb_gp_head)
 			continue; /* No CBs, so no need to wake follower. */
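The tree_plugin.h changes above invert the flag's sense (nocb_leader_wake becomes nocb_leader_sleep) but keep the underlying lost-wakeup protocol intact: the leader publishes its intention to sleep, executes a full barrier, then rescans for callbacks, so a racing enqueue either observes the flag and issues a wakeup, or the rescan observes the callback. A compressed user-space sketch of that protocol, with C11 seq_cst atomics standing in for the kernel's ACCESS_ONCE()/smp_mb() and illustrative names throughout:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool leader_sleep;	/* ~ rdp_leader->nocb_leader_sleep */
static atomic_bool cbs_pending;		/* ~ rdp->nocb_head != NULL */

/* Enqueue side (~ wake_nocb_leader()): publish work, then wake if needed. */
static void enqueue_cb(void)
{
	atomic_store(&cbs_pending, true);
	if (atomic_load(&leader_sleep)) {
		atomic_store(&leader_sleep, false);
		/* ... wake_up(&rdp_leader->nocb_wq) in the kernel ... */
	}
}

/*
 * Leader side: announce the intention to sleep *before* the rescan.
 * With both sides ordered this way, at least one of them must see the
 * other's store: either enqueue_cb() sees leader_sleep and wakes us,
 * or we see cbs_pending and skip the wait.
 */
static bool leader_may_sleep(void)
{
	atomic_store(&leader_sleep, true);	/* "_sleep true before scan" */
	if (atomic_load(&cbs_pending)) {
		atomic_store(&leader_sleep, false); /* short-circuit next wait */
		return false;			/* work found, don't sleep */
	}
	return true;	/* safe to wait_event() on the queue */
}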
diff --git a/kernel/resource.c b/kernel/resource.c
index da14b8d09296..60c5a3856ab7 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -351,15 +351,12 @@ static int find_next_iomem_res(struct resource *res, char *name,
 	end = res->end;
 	BUG_ON(start >= end);
 
-	read_lock(&resource_lock);
-
-	if (first_level_children_only) {
-		p = iomem_resource.child;
+	if (first_level_children_only)
 		sibling_only = true;
-	} else
-		p = &iomem_resource;
 
-	while ((p = next_resource(p, sibling_only))) {
+	read_lock(&resource_lock);
+
+	for (p = iomem_resource.child; p; p = next_resource(p, sibling_only)) {
 		if (p->flags != res->flags)
 			continue;
 		if (name && strcmp(p->name, name))
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 99aa6ee3908f..f654a8a298fa 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -225,6 +225,20 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
 };
 
 /*
+ * Kick this CPU if it's full dynticks in order to force it to
+ * re-evaluate its dependency on the tick and restart it if necessary.
+ * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
+ * is NMI safe.
+ */
+void tick_nohz_full_kick(void)
+{
+	if (!tick_nohz_full_cpu(smp_processor_id()))
+		return;
+
+	irq_work_queue(&__get_cpu_var(nohz_full_kick_work));
+}
+
+/*
  * Kick the CPU if it's full dynticks in order to force it to
  * re-evaluate its dependency on the tick and restart it if necessary.
  */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index fb4a9c2cf8d9..ec1791fae965 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -442,11 +442,12 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
 		tk->ntp_error = 0;
 		ntp_clear();
 	}
-	update_vsyscall(tk);
-	update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
 
 	tk_update_ktime_data(tk);
 
+	update_vsyscall(tk);
+	update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
+
 	if (action & TK_MIRROR)
 		memcpy(&shadow_timekeeper, &tk_core.timekeeper,
 		       sizeof(tk_core.timekeeper));
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index afb04b9b818a..b38fb2b9e237 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -626,8 +626,22 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
 		work = &cpu_buffer->irq_work;
 	}
 
-	work->waiters_pending = true;
 	poll_wait(filp, &work->waiters, poll_table);
+	work->waiters_pending = true;
+	/*
+	 * There's a tight race between setting the waiters_pending and
+	 * checking if the ring buffer is empty.  Once the waiters_pending bit
+	 * is set, the next event will wake the task up, but we can get stuck
+	 * if there's only a single event in.
+	 *
+	 * FIXME: Ideally, we need a memory barrier on the writer side as well,
+	 * but adding a memory barrier to all events will cause too much of a
+	 * performance hit in the fast path.  We only need a memory barrier when
+	 * the buffer goes from empty to having content.  But as this race is
+	 * extremely small, and it's not a problem if another event comes in, we
+	 * will fix it later.
+	 */
+	smp_mb();
 
 	if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
 	    (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
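The ring_buffer.c hunk above is one half of a store-buffering pattern: the poller stores waiters_pending, executes smp_mb(), then loads the buffer state, so if the poller sees the buffer empty, a writer with the matching barrier would be guaranteed to see waiters_pending and wake it. As the FIXME notes, that writer-side barrier is deliberately omitted for performance. A litmus-style sketch of the two sides, with illustrative names and C11 fences standing in for smp_mb():

#include <stdatomic.h>

/*
 * Without the poller-side fence, its store could be reordered after its
 * load, letting both sides read 0: the poller sleeps on an already
 * non-empty buffer and the writer, seeing no waiters, never wakes it.
 * With both fences present, at least one side must see the other's store.
 */
static atomic_int waiters_pending;	/* stored by the poller */
static atomic_int buffer_nonempty;	/* stored by the writer */

static int poller_sees_data(void)
{
	atomic_store_explicit(&waiters_pending, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* the new smp_mb() */
	/* reading 0 here then implies the writer observes waiters_pending */
	return atomic_load_explicit(&buffer_nonempty, memory_order_relaxed);
}

static int writer_sees_waiters(void)
{
	atomic_store_explicit(&buffer_nonempty, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* the barrier the
							   patch's FIXME defers */
	return atomic_load_explicit(&waiters_pending, memory_order_relaxed);
}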