Diffstat (limited to 'kernel')
-rw-r--r--  kernel/context_tracking.c             | 80
-rw-r--r--  kernel/events/core.c                  | 21
-rw-r--r--  kernel/irq/internals.h                |  5
-rw-r--r--  kernel/irq/pm.c                       |  3
-rw-r--r--  kernel/irq/proc.c                     |  2
-rw-r--r--  kernel/livepatch/core.c               |  6
-rw-r--r--  kernel/panic.c                        |  5
-rw-r--r--  kernel/sched/fair.c                   |  2
-rw-r--r--  kernel/signal.c                       |  2
-rw-r--r--  kernel/time/timer.c                   | 22
-rw-r--r--  kernel/trace/Kconfig                  |  2
-rw-r--r--  kernel/trace/trace.c                  | 16
-rw-r--r--  kernel/trace/trace.h                  |  1
-rw-r--r--  kernel/trace/trace_functions_graph.c  |  6
14 files changed, 102 insertions(+), 71 deletions(-)
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 0a495ab35bc7..d8560ee3bab7 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -58,36 +58,13 @@ static void context_tracking_recursion_exit(void)
  * instructions to execute won't use any RCU read side critical section
  * because this function sets RCU in extended quiescent state.
  */
-void context_tracking_enter(enum ctx_state state)
+void __context_tracking_enter(enum ctx_state state)
 {
-        unsigned long flags;
-
-        /*
-         * Repeat the user_enter() check here because some archs may be calling
-         * this from asm and if no CPU needs context tracking, they shouldn't
-         * go further. Repeat the check here until they support the inline static
-         * key check.
-         */
-        if (!context_tracking_is_enabled())
-                return;
-
-        /*
-         * Some contexts may involve an exception occurring in an irq,
-         * leading to that nesting:
-         * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
-         * This would mess up the dyntick_nesting count though. And rcu_irq_*()
-         * helpers are enough to protect RCU uses inside the exception. So
-         * just return immediately if we detect we are in an IRQ.
-         */
-        if (in_interrupt())
-                return;
-
         /* Kernel threads aren't supposed to go to userspace */
         WARN_ON_ONCE(!current->mm);
 
-        local_irq_save(flags);
         if (!context_tracking_recursion_enter())
-                goto out_irq_restore;
+                return;
 
         if ( __this_cpu_read(context_tracking.state) != state) {
                 if (__this_cpu_read(context_tracking.active)) {
@@ -120,7 +97,27 @@ void context_tracking_enter(enum ctx_state state)
                 __this_cpu_write(context_tracking.state, state);
         }
         context_tracking_recursion_exit();
-out_irq_restore:
+}
+NOKPROBE_SYMBOL(__context_tracking_enter);
+EXPORT_SYMBOL_GPL(__context_tracking_enter);
+
+void context_tracking_enter(enum ctx_state state)
+{
+        unsigned long flags;
+
+        /*
+         * Some contexts may involve an exception occurring in an irq,
+         * leading to that nesting:
+         * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+         * This would mess up the dyntick_nesting count though. And rcu_irq_*()
+         * helpers are enough to protect RCU uses inside the exception. So
+         * just return immediately if we detect we are in an IRQ.
+         */
+        if (in_interrupt())
+                return;
+
+        local_irq_save(flags);
+        __context_tracking_enter(state);
         local_irq_restore(flags);
 }
 NOKPROBE_SYMBOL(context_tracking_enter);
@@ -128,7 +125,7 @@ EXPORT_SYMBOL_GPL(context_tracking_enter);
 
 void context_tracking_user_enter(void)
 {
-        context_tracking_enter(CONTEXT_USER);
+        user_enter();
 }
 NOKPROBE_SYMBOL(context_tracking_user_enter);
 
@@ -144,19 +141,10 @@ NOKPROBE_SYMBOL(context_tracking_user_enter);
 * This call supports re-entrancy. This way it can be called from any exception
 * handler without needing to know if we came from userspace or not.
 */
-void context_tracking_exit(enum ctx_state state)
+void __context_tracking_exit(enum ctx_state state)
 {
-        unsigned long flags;
-
-        if (!context_tracking_is_enabled())
-                return;
-
-        if (in_interrupt())
-                return;
-
-        local_irq_save(flags);
         if (!context_tracking_recursion_enter())
-                goto out_irq_restore;
+                return;
 
         if (__this_cpu_read(context_tracking.state) == state) {
                 if (__this_cpu_read(context_tracking.active)) {
@@ -173,7 +161,19 @@ void context_tracking_exit(enum ctx_state state)
                 __this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
         }
         context_tracking_recursion_exit();
-out_irq_restore:
+}
+NOKPROBE_SYMBOL(__context_tracking_exit);
+EXPORT_SYMBOL_GPL(__context_tracking_exit);
+
+void context_tracking_exit(enum ctx_state state)
+{
+        unsigned long flags;
+
+        if (in_interrupt())
+                return;
+
+        local_irq_save(flags);
+        __context_tracking_exit(state);
         local_irq_restore(flags);
 }
 NOKPROBE_SYMBOL(context_tracking_exit);
@@ -181,7 +181,7 @@ EXPORT_SYMBOL_GPL(context_tracking_exit);
 
 void context_tracking_user_exit(void)
 {
-        context_tracking_exit(CONTEXT_USER);
+        user_exit();
 }
 NOKPROBE_SYMBOL(context_tracking_user_exit);
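The context-tracking change above follows a common kernel refactoring pattern: the work moves into raw __context_tracking_enter()/__context_tracking_exit() variants that require the caller to have interrupts disabled, while the old entry points become thin wrappers that save and restore interrupt state themselves, so callers that already run with interrupts off can take the raw variant and skip a redundant save/restore. Below is a minimal userspace sketch of that split, using POSIX signal blocking as a stand-in for local_irq_save()/local_irq_restore(); the function names are illustrative, not from the patch.

    #include <signal.h>
    #include <stdio.h>

    /* Raw variant: caller must already have "interrupts" (signals) blocked,
     * just as __context_tracking_enter() requires irqs to be off. */
    static void __do_transition(int state)
    {
            printf("state -> %d\n", state);         /* uninterruptible work */
    }

    /* Wrapper: blocks and restores signals itself, mirroring the
     * local_irq_save()/local_irq_restore() pair in the patched wrapper. */
    static void do_transition(int state)
    {
            sigset_t all, old;

            sigfillset(&all);
            sigprocmask(SIG_SETMASK, &all, &old);   /* "local_irq_save()" */
            __do_transition(state);
            sigprocmask(SIG_SETMASK, &old, NULL);   /* "local_irq_restore()" */
    }

    int main(void)
    {
            do_transition(1);       /* ordinary callers take the wrapper */
            return 0;
    }

The wrapper stays the general-purpose API; only callers that can prove the precondition use the double-underscore variant.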
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1a734e0adfa7..36babfd20648 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1050,13 +1050,13 @@ retry:
         /*
          * One of the few rules of preemptible RCU is that one cannot do
          * rcu_read_unlock() while holding a scheduler (or nested) lock when
-         * part of the read side critical section was preemptible -- see
+         * part of the read side critical section was irqs-enabled -- see
          * rcu_read_unlock_special().
          *
          * Since ctx->lock nests under rq->lock we must ensure the entire read
-         * side critical section is non-preemptible.
+         * side critical section has interrupts disabled.
          */
-        preempt_disable();
+        local_irq_save(*flags);
         rcu_read_lock();
         ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
         if (ctx) {
@@ -1070,21 +1070,22 @@ retry:
                  * if so. If we locked the right context, then it
                  * can't get swapped on us any more.
                  */
-                raw_spin_lock_irqsave(&ctx->lock, *flags);
+                raw_spin_lock(&ctx->lock);
                 if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
-                        raw_spin_unlock_irqrestore(&ctx->lock, *flags);
+                        raw_spin_unlock(&ctx->lock);
                         rcu_read_unlock();
-                        preempt_enable();
+                        local_irq_restore(*flags);
                         goto retry;
                 }
 
                 if (!atomic_inc_not_zero(&ctx->refcount)) {
-                        raw_spin_unlock_irqrestore(&ctx->lock, *flags);
+                        raw_spin_unlock(&ctx->lock);
                         ctx = NULL;
                 }
         }
         rcu_read_unlock();
-        preempt_enable();
+        if (!ctx)
+                local_irq_restore(*flags);
         return ctx;
 }
 
@@ -6913,6 +6914,10 @@ static int perf_tp_filter_match(struct perf_event *event,
 {
         void *record = data->raw->data;
 
+        /* only top level events have filters set */
+        if (event->parent)
+                event = event->parent;
+
         if (likely(!event->filter) || filter_match_preds(event->filter, record))
                 return 1;
         return 0;
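The perf_lock_task_context() hunk above changes which mechanism keeps the RCU read side safe (interrupts disabled rather than preemption disabled) but preserves the lock-then-revalidate retry loop: after locking the context, re-read the published pointer and retry if it was swapped underneath us. A hedged userspace sketch of that revalidation pattern follows; all names are invented, and unlike the kernel, which relies on RCU to keep the old object's memory valid across the window, this sketch simply never frees contexts.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct ctx {
            pthread_mutex_t lock;
            int refcount;
    };

    static _Atomic(struct ctx *) current_ctx;

    /* Lock-then-revalidate: the published pointer may be swapped between
     * the load and the lock acquisition, so re-check it under the lock
     * and retry if it changed. */
    static struct ctx *get_locked_ref(void)
    {
            struct ctx *c;

    retry:
            c = atomic_load(&current_ctx);
            if (!c)
                    return NULL;

            pthread_mutex_lock(&c->lock);
            if (c != atomic_load(&current_ctx)) {
                    /* Swapped underneath us: drop the stale lock, retry. */
                    pthread_mutex_unlock(&c->lock);
                    goto retry;
            }
            c->refcount++;          /* still the published object: take a ref */
            pthread_mutex_unlock(&c->lock);
            return c;
    }

    int main(void)
    {
            struct ctx c = { PTHREAD_MUTEX_INITIALIZER, 0 };

            atomic_store(&current_ctx, &c);
            if (get_locked_ref())
                    printf("refcount now %d\n", c.refcount);
            return 0;
    }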
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 05c2188271b8..fcab63c66905 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -199,6 +199,11 @@ static inline int irq_desc_get_node(struct irq_desc *desc)
         return irq_common_data_get_node(&desc->irq_common_data);
 }
 
+static inline int irq_desc_is_chained(struct irq_desc *desc)
+{
+        return (desc->action && desc->action == &chained_action);
+}
+
 #ifdef CONFIG_PM_SLEEP
 bool irq_pm_check_wakeup(struct irq_desc *desc);
 void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action);
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index e80c4400118a..cea1de0161f1 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -70,7 +70,8 @@ void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action)
 
 static bool suspend_device_irq(struct irq_desc *desc)
 {
-        if (!desc->action || desc->no_suspend_depth)
+        if (!desc->action || irq_desc_is_chained(desc) ||
+            desc->no_suspend_depth)
                 return false;
 
         if (irqd_is_wakeup_set(&desc->irq_data)) {
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index a916cf144b65..a2c02fd5d6d0 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -475,7 +475,7 @@ int show_interrupts(struct seq_file *p, void *v)
         for_each_online_cpu(j)
                 any_count |= kstat_irqs_cpu(i, j);
         action = desc->action;
-        if ((!action || action == &chained_action) && !any_count)
+        if ((!action || irq_desc_is_chained(desc)) && !any_count)
                 goto out;
 
         seq_printf(p, "%*d: ", prec, i);
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 6e5344112419..db545cbcdb89 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -294,6 +294,12 @@ static int klp_write_object_relocations(struct module *pmod,
 
         for (reloc = obj->relocs; reloc->name; reloc++) {
                 if (!klp_is_module(obj)) {
+
+#if defined(CONFIG_RANDOMIZE_BASE)
+                        /* If KASLR has been enabled, adjust old value accordingly */
+                        if (kaslr_enabled())
+                                reloc->val += kaslr_offset();
+#endif
                         ret = klp_verify_vmlinux_symbol(reloc->name,
                                                         reloc->val);
                         if (ret)
diff --git a/kernel/panic.c b/kernel/panic.c
index 4579dbb7ed87..4b150bc0c6c1 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -152,8 +152,11 @@ void panic(const char *fmt, ...)
          * We may have ended up stopping the CPU holding the lock (in
          * smp_send_stop()) while still having some valuable data in the console
          * buffer. Try to acquire the lock then release it regardless of the
-         * result. The release will also print the buffers out.
+         * result. The release will also print the buffers out. Locks debug
+         * should be disabled to avoid reporting bad unlock balance when
+         * panic() is not being called from OOPS.
          */
+        debug_locks_off();
         console_trylock();
         console_unlock();
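The livepatch hunk above accounts for KASLR: relocation values recorded against the unrandomized vmlinux layout must be slid by the boot-time randomization offset before klp_verify_vmlinux_symbol() can match them against the running kernel. A toy illustration of that adjustment, with made-up addresses standing in for a System.map entry and kaslr_offset():

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* Made-up values: a symbol address from the unrandomized image
             * and a per-boot randomization slide. */
            uint64_t build_time_addr = 0xffffffff81000000ULL;
            uint64_t kaslr_slide     = 0x000000002a000000ULL;

            /* Every vmlinux symbol in the running kernel is shifted by the
             * slide, so a recorded address must be adjusted before lookup. */
            uint64_t runtime_addr = build_time_addr + kaslr_slide;

            printf("build-time: %#llx\nruntime:    %#llx\n",
                   (unsigned long long)build_time_addr,
                   (unsigned long long)runtime_addr);
            return 0;
    }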
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 824aa9f501a3..f04fda8f669c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2302,7 +2302,7 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr)
         now = curr->se.sum_exec_runtime;
         period = (u64)curr->numa_scan_period * NSEC_PER_MSEC;
 
-        if (now - curr->node_stamp > period) {
+        if (now > curr->node_stamp + period) {
                 if (!curr->node_stamp)
                         curr->numa_scan_period = task_scan_min(curr);
                 curr->node_stamp += period;
diff --git a/kernel/signal.c b/kernel/signal.c
index c0b01fe24bbd..f3f1f7a972fd 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3503,7 +3503,7 @@ SYSCALL_DEFINE0(pause)
 
 #endif
 
-int sigsuspend(sigset_t *set)
+static int sigsuspend(sigset_t *set)
 {
         current->saved_sigmask = current->blocked;
         set_current_blocked(set);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 74591ba9474f..bbc5d1114583 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -977,13 +977,29 @@ EXPORT_SYMBOL(add_timer);
  */
 void add_timer_on(struct timer_list *timer, int cpu)
 {
-        struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
+        struct tvec_base *new_base = per_cpu_ptr(&tvec_bases, cpu);
+        struct tvec_base *base;
         unsigned long flags;
 
         timer_stats_timer_set_start_info(timer);
         BUG_ON(timer_pending(timer) || !timer->function);
-        spin_lock_irqsave(&base->lock, flags);
-        timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
+
+        /*
+         * If @timer was on a different CPU, it should be migrated with the
+         * old base locked to prevent other operations proceeding with the
+         * wrong base locked. See lock_timer_base().
+         */
+        base = lock_timer_base(timer, &flags);
+        if (base != new_base) {
+                timer->flags |= TIMER_MIGRATING;
+
+                spin_unlock(&base->lock);
+                base = new_base;
+                spin_lock(&base->lock);
+                WRITE_ONCE(timer->flags,
+                           (timer->flags & ~TIMER_BASEMASK) | cpu);
+        }
+
         debug_activate(timer, timer->expires);
         internal_add_timer(base, timer);
         spin_unlock_irqrestore(&base->lock, flags);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8d6363f42169..e45db6b0d878 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -434,7 +434,7 @@ config UPROBE_EVENT
 
 config BPF_EVENTS
         depends on BPF_SYSCALL
-        depends on KPROBE_EVENT || UPROBE_EVENT
+        depends on (KPROBE_EVENT || UPROBE_EVENT) && PERF_EVENTS
         bool
         default y
         help
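The sched/fair.c one-liner above is an unsigned-overflow fix: curr->node_stamp is advanced in whole periods and can run ahead of the task's accumulated runtime, at which point now - curr->node_stamp wraps to an enormous u64 and the old test fires spuriously. Comparing now > curr->node_stamp + period involves no subtraction and stays false until a full period has really elapsed. A small standalone demonstration with made-up values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* The stamp has been advanced past the accumulated runtime,
             * which can happen after node_stamp += period. */
            uint64_t now    = 1000;
            uint64_t stamp  = 1500;
            uint64_t period = 100000;

            /* Old test: now - stamp underflows to ~2^64, so the comparison
             * is true even though no full period has elapsed. */
            printf("old test fires: %d\n", (now - stamp) > period);   /* 1 */

            /* New test: no subtraction, no wraparound. */
            printf("new test fires: %d\n", now > stamp + period);     /* 0 */
            return 0;
    }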
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2198a630ef58..87fb9801bd9e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -100,8 +100,6 @@ static DEFINE_PER_CPU(bool, trace_cmdline_save);
  */
 static int tracing_disabled = 1;
 
-DEFINE_PER_CPU(int, ftrace_cpu_disabled);
-
 cpumask_var_t __read_mostly tracing_buffer_mask;
 
 /*
@@ -1775,10 +1773,6 @@ trace_function(struct trace_array *tr,
         struct ring_buffer_event *event;
         struct ftrace_entry *entry;
 
-        /* If we are reading the ring buffer, don't trace */
-        if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
-                return;
-
         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
                                           flags, pc);
         if (!event)
@@ -4554,6 +4548,8 @@ out:
         return ret;
 }
 
+#ifdef CONFIG_TRACER_MAX_TRACE
+
 static ssize_t
 tracing_max_lat_read(struct file *filp, char __user *ubuf,
                      size_t cnt, loff_t *ppos)
@@ -4568,6 +4564,8 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
 }
 
+#endif
+
 static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
         struct trace_array *tr = inode->i_private;
@@ -5469,12 +5467,14 @@ static const struct file_operations tracing_thresh_fops = {
         .llseek         = generic_file_llseek,
 };
 
+#ifdef CONFIG_TRACER_MAX_TRACE
 static const struct file_operations tracing_max_lat_fops = {
         .open           = tracing_open_generic,
         .read           = tracing_max_lat_read,
         .write          = tracing_max_lat_write,
         .llseek         = generic_file_llseek,
 };
+#endif
 
 static const struct file_operations set_tracer_fops = {
         .open           = tracing_open_generic,
@@ -6847,7 +6847,9 @@ struct dentry *tracing_init_dentry(void)
         if (tr->dir)
                 return NULL;
 
-        if (WARN_ON(!debugfs_initialized()))
+        if (WARN_ON(!tracefs_initialized()) ||
+            (IS_ENABLED(CONFIG_DEBUG_FS) &&
+             WARN_ON(!debugfs_initialized())))
                 return ERR_PTR(-ENODEV);
 
         /*
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index dd7620802e72..919d9d07686f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -667,7 +667,6 @@ extern int DYN_FTRACE_TEST_NAME2(void);
 extern bool ring_buffer_expanded;
 extern bool tracing_selftest_disabled;
 
-DECLARE_PER_CPU(int, ftrace_cpu_disabled);
 
 #ifdef CONFIG_FTRACE_STARTUP_TEST
 extern int trace_selftest_startup_function(struct tracer *trace,
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 92382af7a213..a663cbb84107 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -288,9 +288,6 @@ int __trace_graph_entry(struct trace_array *tr,
         struct ring_buffer *buffer = tr->trace_buffer.buffer;
         struct ftrace_graph_ent_entry *entry;
 
-        if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
-                return 0;
-
         event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
                                           sizeof(*entry), flags, pc);
         if (!event)
@@ -403,9 +400,6 @@ void __trace_graph_return(struct trace_array *tr,
         struct ring_buffer *buffer = tr->trace_buffer.buffer;
         struct ftrace_graph_ret_entry *entry;
 
-        if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
-                return;
-
         event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
                                           sizeof(*entry), flags, pc);
         if (!event)
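The trace.c and trace.h hunks above gate tracing_max_lat_read(), tracing_max_lat_write(), and tracing_max_lat_fops under CONFIG_TRACER_MAX_TRACE, so configurations without that option neither compile the helpers nor warn about them being unused. The same discipline in miniature, with a made-up FEATURE_MAX_TRACE macro guarding a helper together with its only caller:

    #include <stdio.h>

    /* Build with -DFEATURE_MAX_TRACE to include the optional path. */
    #ifdef FEATURE_MAX_TRACE
    static void report_max_latency(void)
    {
            puts("max latency: ...");
    }
    #endif

    int main(void)
    {
    #ifdef FEATURE_MAX_TRACE
            report_max_latency();   /* only user of the helper above */
    #endif
            puts("tracing ready");
            return 0;
    }

Guarding definition and use under one symbol keeps every configuration warning-free without scattering stub functions.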