aboutsummaryrefslogtreecommitdiff
path: root/kernel/sched/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--kernel/sched/core.c188
1 files changed, 123 insertions, 65 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d575b4914925..bfa7452ca92e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -26,7 +26,10 @@
#include <linux/topology.h>
#include <linux/sched/clock.h>
#include <linux/sched/cond_resched.h>
+#include <linux/sched/cputime.h>
#include <linux/sched/debug.h>
+#include <linux/sched/hotplug.h>
+#include <linux/sched/init.h>
#include <linux/sched/isolation.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/mm.h>
@@ -145,12 +148,6 @@ const_debug unsigned int sysctl_sched_nr_migrate = 8;
const_debug unsigned int sysctl_sched_nr_migrate = 32;
#endif
-/*
- * period over which we measure -rt task CPU usage in us.
- * default: 1s
- */
-unsigned int sysctl_sched_rt_period = 1000000;
-
__read_mostly int scheduler_running;
#ifdef CONFIG_SCHED_CORE
@@ -445,13 +442,6 @@ sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }
#endif /* CONFIG_SCHED_CORE */
/*
- * part of the period that we allow rt tasks to run in us.
- * default: 0.95s
- */
-int sysctl_sched_rt_runtime = 950000;
-
-
-/*
* Serialization rules:
*
* Lock order:
@@ -610,10 +600,10 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
swap(rq1, rq2);
raw_spin_rq_lock(rq1);
- if (__rq_lockp(rq1) == __rq_lockp(rq2))
- return;
+ if (__rq_lockp(rq1) != __rq_lockp(rq2))
+ raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
- raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
+ double_rq_clock_clear_update(rq1, rq2);
}
#endif
@@ -1319,10 +1309,10 @@ static void set_load_weight(struct task_struct *p, bool update_load)
static DEFINE_MUTEX(uclamp_mutex);
/* Max allowed minimum utilization */
-unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE;
+static unsigned int __maybe_unused sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE;
/* Max allowed maximum utilization */
-unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
+static unsigned int __maybe_unused sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
/*
* By default RT tasks run at the maximum performance point/capacity of the
@@ -1339,7 +1329,7 @@ unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
* This knob will not override the system default sched_util_clamp_min defined
* above.
*/
-unsigned int sysctl_sched_uclamp_util_min_rt_default = SCHED_CAPACITY_SCALE;
+static unsigned int sysctl_sched_uclamp_util_min_rt_default = SCHED_CAPACITY_SCALE;
/* All clamps are required to be less or equal than these values */
static struct uclamp_se uclamp_default[UCLAMP_CNT];
@@ -1469,33 +1459,6 @@ static void uclamp_update_util_min_rt_default(struct task_struct *p)
task_rq_unlock(rq, p, &rf);
}
-static void uclamp_sync_util_min_rt_default(void)
-{
- struct task_struct *g, *p;
-
- /*
- * copy_process() sysctl_uclamp
- * uclamp_min_rt = X;
- * write_lock(&tasklist_lock) read_lock(&tasklist_lock)
- * // link thread smp_mb__after_spinlock()
- * write_unlock(&tasklist_lock) read_unlock(&tasklist_lock);
- * sched_post_fork() for_each_process_thread()
- * __uclamp_sync_rt() __uclamp_sync_rt()
- *
- * Ensures that either sched_post_fork() will observe the new
- * uclamp_min_rt or for_each_process_thread() will observe the new
- * task.
- */
- read_lock(&tasklist_lock);
- smp_mb__after_spinlock();
- read_unlock(&tasklist_lock);
-
- rcu_read_lock();
- for_each_process_thread(g, p)
- uclamp_update_util_min_rt_default(p);
- rcu_read_unlock();
-}
-
static inline struct uclamp_se
uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id)
{
@@ -1775,6 +1738,11 @@ uclamp_update_active_tasks(struct cgroup_subsys_state *css)
}
static void cpu_util_update_eff(struct cgroup_subsys_state *css);
+#endif
+
+#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_UCLAMP_TASK
+#ifdef CONFIG_UCLAMP_TASK_GROUP
static void uclamp_update_root_tg(void)
{
struct task_group *tg = &root_task_group;
@@ -1792,7 +1760,34 @@ static void uclamp_update_root_tg(void)
static void uclamp_update_root_tg(void) { }
#endif
-int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
+static void uclamp_sync_util_min_rt_default(void)
+{
+ struct task_struct *g, *p;
+
+ /*
+ * copy_process() sysctl_uclamp
+ * uclamp_min_rt = X;
+ * write_lock(&tasklist_lock) read_lock(&tasklist_lock)
+ * // link thread smp_mb__after_spinlock()
+ * write_unlock(&tasklist_lock) read_unlock(&tasklist_lock);
+ * sched_post_fork() for_each_process_thread()
+ * __uclamp_sync_rt() __uclamp_sync_rt()
+ *
+ * Ensures that either sched_post_fork() will observe the new
+ * uclamp_min_rt or for_each_process_thread() will observe the new
+ * task.
+ */
+ read_lock(&tasklist_lock);
+ smp_mb__after_spinlock();
+ read_unlock(&tasklist_lock);
+
+ rcu_read_lock();
+ for_each_process_thread(g, p)
+ uclamp_update_util_min_rt_default(p);
+ rcu_read_unlock();
+}
+
+static int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
bool update_root_tg = false;
@@ -1856,6 +1851,8 @@ done:
return result;
}
+#endif
+#endif
static int uclamp_validate(struct task_struct *p,
const struct sched_attr *attr)
@@ -2190,7 +2187,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
{
if (p->sched_class == rq->curr->sched_class)
rq->curr->sched_class->check_preempt_curr(rq, p, flags);
- else if (p->sched_class > rq->curr->sched_class)
+ else if (sched_class_above(p->sched_class, rq->curr->sched_class))
resched_curr(rq);
/*
@@ -2408,7 +2405,7 @@ static int migration_cpu_stop(void *data)
* __migrate_task() such that we will not miss enforcing cpus_ptr
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
*/
- flush_smp_call_function_from_idle();
+ flush_smp_call_function_queue();
raw_spin_lock(&p->pi_lock);
rq_lock(rq, &rf);
@@ -4430,7 +4427,7 @@ out:
__setup("schedstats=", setup_schedstats);
#ifdef CONFIG_PROC_SYSCTL
-int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
+static int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
struct ctl_table t;
@@ -4452,6 +4449,52 @@ int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
#endif /* CONFIG_PROC_SYSCTL */
#endif /* CONFIG_SCHEDSTATS */
+#ifdef CONFIG_SYSCTL
+static struct ctl_table sched_core_sysctls[] = {
+#ifdef CONFIG_SCHEDSTATS
+ {
+ .procname = "sched_schedstats",
+ .data = NULL,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sysctl_schedstats,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif /* CONFIG_SCHEDSTATS */
+#ifdef CONFIG_UCLAMP_TASK
+ {
+ .procname = "sched_util_clamp_min",
+ .data = &sysctl_sched_uclamp_util_min,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sysctl_sched_uclamp_handler,
+ },
+ {
+ .procname = "sched_util_clamp_max",
+ .data = &sysctl_sched_uclamp_util_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sysctl_sched_uclamp_handler,
+ },
+ {
+ .procname = "sched_util_clamp_min_rt_default",
+ .data = &sysctl_sched_uclamp_util_min_rt_default,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sysctl_sched_uclamp_handler,
+ },
+#endif /* CONFIG_UCLAMP_TASK */
+ {}
+};
+static int __init sched_core_sysctl_init(void)
+{
+ register_sysctl_init("kernel", sched_core_sysctls);
+ return 0;
+}
+late_initcall(sched_core_sysctl_init);
+#endif /* CONFIG_SYSCTL */
+
/*
* fork()/clone()-time setup:
*/
@@ -5689,7 +5732,7 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
* higher scheduling class, because otherwise those lose the
* opportunity to pull in more work from other CPUs.
*/
- if (likely(prev->sched_class <= &fair_sched_class &&
+ if (likely(!sched_class_above(prev->sched_class, &fair_sched_class) &&
rq->nr_running == rq->cfs.h_nr_running)) {
p = pick_next_task_fair(rq, prev, rf);
@@ -5752,6 +5795,8 @@ static inline struct task_struct *pick_task(struct rq *rq)
extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
+static void queue_core_balance(struct rq *rq);
+
static struct task_struct *
pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
@@ -5801,7 +5846,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
}
rq->core_pick = NULL;
- return next;
+ goto out;
}
put_prev_task_balance(rq, prev, rf);
@@ -5851,7 +5896,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
*/
WARN_ON_ONCE(fi_before);
task_vruntime_update(rq, next, false);
- goto done;
+ goto out_set_next;
}
}
@@ -5970,8 +6015,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
resched_curr(rq_i);
}
-done:
+out_set_next:
set_next_task(rq, next);
+out:
+ if (rq->core->core_forceidle_count && next == rq->idle)
+ queue_core_balance(rq);
+
return next;
}
@@ -6000,7 +6049,7 @@ static bool try_steal_cookie(int this, int that)
if (p == src->core_pick || p == src->curr)
goto next;
- if (!cpumask_test_cpu(this, &p->cpus_mask))
+ if (!is_cpu_allowed(p, this))
goto next;
if (p->core_occupation > dst->idle->core_occupation)
@@ -6066,7 +6115,7 @@ static void sched_core_balance(struct rq *rq)
static DEFINE_PER_CPU(struct callback_head, core_balance_head);
-void queue_core_balance(struct rq *rq)
+static void queue_core_balance(struct rq *rq)
{
if (!sched_core_enabled(rq))
return;
@@ -6304,10 +6353,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
/*
* We must load prev->state once (task_struct::state is volatile), such
- * that:
- *
- * - we form a control dependency vs deactivate_task() below.
- * - ptrace_{,un}freeze_traced() can change ->state underneath us.
+ * that we form a control dependency vs deactivate_task() below.
*/
prev_state = READ_ONCE(prev->__state);
if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) {
@@ -6376,7 +6422,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
migrate_disable_switch(rq, prev);
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
- trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev_state, prev, next);
+ trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);
/* Also unlocks the rq: */
rq = context_switch(rq, prev, next, &rf);
@@ -8409,6 +8455,18 @@ static void __init preempt_dynamic_init(void)
}
}
+#define PREEMPT_MODEL_ACCESSOR(mode) \
+ bool preempt_model_##mode(void) \
+ { \
+ WARN_ON_ONCE(preempt_dynamic_mode == preempt_dynamic_undefined); \
+ return preempt_dynamic_mode == preempt_dynamic_##mode; \
+ } \
+ EXPORT_SYMBOL_GPL(preempt_model_##mode)
+
+PREEMPT_MODEL_ACCESSOR(none);
+PREEMPT_MODEL_ACCESSOR(voluntary);
+PREEMPT_MODEL_ACCESSOR(full);
+
#else /* !CONFIG_PREEMPT_DYNAMIC */
static inline void preempt_dynamic_init(void) { }
@@ -9451,11 +9509,11 @@ void __init sched_init(void)
int i;
/* Make sure the linker didn't screw up */
- BUG_ON(&idle_sched_class + 1 != &fair_sched_class ||
- &fair_sched_class + 1 != &rt_sched_class ||
- &rt_sched_class + 1 != &dl_sched_class);
+ BUG_ON(&idle_sched_class != &fair_sched_class + 1 ||
+ &fair_sched_class != &rt_sched_class + 1 ||
+ &rt_sched_class != &dl_sched_class + 1);
#ifdef CONFIG_SMP
- BUG_ON(&dl_sched_class + 1 != &stop_sched_class);
+ BUG_ON(&dl_sched_class != &stop_sched_class + 1);
#endif
wait_bit_init();