aboutsummaryrefslogtreecommitdiff
path: root/kernel/sched/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--kernel/sched/core.c72
1 files changed, 37 insertions, 35 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ad97f3ba5ec5..091e089063be 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -135,9 +135,8 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
* In theory, the compile should just see 0 here, and optimize out the call
* to sched_rt_avg_update. But I don't trust it...
*/
-#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
- s64 steal = 0, irq_delta = 0;
-#endif
+ s64 __maybe_unused steal = 0, irq_delta = 0;
+
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
@@ -177,7 +176,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
rq->clock_task += delta;
-#ifdef HAVE_SCHED_AVG_IRQ
+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
update_irq_load_avg(rq, irq_delta + steal);
#endif
@@ -701,6 +700,7 @@ static void set_load_weight(struct task_struct *p, bool update_load)
if (idle_policy(p->policy)) {
load->weight = scale_load(WEIGHT_IDLEPRIO);
load->inv_weight = WMULT_IDLEPRIO;
+ p->se.runnable_weight = load->weight;
return;
}
@@ -713,6 +713,7 @@ static void set_load_weight(struct task_struct *p, bool update_load)
} else {
load->weight = scale_load(sched_prio_to_weight[prio]);
load->inv_weight = sched_prio_to_wmult[prio];
+ p->se.runnable_weight = load->weight;
}
}
@@ -721,8 +722,10 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
if (!(flags & ENQUEUE_NOCLOCK))
update_rq_clock(rq);
- if (!(flags & ENQUEUE_RESTORE))
+ if (!(flags & ENQUEUE_RESTORE)) {
sched_info_queued(rq, p);
+ psi_enqueue(p, flags & ENQUEUE_WAKEUP);
+ }
p->sched_class->enqueue_task(rq, p, flags);
}
@@ -732,8 +735,10 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
if (!(flags & DEQUEUE_NOCLOCK))
update_rq_clock(rq);
- if (!(flags & DEQUEUE_SAVE))
+ if (!(flags & DEQUEUE_SAVE)) {
sched_info_dequeued(rq, p);
+ psi_dequeue(p, flags & DEQUEUE_SLEEP);
+ }
p->sched_class->dequeue_task(rq, p, flags);
}
@@ -2036,6 +2041,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
if (task_cpu(p) != cpu) {
wake_flags |= WF_MIGRATED;
+ psi_ttwu_dequeue(p);
set_task_cpu(p, cpu);
}
@@ -2875,6 +2881,18 @@ unsigned long long nr_context_switches(void)
}
/*
+ * Consumers of these two interfaces, like for example the cpuidle menu
+ * governor, are using nonsensical data. Preferring shallow idle state selection
+ * for a CPU that has IO-wait which might not even end up running the task when
+ * it does become runnable.
+ */
+
+unsigned long nr_iowait_cpu(int cpu)
+{
+ return atomic_read(&cpu_rq(cpu)->nr_iowait);
+}
+
+/*
* IO-wait accounting, and how its mostly bollocks (on SMP).
*
* The idea behind IO-wait account is to account the idle time that we could
@@ -2909,31 +2927,11 @@ unsigned long nr_iowait(void)
unsigned long i, sum = 0;
for_each_possible_cpu(i)
- sum += atomic_read(&cpu_rq(i)->nr_iowait);
+ sum += nr_iowait_cpu(i);
return sum;
}
-/*
- * Consumers of these two interfaces, like for example the cpufreq menu
- * governor are using nonsensical data. Boosting frequency for a CPU that has
- * IO-wait which might not even end up running the task when it does become
- * runnable.
- */
-
-unsigned long nr_iowait_cpu(int cpu)
-{
- struct rq *this = cpu_rq(cpu);
- return atomic_read(&this->nr_iowait);
-}
-
-void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
-{
- struct rq *rq = this_rq();
- *nr_waiters = atomic_read(&rq->nr_iowait);
- *load = rq->load.weight;
-}
-
#ifdef CONFIG_SMP
/*
@@ -3050,6 +3048,7 @@ void scheduler_tick(void)
curr->sched_class->task_tick(rq, curr, 0);
cpu_load_update_active(rq);
calc_global_load_tick(rq);
+ psi_task_tick(rq);
rq_unlock(rq, &rf);
@@ -4932,9 +4931,7 @@ static void do_sched_yield(void)
struct rq_flags rf;
struct rq *rq;
- local_irq_disable();
- rq = this_rq();
- rq_lock(rq, &rf);
+ rq = this_rq_lock_irq(&rf);
schedstat_inc(rq->yld_count);
current->sched_class->yield_task(rq);
@@ -5243,7 +5240,7 @@ out_unlock:
* an error code.
*/
SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
- struct timespec __user *, interval)
+ struct __kernel_timespec __user *, interval)
{
struct timespec64 t;
int retval = sched_rr_get_interval(pid, &t);
@@ -5254,16 +5251,16 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
return retval;
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval,
compat_pid_t, pid,
- struct compat_timespec __user *, interval)
+ struct old_timespec32 __user *, interval)
{
struct timespec64 t;
int retval = sched_rr_get_interval(pid, &t);
if (retval == 0)
- retval = compat_put_timespec64(&t, interval);
+ retval = put_old_timespec32(&t, interval);
return retval;
}
#endif
@@ -5854,11 +5851,14 @@ void __init sched_init_smp(void)
/*
* There's no userspace yet to cause hotplug operations; hence all the
* CPU masks are stable and all blatant races in the below code cannot
- * happen.
+ * happen. The hotplug lock is nevertheless taken to satisfy lockdep,
+ * but there won't be any contention on it.
*/
+ cpus_read_lock();
mutex_lock(&sched_domains_mutex);
sched_init_domains(cpu_active_mask);
mutex_unlock(&sched_domains_mutex);
+ cpus_read_unlock();
/* Move init over to a non-isolated CPU */
if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
@@ -6068,6 +6068,8 @@ void __init sched_init(void)
init_schedstats();
+ psi_init();
+
scheduler_running = 1;
}