Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r-- | kernel/sched/core.c | 82 |
1 file changed, 53 insertions, 29 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 43e453ab7e20..dbfb5717d6af 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -548,6 +548,11 @@ sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }
  *   ON_RQ_MIGRATING state is used for migration without holding both
  *   rq->locks. It indicates task_cpu() is not stable, see task_rq_lock().
  *
+ *   Additionally it is possible to be ->on_rq but still be considered not
+ *   runnable when p->se.sched_delayed is true. These tasks are on the runqueue
+ *   but will be dequeued as soon as they get picked again. See the
+ *   task_is_runnable() helper.
+ *
  * p->on_cpu <- { 0, 1 }:
  *
  *   is set by prepare_task() and cleared by finish_task() such that it will be
@@ -2012,11 +2017,6 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
         if (!(flags & ENQUEUE_NOCLOCK))
                 update_rq_clock(rq);
 
-        if (!(flags & ENQUEUE_RESTORE)) {
-                sched_info_enqueue(rq, p);
-                psi_enqueue(p, (flags & ENQUEUE_WAKEUP) && !(flags & ENQUEUE_MIGRATED));
-        }
-
         p->sched_class->enqueue_task(rq, p, flags);
         /*
          * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
@@ -2024,6 +2024,11 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
          */
         uclamp_rq_inc(rq, p);
 
+        if (!(flags & ENQUEUE_RESTORE)) {
+                sched_info_enqueue(rq, p);
+                psi_enqueue(p, flags & ENQUEUE_MIGRATED);
+        }
+
         if (sched_core_enabled(rq))
                 sched_core_enqueue(rq, p);
 }
@@ -2041,7 +2046,7 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 
         if (!(flags & DEQUEUE_SAVE)) {
                 sched_info_dequeue(rq, p);
-                psi_dequeue(p, flags & DEQUEUE_SLEEP);
+                psi_dequeue(p, !(flags & DEQUEUE_SLEEP));
         }
 
         /*
@@ -3518,14 +3523,16 @@ out:
  * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
  */
 static inline
-int select_task_rq(struct task_struct *p, int cpu, int wake_flags)
+int select_task_rq(struct task_struct *p, int cpu, int *wake_flags)
 {
         lockdep_assert_held(&p->pi_lock);
 
-        if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p))
-                cpu = p->sched_class->select_task_rq(p, cpu, wake_flags);
-        else
+        if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p)) {
+                cpu = p->sched_class->select_task_rq(p, cpu, *wake_flags);
+                *wake_flags |= WF_RQ_SELECTED;
+        } else {
                 cpu = cpumask_any(p->cpus_ptr);
+        }
 
         /*
          * In order not to call set_task_cpu() on a blocking task we need
@@ -3659,6 +3666,8 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
                 rq->nr_uninterruptible--;
 
 #ifdef CONFIG_SMP
+        if (wake_flags & WF_RQ_SELECTED)
+                en_flags |= ENQUEUE_RQ_SELECTED;
         if (wake_flags & WF_MIGRATED)
                 en_flags |= ENQUEUE_MIGRATED;
         else
@@ -4120,6 +4129,8 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         guard(preempt)();
         int cpu, success = 0;
 
+        wake_flags |= WF_TTWU;
+
         if (p == current) {
                 /*
                  * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
@@ -4252,7 +4263,7 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
                  */
                 smp_cond_load_acquire(&p->on_cpu, !VAL);
 
-                cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
+                cpu = select_task_rq(p, p->wake_cpu, &wake_flags);
                 if (task_cpu(p) != cpu) {
                         if (p->in_iowait) {
                                 delayacct_blkio_end(p);
@@ -4317,9 +4328,10 @@ static bool __task_needs_rq_lock(struct task_struct *p)
  * @arg: Argument to function.
  *
  * Fix the task in it's current state by avoiding wakeups and or rq operations
- * and call @func(@arg) on it. This function can use ->on_rq and task_curr()
- * to work out what the state is, if required. Given that @func can be invoked
- * with a runqueue lock held, it had better be quite lightweight.
+ * and call @func(@arg) on it. This function can use task_is_runnable() and
+ * task_curr() to work out what the state is, if required. Given that @func
+ * can be invoked with a runqueue lock held, it had better be quite
+ * lightweight.
  *
  * Returns:
  *      Whatever @func returns
@@ -4793,6 +4805,7 @@ void wake_up_new_task(struct task_struct *p)
 {
         struct rq_flags rf;
         struct rq *rq;
+        int wake_flags = WF_FORK;
 
         raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
         WRITE_ONCE(p->__state, TASK_RUNNING);
@@ -4807,7 +4820,7 @@ void wake_up_new_task(struct task_struct *p)
          */
         p->recent_used_cpu = task_cpu(p);
         rseq_migrate(p);
-        __set_task_cpu(p, select_task_rq(p, task_cpu(p), WF_FORK));
+        __set_task_cpu(p, select_task_rq(p, task_cpu(p), &wake_flags));
 #endif
         rq = __task_rq_lock(p, &rf);
         update_rq_clock(rq);
@@ -4815,7 +4828,7 @@ void wake_up_new_task(struct task_struct *p)
 
         activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL);
         trace_sched_wakeup_new(p);
-        wakeup_preempt(rq, p, WF_FORK);
+        wakeup_preempt(rq, p, wake_flags);
 #ifdef CONFIG_SMP
         if (p->sched_class->task_woken) {
                 /*
@@ -6537,6 +6550,7 @@ static void __sched notrace __schedule(int sched_mode)
          * as a preemption by schedule_debug() and RCU.
          */
         bool preempt = sched_mode > SM_NONE;
+        bool block = false;
         unsigned long *switch_count;
         unsigned long prev_state;
         struct rq_flags rf;
@@ -6622,6 +6636,7 @@ static void __sched notrace __schedule(int sched_mode)
                          * After this, schedule() must not care about p->state any more.
                          */
                         block_task(rq, prev, flags);
+                        block = true;
                 }
                 switch_count = &prev->nvcsw;
         }
@@ -6667,7 +6682,7 @@ picked:
 
                 migrate_disable_switch(rq, prev);
                 psi_account_irqtime(rq, prev, next);
-                psi_sched_switch(prev, next, !task_on_rq_queued(prev));
+                psi_sched_switch(prev, next, block);
 
                 trace_sched_switch(preempt, prev, next, prev_state);
 
@@ -7010,20 +7025,20 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag
 }
 EXPORT_SYMBOL(default_wake_function);
 
-void __setscheduler_prio(struct task_struct *p, int prio)
+const struct sched_class *__setscheduler_class(struct task_struct *p, int prio)
 {
         if (dl_prio(prio))
-                p->sched_class = &dl_sched_class;
-        else if (rt_prio(prio))
-                p->sched_class = &rt_sched_class;
+                return &dl_sched_class;
+
+        if (rt_prio(prio))
+                return &rt_sched_class;
+
 #ifdef CONFIG_SCHED_CLASS_EXT
-        else if (task_should_scx(p))
-                p->sched_class = &ext_sched_class;
+        if (task_should_scx(p))
+                return &ext_sched_class;
 #endif
-        else
-                p->sched_class = &fair_sched_class;
-
-        p->prio = prio;
+
+        return &fair_sched_class;
 }
 
 #ifdef CONFIG_RT_MUTEXES
@@ -7069,7 +7084,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 {
         int prio, oldprio, queued, running, queue_flag =
                 DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
-        const struct sched_class *prev_class;
+        const struct sched_class *prev_class, *next_class;
         struct rq_flags rf;
         struct rq *rq;
 
@@ -7127,6 +7142,11 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
                 queue_flag &= ~DEQUEUE_MOVE;
 
         prev_class = p->sched_class;
+        next_class = __setscheduler_class(p, prio);
+
+        if (prev_class != next_class && p->se.sched_delayed)
+                dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
+
         queued = task_on_rq_queued(p);
         running = task_current(rq, p);
         if (queued)
@@ -7164,7 +7184,9 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
                 p->rt.timeout = 0;
         }
 
-        __setscheduler_prio(p, prio);
+        p->sched_class = next_class;
+        p->prio = prio;
+
         check_class_changing(rq, p, prev_class);
 
         if (queued)
@@ -10458,7 +10480,9 @@ void task_tick_mm_cid(struct rq *rq, struct task_struct *curr)
                 return;
         if (time_before(now, READ_ONCE(curr->mm->mm_cid_next_scan)))
                 return;
-        task_work_add(curr, work, TWA_RESUME);
+
+        /* No page allocation under rq lock */
+        task_work_add(curr, work, TWA_RESUME | TWAF_NO_ALLOC);
 }
 
 void sched_mm_cid_exit_signals(struct task_struct *t)
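
The __setscheduler_class() change above turns an in-place assignment helper into one that only computes the prospective class, so a caller such as rt_mutex_setprio() can dequeue a delayed task before committing the class switch. Below is a minimal userspace sketch of that pattern; the mock types, priority thresholds and printf() reporting are illustrative stand-ins, not the kernel code.

/* Simplified userspace mock of the "compute class first, commit later"
 * pattern; the real types and helpers live in kernel/sched/. */
#include <stdbool.h>
#include <stdio.h>

struct sched_class { const char *name; };

static const struct sched_class dl_sched_class   = { "deadline" };
static const struct sched_class rt_sched_class   = { "realtime" };
static const struct sched_class fair_sched_class = { "fair" };

struct task {
        const struct sched_class *sched_class;
        int prio;
        bool sched_delayed;             /* mock of p->se.sched_delayed */
};

/* Mirrors the new helper: pick a class from the priority, touch nothing. */
static const struct sched_class *setscheduler_class(int prio)
{
        if (prio < 0)                   /* illustrative dl_prio() check */
                return &dl_sched_class;
        if (prio < 100)                 /* illustrative rt_prio() check */
                return &rt_sched_class;
        return &fair_sched_class;
}

static void set_prio(struct task *p, int prio)
{
        const struct sched_class *prev_class = p->sched_class;
        const struct sched_class *next_class = setscheduler_class(prio);

        /* A delayed task must leave its old class before the switch. */
        if (prev_class != next_class && p->sched_delayed) {
                printf("dequeue delayed task from %s\n", prev_class->name);
                p->sched_delayed = false;
        }

        p->sched_class = next_class;    /* commit only after the dequeue */
        p->prio = prio;
}

int main(void)
{
        struct task p = { &fair_sched_class, 120, true };

        set_prio(&p, 50);       /* boost to RT: forces the delayed dequeue first */
        printf("now on %s, prio %d\n", p.sched_class->name, p.prio);
        return 0;
}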
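Likewise, select_task_rq() now receives wake_flags by pointer so it can record whether the scheduling class actually chose the runqueue, and ttwu_do_activate() later turns that into an enqueue-side flag. The sketch below mirrors that plumbing with mock types; WF_RQ_SELECTED and ENQUEUE_RQ_SELECTED are the names used in the diff, but their values and the surrounding helpers here are made up for illustration.

/* Simplified userspace mock of the select_task_rq() flag plumbing;
 * not the kernel implementation. */
#include <stdio.h>

#define WF_TTWU                 0x01    /* flag values are illustrative only */
#define WF_FORK                 0x02
#define WF_RQ_SELECTED          0x10
#define ENQUEUE_RQ_SELECTED     0x100

struct task { int nr_cpus_allowed; int wake_cpu; };

/* Stand-in for p->sched_class->select_task_rq(). */
static int class_select_cpu(struct task *p, int cpu, int wake_flags)
{
        (void)p;
        (void)wake_flags;
        return (cpu + 1) % 4;   /* pretend the class picked another CPU */
}

static int select_task_rq(struct task *p, int cpu, int *wake_flags)
{
        if (p->nr_cpus_allowed > 1) {
                cpu = class_select_cpu(p, cpu, *wake_flags);
                *wake_flags |= WF_RQ_SELECTED;  /* the class chose this rq */
        }
        return cpu;
}

/* Stand-in for the ttwu_do_activate() side of the handshake. */
static void do_activate(struct task *p, int cpu, int wake_flags)
{
        int en_flags = 0;

        (void)p;
        if (wake_flags & WF_RQ_SELECTED)
                en_flags |= ENQUEUE_RQ_SELECTED;

        printf("enqueue on CPU %d, en_flags=%#x\n", cpu, en_flags);
}

int main(void)
{
        struct task p = { .nr_cpus_allowed = 4, .wake_cpu = 0 };
        int wake_flags = WF_TTWU;       /* try_to_wake_up() now sets this up front */

        int cpu = select_task_rq(&p, p.wake_cpu, &wake_flags);
        do_activate(&p, cpu, wake_flags);
        return 0;
}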