diff options
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r-- | kernel/sched/core.c | 221 |
1 files changed, 172 insertions, 49 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 25b582b6ee5f..af017e038b48 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -152,7 +152,7 @@ __read_mostly int scheduler_running; DEFINE_STATIC_KEY_FALSE(__sched_core_enabled); /* kernel prio, less is more */ -static inline int __task_prio(struct task_struct *p) +static inline int __task_prio(const struct task_struct *p) { if (p->sched_class == &stop_sched_class) /* trumps deadline */ return -2; @@ -174,7 +174,8 @@ static inline int __task_prio(struct task_struct *p) */ /* real prio, less is less */ -static inline bool prio_less(struct task_struct *a, struct task_struct *b, bool in_fi) +static inline bool prio_less(const struct task_struct *a, + const struct task_struct *b, bool in_fi) { int pa = __task_prio(a), pb = __task_prio(b); @@ -194,7 +195,8 @@ static inline bool prio_less(struct task_struct *a, struct task_struct *b, bool return false; } -static inline bool __sched_core_less(struct task_struct *a, struct task_struct *b) +static inline bool __sched_core_less(const struct task_struct *a, + const struct task_struct *b) { if (a->core_cookie < b->core_cookie) return true; @@ -2604,27 +2606,71 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) .user_mask = NULL, .flags = SCA_USER, /* clear the user requested mask */ }; + union cpumask_rcuhead { + cpumask_t cpumask; + struct rcu_head rcu; + }; __do_set_cpus_allowed(p, &ac); - kfree(ac.user_mask); + + /* + * Because this is called with p->pi_lock held, it is not possible + * to use kfree() here (when PREEMPT_RT=y), therefore punt to using + * kfree_rcu(). + */ + kfree_rcu((union cpumask_rcuhead *)ac.user_mask, rcu); +} + +static cpumask_t *alloc_user_cpus_ptr(int node) +{ + /* + * See do_set_cpus_allowed() above for the rcu_head usage. + */ + int size = max_t(int, cpumask_size(), sizeof(struct rcu_head)); + + return kmalloc_node(size, GFP_KERNEL, node); } int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node) { + cpumask_t *user_mask; unsigned long flags; - if (!src->user_cpus_ptr) + /* + * Always clear dst->user_cpus_ptr first as their user_cpus_ptr's + * may differ by now due to racing. + */ + dst->user_cpus_ptr = NULL; + + /* + * This check is racy and losing the race is a valid situation. + * It is not worth the extra overhead of taking the pi_lock on + * every fork/clone. + */ + if (data_race(!src->user_cpus_ptr)) return 0; - dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node); - if (!dst->user_cpus_ptr) + user_mask = alloc_user_cpus_ptr(node); + if (!user_mask) return -ENOMEM; - /* Use pi_lock to protect content of user_cpus_ptr */ + /* + * Use pi_lock to protect content of user_cpus_ptr + * + * Though unlikely, user_cpus_ptr can be reset to NULL by a concurrent + * do_set_cpus_allowed(). + */ raw_spin_lock_irqsave(&src->pi_lock, flags); - cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); + if (src->user_cpus_ptr) { + swap(dst->user_cpus_ptr, user_mask); + cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); + } raw_spin_unlock_irqrestore(&src->pi_lock, flags); + + if (unlikely(user_mask)) + kfree(user_mask); + return 0; } @@ -2907,8 +2953,11 @@ static int __set_cpus_allowed_ptr_locked(struct task_struct *p, } if (!(ctx->flags & SCA_MIGRATE_ENABLE)) { - if (cpumask_equal(&p->cpus_mask, ctx->new_mask)) + if (cpumask_equal(&p->cpus_mask, ctx->new_mask)) { + if (ctx->flags & SCA_USER) + swap(p->user_cpus_ptr, ctx->user_mask); goto out; + } if (WARN_ON_ONCE(p == current && is_migration_disabled(p) && @@ -3581,6 +3630,11 @@ static inline bool rq_has_pinned_tasks(struct rq *rq) return false; } +static inline cpumask_t *alloc_user_cpus_ptr(int node) +{ + return NULL; +} + #endif /* !CONFIG_SMP */ static void @@ -3623,14 +3677,39 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) } /* - * Mark the task runnable and perform wakeup-preemption. + * Mark the task runnable. */ -static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags, - struct rq_flags *rf) +static inline void ttwu_do_wakeup(struct task_struct *p) { - check_preempt_curr(rq, p, wake_flags); WRITE_ONCE(p->__state, TASK_RUNNING); trace_sched_wakeup(p); +} + +static void +ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, + struct rq_flags *rf) +{ + int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK; + + lockdep_assert_rq_held(rq); + + if (p->sched_contributes_to_load) + rq->nr_uninterruptible--; + +#ifdef CONFIG_SMP + if (wake_flags & WF_MIGRATED) + en_flags |= ENQUEUE_MIGRATED; + else +#endif + if (p->in_iowait) { + delayacct_blkio_end(p); + atomic_dec(&task_rq(p)->nr_iowait); + } + + activate_task(rq, p, en_flags); + check_preempt_curr(rq, p, wake_flags); + + ttwu_do_wakeup(p); #ifdef CONFIG_SMP if (p->sched_class->task_woken) { @@ -3660,31 +3739,6 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags, #endif } -static void -ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, - struct rq_flags *rf) -{ - int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK; - - lockdep_assert_rq_held(rq); - - if (p->sched_contributes_to_load) - rq->nr_uninterruptible--; - -#ifdef CONFIG_SMP - if (wake_flags & WF_MIGRATED) - en_flags |= ENQUEUE_MIGRATED; - else -#endif - if (p->in_iowait) { - delayacct_blkio_end(p); - atomic_dec(&task_rq(p)->nr_iowait); - } - - activate_task(rq, p, en_flags); - ttwu_do_wakeup(rq, p, wake_flags, rf); -} - /* * Consider @p being inside a wait loop: * @@ -3718,9 +3772,15 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags) rq = __task_rq_lock(p, &rf); if (task_on_rq_queued(p)) { - /* check_preempt_curr() may use rq clock */ - update_rq_clock(rq); - ttwu_do_wakeup(rq, p, wake_flags, &rf); + if (!task_on_cpu(rq, p)) { + /* + * When on_rq && !on_cpu the task is preempted, see if + * it should preempt the task that is current now. + */ + update_rq_clock(rq); + check_preempt_curr(rq, p, wake_flags); + } + ttwu_do_wakeup(p); ret = 1; } __task_rq_unlock(rq, &rf); @@ -4086,8 +4146,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) goto out; trace_sched_waking(p); - WRITE_ONCE(p->__state, TASK_RUNNING); - trace_sched_wakeup(p); + ttwu_do_wakeup(p); goto out; } @@ -5052,6 +5111,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, sched_info_switch(rq, prev, next); perf_event_task_sched_out(prev, next); rseq_preempt(prev); + switch_mm_cid(prev, next); fire_sched_out_preempt_notifiers(prev, next); kmap_local_sched_out(); prepare_task(next); @@ -5282,6 +5342,11 @@ bool single_task_running(void) } EXPORT_SYMBOL(single_task_running); +unsigned long long nr_context_switches_cpu(int cpu) +{ + return cpu_rq(cpu)->nr_switches; +} + unsigned long long nr_context_switches(void) { int i; @@ -5504,7 +5569,9 @@ void scheduler_tick(void) unsigned long thermal_pressure; u64 resched_latency; - arch_scale_freq_tick(); + if (housekeeping_cpu(cpu, HK_TYPE_TICK)) + arch_scale_freq_tick(); + sched_clock_tick(); rq_lock(rq, &rf); @@ -6206,7 +6273,7 @@ static bool steal_cookie_task(int cpu, struct sched_domain *sd) { int i; - for_each_cpu_wrap(i, sched_domain_span(sd), cpu) { + for_each_cpu_wrap(i, sched_domain_span(sd), cpu + 1) { if (i == cpu) continue; @@ -8239,12 +8306,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) if (retval) goto out_put_task; - user_mask = kmalloc(cpumask_size(), GFP_KERNEL); - if (!user_mask) { + /* + * With non-SMP configs, user_cpus_ptr/user_mask isn't used and + * alloc_user_cpus_ptr() returns NULL. + */ + user_mask = alloc_user_cpus_ptr(NUMA_NO_NODE); + if (user_mask) { + cpumask_copy(user_mask, in_mask); + } else if (IS_ENABLED(CONFIG_SMP)) { retval = -ENOMEM; goto out_put_task; } - cpumask_copy(user_mask, in_mask); + ac = (struct affinity_context){ .new_mask = in_mask, .user_mask = user_mask, @@ -11305,3 +11378,53 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count) { trace_sched_update_nr_running_tp(rq, count); } + +#ifdef CONFIG_SCHED_MM_CID +void sched_mm_cid_exit_signals(struct task_struct *t) +{ + struct mm_struct *mm = t->mm; + unsigned long flags; + + if (!mm) + return; + local_irq_save(flags); + mm_cid_put(mm, t->mm_cid); + t->mm_cid = -1; + t->mm_cid_active = 0; + local_irq_restore(flags); +} + +void sched_mm_cid_before_execve(struct task_struct *t) +{ + struct mm_struct *mm = t->mm; + unsigned long flags; + + if (!mm) + return; + local_irq_save(flags); + mm_cid_put(mm, t->mm_cid); + t->mm_cid = -1; + t->mm_cid_active = 0; + local_irq_restore(flags); +} + +void sched_mm_cid_after_execve(struct task_struct *t) +{ + struct mm_struct *mm = t->mm; + unsigned long flags; + + if (!mm) + return; + local_irq_save(flags); + t->mm_cid = mm_cid_get(mm); + t->mm_cid_active = 1; + local_irq_restore(flags); + rseq_set_notify_resume(t); +} + +void sched_mm_cid_fork(struct task_struct *t) +{ + WARN_ON_ONCE(!t->mm || t->mm_cid != -1); + t->mm_cid_active = 1; +} +#endif |