diff options
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r-- | kernel/sched/sched.h | 148 |
1 files changed, 128 insertions, 20 deletions
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 055f935d4421..71b10a9b73cf 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -4,6 +4,7 @@ #include <linux/sched/rt.h> #include <linux/u64_stats_sync.h> #include <linux/sched/deadline.h> +#include <linux/kernel_stat.h> #include <linux/binfmts.h> #include <linux/mutex.h> #include <linux/spinlock.h> @@ -222,7 +223,7 @@ bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw) dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw; } -extern struct mutex sched_domains_mutex; +extern void init_dl_bw(struct dl_bw *dl_b); #ifdef CONFIG_CGROUP_SCHED @@ -404,6 +405,7 @@ struct cfs_rq { unsigned long runnable_load_avg; #ifdef CONFIG_FAIR_GROUP_SCHED unsigned long tg_load_avg_contrib; + unsigned long propagate_avg; #endif atomic_long_t removed_load_avg, removed_util_avg; #ifndef CONFIG_64BIT @@ -539,6 +541,11 @@ struct dl_rq { #ifdef CONFIG_SMP +static inline bool sched_asym_prefer(int a, int b) +{ + return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b); +} + /* * We add the notion of a root-domain which will be used to define per-domain * variables. Each exclusive cpuset essentially defines an island domain by @@ -577,6 +584,13 @@ struct root_domain { }; extern struct root_domain def_root_domain; +extern struct mutex sched_domains_mutex; +extern cpumask_var_t fallback_doms; +extern cpumask_var_t sched_domains_tmpmask; + +extern void init_defrootdomain(void); +extern int init_sched_domains(const struct cpumask *cpu_map); +extern void rq_attach_root(struct rq *rq, struct root_domain *rd); #endif /* CONFIG_SMP */ @@ -623,6 +637,7 @@ struct rq { #ifdef CONFIG_FAIR_GROUP_SCHED /* list of leaf cfs_rq on this cpu: */ struct list_head leaf_cfs_rq_list; + struct list_head *tmp_alone_branch; #endif /* CONFIG_FAIR_GROUP_SCHED */ /* @@ -637,7 +652,7 @@ struct rq { unsigned long next_balance; struct mm_struct *prev_mm; - unsigned int clock_skip_update; + unsigned int clock_update_flags; u64 clock; u64 clock_task; @@ -761,28 +776,110 @@ static inline u64 __rq_clock_broken(struct rq *rq) return READ_ONCE(rq->clock); } +/* + * rq::clock_update_flags bits + * + * %RQCF_REQ_SKIP - will request skipping of clock update on the next + * call to __schedule(). This is an optimisation to avoid + * neighbouring rq clock updates. + * + * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is + * in effect and calls to update_rq_clock() are being ignored. + * + * %RQCF_UPDATED - is a debug flag that indicates whether a call has been + * made to update_rq_clock() since the last time rq::lock was pinned. + * + * If inside of __schedule(), clock_update_flags will have been + * shifted left (a left shift is a cheap operation for the fast path + * to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use, + * + * if (rq-clock_update_flags >= RQCF_UPDATED) + * + * to check if %RQCF_UPADTED is set. It'll never be shifted more than + * one position though, because the next rq_unpin_lock() will shift it + * back. + */ +#define RQCF_REQ_SKIP 0x01 +#define RQCF_ACT_SKIP 0x02 +#define RQCF_UPDATED 0x04 + +static inline void assert_clock_updated(struct rq *rq) +{ + /* + * The only reason for not seeing a clock update since the + * last rq_pin_lock() is if we're currently skipping updates. + */ + SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP); +} + static inline u64 rq_clock(struct rq *rq) { lockdep_assert_held(&rq->lock); + assert_clock_updated(rq); + return rq->clock; } static inline u64 rq_clock_task(struct rq *rq) { lockdep_assert_held(&rq->lock); + assert_clock_updated(rq); + return rq->clock_task; } -#define RQCF_REQ_SKIP 0x01 -#define RQCF_ACT_SKIP 0x02 - static inline void rq_clock_skip_update(struct rq *rq, bool skip) { lockdep_assert_held(&rq->lock); if (skip) - rq->clock_skip_update |= RQCF_REQ_SKIP; + rq->clock_update_flags |= RQCF_REQ_SKIP; else - rq->clock_skip_update &= ~RQCF_REQ_SKIP; + rq->clock_update_flags &= ~RQCF_REQ_SKIP; +} + +struct rq_flags { + unsigned long flags; + struct pin_cookie cookie; +#ifdef CONFIG_SCHED_DEBUG + /* + * A copy of (rq::clock_update_flags & RQCF_UPDATED) for the + * current pin context is stashed here in case it needs to be + * restored in rq_repin_lock(). + */ + unsigned int clock_update_flags; +#endif +}; + +static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) +{ + rf->cookie = lockdep_pin_lock(&rq->lock); + +#ifdef CONFIG_SCHED_DEBUG + rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); + rf->clock_update_flags = 0; +#endif +} + +static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf) +{ +#ifdef CONFIG_SCHED_DEBUG + if (rq->clock_update_flags > RQCF_ACT_SKIP) + rf->clock_update_flags = RQCF_UPDATED; +#endif + + lockdep_unpin_lock(&rq->lock, rf->cookie); +} + +static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf) +{ + lockdep_repin_lock(&rq->lock, rf->cookie); + +#ifdef CONFIG_SCHED_DEBUG + /* + * Restore the value we stashed in @rf for this pin context. + */ + rq->clock_update_flags |= rf->clock_update_flags; +#endif } #ifdef CONFIG_NUMA @@ -796,6 +893,16 @@ extern int sched_max_numa_distance; extern bool find_numa_distance(int distance); #endif +#ifdef CONFIG_NUMA +extern void sched_init_numa(void); +extern void sched_domains_numa_masks_set(unsigned int cpu); +extern void sched_domains_numa_masks_clear(unsigned int cpu); +#else +static inline void sched_init_numa(void) { } +static inline void sched_domains_numa_masks_set(unsigned int cpu) { } +static inline void sched_domains_numa_masks_clear(unsigned int cpu) { } +#endif + #ifdef CONFIG_NUMA_BALANCING /* The regions in numa_faults array from task_struct */ enum numa_faults_stats { @@ -892,7 +999,8 @@ struct sched_group_capacity { * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity * for a single CPU. */ - unsigned int capacity; + unsigned long capacity; + unsigned long min_capacity; /* Min per-CPU capacity in group */ unsigned long next_update; int imbalance; /* XXX unrelated to capacity but shared group state */ @@ -905,6 +1013,7 @@ struct sched_group { unsigned int group_weight; struct sched_group_capacity *sgc; + int asym_prefer_cpu; /* cpu of highest priority in group */ /* * The CPUs this group covers. @@ -960,7 +1069,7 @@ static inline void sched_ttwu_pending(void) { } #endif /* CONFIG_SMP */ #include "stats.h" -#include "auto_group.h" +#include "autogroup.h" #ifdef CONFIG_CGROUP_SCHED @@ -1236,7 +1345,7 @@ struct sched_class { */ struct task_struct * (*pick_next_task) (struct rq *rq, struct task_struct *prev, - struct pin_cookie cookie); + struct rq_flags *rf); void (*put_prev_task) (struct rq *rq, struct task_struct *p); #ifdef CONFIG_SMP @@ -1492,11 +1601,6 @@ static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { } static inline void sched_avg_update(struct rq *rq) { } #endif -struct rq_flags { - unsigned long flags; - struct pin_cookie cookie; -}; - struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) __acquires(rq->lock); struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) @@ -1506,7 +1610,7 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) __releases(rq->lock) { - lockdep_unpin_lock(&rq->lock, rf->cookie); + rq_unpin_lock(rq, rf); raw_spin_unlock(&rq->lock); } @@ -1515,7 +1619,7 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) __releases(rq->lock) __releases(p->pi_lock) { - lockdep_unpin_lock(&rq->lock, rf->cookie); + rq_unpin_lock(rq, rf); raw_spin_unlock(&rq->lock); raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); } @@ -1665,6 +1769,10 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) __release(rq2->lock); } +extern void set_rq_online (struct rq *rq); +extern void set_rq_offline(struct rq *rq); +extern bool sched_smp_initialized; + #else /* CONFIG_SMP */ /* @@ -1741,8 +1849,7 @@ static inline void nohz_balance_exit_idle(unsigned int cpu) { } #ifdef CONFIG_IRQ_TIME_ACCOUNTING struct irqtime { - u64 hardirq_time; - u64 softirq_time; + u64 tick_delta; u64 irq_start_time; struct u64_stats_sync sync; }; @@ -1752,12 +1859,13 @@ DECLARE_PER_CPU(struct irqtime, cpu_irqtime); static inline u64 irq_time_read(int cpu) { struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); + u64 *cpustat = kcpustat_cpu(cpu).cpustat; unsigned int seq; u64 total; do { seq = __u64_stats_fetch_begin(&irqtime->sync); - total = irqtime->softirq_time + irqtime->hardirq_time; + total = cpustat[CPUTIME_SOFTIRQ] + cpustat[CPUTIME_IRQ]; } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); return total; |