diff options
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r-- | kernel/sched/sched.h | 190 |
1 files changed, 174 insertions, 16 deletions
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3744f16a1293..8063db62b027 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -193,9 +193,18 @@ static inline int idle_policy(int policy) return policy == SCHED_IDLE; } +static inline int normal_policy(int policy) +{ +#ifdef CONFIG_SCHED_CLASS_EXT + if (policy == SCHED_EXT) + return true; +#endif + return policy == SCHED_NORMAL; +} + static inline int fair_policy(int policy) { - return policy == SCHED_NORMAL || policy == SCHED_BATCH; + return normal_policy(policy) || policy == SCHED_BATCH; } static inline int rt_policy(int policy) @@ -246,6 +255,24 @@ static inline void update_avg(u64 *avg, u64 sample) (val >> min_t(typeof(shift), shift, BITS_PER_TYPE(typeof(val)) - 1)) /* + * cgroup weight knobs should use the common MIN, DFL and MAX values which are + * 1, 100 and 10000 respectively. While it loses a bit of range on both ends, it + * maps pretty well onto the shares value used by scheduler and the round-trip + * conversions preserve the original value over the entire range. + */ +static inline unsigned long sched_weight_from_cgroup(unsigned long cgrp_weight) +{ + return DIV_ROUND_CLOSEST_ULL(cgrp_weight * 1024, CGROUP_WEIGHT_DFL); +} + +static inline unsigned long sched_weight_to_cgroup(unsigned long weight) +{ + return clamp_t(unsigned long, + DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024), + CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX); +} + +/* * !! For sched_setattr_nocheck() (kernel) only !! * * This is actually gross. :( @@ -432,6 +459,11 @@ struct task_group { struct rt_bandwidth rt_bandwidth; #endif +#ifdef CONFIG_EXT_GROUP_SCHED + u32 scx_flags; /* SCX_TG_* */ + u32 scx_weight; +#endif + struct rcu_head rcu; struct list_head list; @@ -456,7 +488,7 @@ struct task_group { }; -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_GROUP_SCHED_WEIGHT #define ROOT_TASK_GROUP_LOAD NICE_0_LOAD /* @@ -487,6 +519,11 @@ static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) return walk_tg_tree_from(&root_task_group, down, up, data); } +static inline struct task_group *css_tg(struct cgroup_subsys_state *css) +{ + return css ? container_of(css, struct task_group, css) : NULL; +} + extern int tg_nop(struct task_group *tg, void *data); #ifdef CONFIG_FAIR_GROUP_SCHED @@ -543,6 +580,8 @@ extern void set_task_rq_fair(struct sched_entity *se, static inline void set_task_rq_fair(struct sched_entity *se, struct cfs_rq *prev, struct cfs_rq *next) { } #endif /* CONFIG_SMP */ +#else /* !CONFIG_FAIR_GROUP_SCHED */ +static inline int sched_group_set_shares(struct task_group *tg, unsigned long shares) { return 0; } #endif /* CONFIG_FAIR_GROUP_SCHED */ #else /* CONFIG_CGROUP_SCHED */ @@ -596,6 +635,11 @@ do { \ # define u64_u32_load(var) u64_u32_load_copy(var, var##_copy) # define u64_u32_store(var, val) u64_u32_store_copy(var, var##_copy, val) +struct balance_callback { + struct balance_callback *next; + void (*func)(struct rq *rq); +}; + /* CFS-related fields in a runqueue */ struct cfs_rq { struct load_weight load; @@ -695,6 +739,44 @@ struct cfs_rq { #endif /* CONFIG_FAIR_GROUP_SCHED */ }; +#ifdef CONFIG_SCHED_CLASS_EXT +/* scx_rq->flags, protected by the rq lock */ +enum scx_rq_flags { + /* + * A hotplugged CPU starts scheduling before rq_online_scx(). Track + * ops.cpu_on/offline() state so that ops.enqueue/dispatch() are called + * only while the BPF scheduler considers the CPU to be online. + */ + SCX_RQ_ONLINE = 1 << 0, + SCX_RQ_CAN_STOP_TICK = 1 << 1, + SCX_RQ_BAL_KEEP = 1 << 2, /* balance decided to keep current */ + SCX_RQ_BYPASSING = 1 << 3, + + SCX_RQ_IN_WAKEUP = 1 << 16, + SCX_RQ_IN_BALANCE = 1 << 17, +}; + +struct scx_rq { + struct scx_dispatch_q local_dsq; + struct list_head runnable_list; /* runnable tasks on this rq */ + struct list_head ddsp_deferred_locals; /* deferred ddsps from enq */ + unsigned long ops_qseq; + u64 extra_enq_flags; /* see move_task_to_local_dsq() */ + u32 nr_running; + u32 flags; + u32 cpuperf_target; /* [0, SCHED_CAPACITY_SCALE] */ + bool cpu_released; + cpumask_var_t cpus_to_kick; + cpumask_var_t cpus_to_kick_if_idle; + cpumask_var_t cpus_to_preempt; + cpumask_var_t cpus_to_wait; + unsigned long pnt_seq; + struct balance_callback deferred_bal_cb; + struct irq_work deferred_irq_work; + struct irq_work kick_cpus_irq_work; +}; +#endif /* CONFIG_SCHED_CLASS_EXT */ + static inline int rt_bandwidth_enabled(void) { return sysctl_sched_rt_runtime >= 0; @@ -1001,11 +1083,6 @@ struct uclamp_rq { DECLARE_STATIC_KEY_FALSE(sched_uclamp_used); #endif /* CONFIG_UCLAMP_TASK */ -struct balance_callback { - struct balance_callback *next; - void (*func)(struct rq *rq); -}; - /* * This is the main, per-CPU runqueue data structure. * @@ -1048,6 +1125,9 @@ struct rq { struct cfs_rq cfs; struct rt_rq rt; struct dl_rq dl; +#ifdef CONFIG_SCHED_CLASS_EXT + struct scx_rq scx; +#endif struct sched_dl_entity fair_server; @@ -2302,6 +2382,7 @@ struct sched_class { void (*wakeup_preempt)(struct rq *rq, struct task_struct *p, int flags); + int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf); struct task_struct *(*pick_task)(struct rq *rq); /* * Optional! When implemented pick_next_task() should be equivalent to: @@ -2318,7 +2399,6 @@ struct sched_class { void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first); #ifdef CONFIG_SMP - int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf); int (*select_task_rq)(struct task_struct *p, int task_cpu, int flags); void (*migrate_task_rq)(struct task_struct *p, int new_cpu); @@ -2342,8 +2422,11 @@ struct sched_class { * cannot assume the switched_from/switched_to pair is serialized by * rq->lock. They are however serialized by p->pi_lock. */ + void (*switching_to) (struct rq *this_rq, struct task_struct *task); void (*switched_from)(struct rq *this_rq, struct task_struct *task); void (*switched_to) (struct rq *this_rq, struct task_struct *task); + void (*reweight_task)(struct rq *this_rq, struct task_struct *task, + const struct load_weight *lw); void (*prio_changed) (struct rq *this_rq, struct task_struct *task, int oldprio); @@ -2416,19 +2499,54 @@ const struct sched_class name##_sched_class \ extern struct sched_class __sched_class_highest[]; extern struct sched_class __sched_class_lowest[]; +extern const struct sched_class stop_sched_class; +extern const struct sched_class dl_sched_class; +extern const struct sched_class rt_sched_class; +extern const struct sched_class fair_sched_class; +extern const struct sched_class idle_sched_class; + +#ifdef CONFIG_SCHED_CLASS_EXT +extern const struct sched_class ext_sched_class; + +DECLARE_STATIC_KEY_FALSE(__scx_ops_enabled); /* SCX BPF scheduler loaded */ +DECLARE_STATIC_KEY_FALSE(__scx_switched_all); /* all fair class tasks on SCX */ + +#define scx_enabled() static_branch_unlikely(&__scx_ops_enabled) +#define scx_switched_all() static_branch_unlikely(&__scx_switched_all) +#else /* !CONFIG_SCHED_CLASS_EXT */ +#define scx_enabled() false +#define scx_switched_all() false +#endif /* !CONFIG_SCHED_CLASS_EXT */ + +/* + * Iterate only active classes. SCX can take over all fair tasks or be + * completely disabled. If the former, skip fair. If the latter, skip SCX. + */ +static inline const struct sched_class *next_active_class(const struct sched_class *class) +{ + class++; +#ifdef CONFIG_SCHED_CLASS_EXT + if (scx_switched_all() && class == &fair_sched_class) + class++; + if (!scx_enabled() && class == &ext_sched_class) + class++; +#endif + return class; +} + #define for_class_range(class, _from, _to) \ for (class = (_from); class < (_to); class++) #define for_each_class(class) \ for_class_range(class, __sched_class_highest, __sched_class_lowest) -#define sched_class_above(_a, _b) ((_a) < (_b)) +#define for_active_class_range(class, _from, _to) \ + for (class = (_from); class != (_to); class = next_active_class(class)) -extern const struct sched_class stop_sched_class; -extern const struct sched_class dl_sched_class; -extern const struct sched_class rt_sched_class; -extern const struct sched_class fair_sched_class; -extern const struct sched_class idle_sched_class; +#define for_each_active_class(class) \ + for_active_class_range(class, __sched_class_highest, __sched_class_lowest) + +#define sched_class_above(_a, _b) ((_a) < (_b)) static inline bool sched_stop_runnable(struct rq *rq) { @@ -2467,6 +2585,19 @@ extern void sched_balance_trigger(struct rq *rq); extern int __set_cpus_allowed_ptr(struct task_struct *p, struct affinity_context *ctx); extern void set_cpus_allowed_common(struct task_struct *p, struct affinity_context *ctx); +static inline bool task_allowed_on_cpu(struct task_struct *p, int cpu) +{ + /* When not in the task's cpumask, no point in looking further. */ + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) + return false; + + /* Can @cpu run a user thread? */ + if (!(p->flags & PF_KTHREAD) && !task_cpu_possible(cpu, p)) + return false; + + return true; +} + static inline cpumask_t *alloc_user_cpus_ptr(int node) { /* @@ -2500,6 +2631,11 @@ extern int push_cpu_stop(void *arg); #else /* !CONFIG_SMP: */ +static inline bool task_allowed_on_cpu(struct task_struct *p, int cpu) +{ + return true; +} + static inline int __set_cpus_allowed_ptr(struct task_struct *p, struct affinity_context *ctx) { @@ -2553,8 +2689,6 @@ extern void init_sched_dl_class(void); extern void init_sched_rt_class(void); extern void init_sched_fair_class(void); -extern void reweight_task(struct task_struct *p, const struct load_weight *lw); - extern void resched_curr(struct rq *rq); extern void resched_cpu(int cpu); @@ -3154,6 +3288,8 @@ static inline unsigned long cpu_util_rt(struct rq *rq) return READ_ONCE(rq->avg_rt.util_avg); } +#else /* !CONFIG_SMP */ +static inline bool update_other_load_avgs(struct rq *rq) { return false; } #endif /* CONFIG_SMP */ #ifdef CONFIG_UCLAMP_TASK @@ -3664,6 +3800,8 @@ extern void set_load_weight(struct task_struct *p, bool update_load); extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags); extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags); +extern void check_class_changing(struct rq *rq, struct task_struct *p, + const struct sched_class *prev_class); extern void check_class_changed(struct rq *rq, struct task_struct *p, const struct sched_class *prev_class, int oldprio); @@ -3684,4 +3822,24 @@ static inline void balance_callbacks(struct rq *rq, struct balance_callback *hea #endif +#ifdef CONFIG_SCHED_CLASS_EXT +/* + * Used by SCX in the enable/disable paths to move tasks between sched_classes + * and establish invariants. + */ +struct sched_enq_and_set_ctx { + struct task_struct *p; + int queue_flags; + bool queued; + bool running; +}; + +void sched_deq_and_put_task(struct task_struct *p, int queue_flags, + struct sched_enq_and_set_ctx *ctx); +void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx); + +#endif /* CONFIG_SCHED_CLASS_EXT */ + +#include "ext.h" + #endif /* _KERNEL_SCHED_SCHED_H */ |