about | summary | refs | log | tree | commit | diff
path: root/kernel/sched/sched.h
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r-- kernel/sched/sched.h 190
1 files changed, 174 insertions, 16 deletions
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3744f16a1293..8063db62b027 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -193,9 +193,18 @@ static inline int idle_policy(int policy)
return policy == SCHED_IDLE;
}
+/*
+ * Nonzero when @policy is SCHED_NORMAL or, on CONFIG_SCHED_CLASS_EXT kernels,
+ * SCHED_EXT.
+ */
+static inline int normal_policy(int policy)
+{
+#ifdef CONFIG_SCHED_CLASS_EXT
+	return policy == SCHED_EXT || policy == SCHED_NORMAL;
+#else
+	return policy == SCHED_NORMAL;
+#endif
+}
+
static inline int fair_policy(int policy)
{
- return policy == SCHED_NORMAL || policy == SCHED_BATCH;
+ return normal_policy(policy) || policy == SCHED_BATCH;
}
static inline int rt_policy(int policy)
@@ -246,6 +255,24 @@ static inline void update_avg(u64 *avg, u64 sample)
(val >> min_t(typeof(shift), shift, BITS_PER_TYPE(typeof(val)) - 1))
/*
+ * cgroup weight knobs should use the common MIN, DFL and MAX values which are
+ * 1, 100 and 10000 respectively. While it loses a bit of range on both ends, it
+ * maps pretty well onto the shares value used by scheduler and the round-trip
+ * conversions preserve the original value over the entire range.
+ */
+static inline unsigned long sched_weight_from_cgroup(unsigned long cgrp_weight)
+{
+	/* Scale so that CGROUP_WEIGHT_DFL becomes 1024, rounding to nearest. */
+	unsigned long sched_weight =
+		DIV_ROUND_CLOSEST_ULL(cgrp_weight * 1024, CGROUP_WEIGHT_DFL);
+
+	return sched_weight;
+}
+
+static inline unsigned long sched_weight_to_cgroup(unsigned long weight)
+{
+	unsigned long cgrp_weight;
+
+	/* Inverse scaling: 1024 becomes CGROUP_WEIGHT_DFL, rounding to nearest. */
+	cgrp_weight = DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
+
+	/* Keep the result inside [CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX]. */
+	return clamp_t(unsigned long, cgrp_weight,
+		       CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
+}
+
+/*
* !! For sched_setattr_nocheck() (kernel) only !!
*
* This is actually gross. :(
@@ -432,6 +459,11 @@ struct task_group {
struct rt_bandwidth rt_bandwidth;
#endif
+#ifdef CONFIG_EXT_GROUP_SCHED
+ u32 scx_flags; /* SCX_TG_* */
+ u32 scx_weight;
+#endif
+
struct rcu_head rcu;
struct list_head list;
@@ -456,7 +488,7 @@ struct task_group {
};
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED_WEIGHT
#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
/*
@@ -487,6 +519,11 @@ static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
return walk_tg_tree_from(&root_task_group, down, up, data);
}
+/* Map @css to the task_group that embeds it; a NULL @css yields NULL. */
+static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
+{
+	if (!css)
+		return NULL;
+
+	return container_of(css, struct task_group, css);
+}
+
extern int tg_nop(struct task_group *tg, void *data);
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -543,6 +580,8 @@ extern void set_task_rq_fair(struct sched_entity *se,
static inline void set_task_rq_fair(struct sched_entity *se,
struct cfs_rq *prev, struct cfs_rq *next) { }
#endif /* CONFIG_SMP */
+#else /* !CONFIG_FAIR_GROUP_SCHED */
+static inline int sched_group_set_shares(struct task_group *tg, unsigned long shares)
+{
+	/* !CONFIG_FAIR_GROUP_SCHED stub: nothing to set, report success. */
+	return 0;
+}
#endif /* CONFIG_FAIR_GROUP_SCHED */
#else /* CONFIG_CGROUP_SCHED */
@@ -596,6 +635,11 @@ do { \
# define u64_u32_load(var) u64_u32_load_copy(var, var##_copy)
# define u64_u32_store(var, val) u64_u32_store_copy(var, var##_copy, val)
+struct balance_callback {
+ struct balance_callback *next; /* link to the next callback record */
+ void (*func)(struct rq *rq); /* handler; receives a runqueue pointer */
+};
+
/* CFS-related fields in a runqueue */
struct cfs_rq {
struct load_weight load;
@@ -695,6 +739,44 @@ struct cfs_rq {
#endif /* CONFIG_FAIR_GROUP_SCHED */
};
+#ifdef CONFIG_SCHED_CLASS_EXT
+/* scx_rq->flags, protected by the rq lock */
+enum scx_rq_flags {
+ /*
+ * A hotplugged CPU starts scheduling before rq_online_scx(). Track
+ * ops.cpu_on/offline() state so that ops.enqueue/dispatch() are called
+ * only while the BPF scheduler considers the CPU to be online.
+ */
+ SCX_RQ_ONLINE = 1 << 0,
+ SCX_RQ_CAN_STOP_TICK = 1 << 1,
+ SCX_RQ_BAL_KEEP = 1 << 2, /* balance decided to keep current */
+ SCX_RQ_BYPASSING = 1 << 3,
+
+ SCX_RQ_IN_WAKEUP = 1 << 16, /* NOTE(review): bits 16+ look reserved for transient "in progress" markers - confirm in ext.c */
+ SCX_RQ_IN_BALANCE = 1 << 17,
+};
+
+struct scx_rq { /* embedded in struct rq as rq->scx under CONFIG_SCHED_CLASS_EXT */
+ struct scx_dispatch_q local_dsq;
+ struct list_head runnable_list; /* runnable tasks on this rq */
+ struct list_head ddsp_deferred_locals; /* deferred ddsps from enq */
+ unsigned long ops_qseq;
+ u64 extra_enq_flags; /* see move_task_to_local_dsq() */
+ u32 nr_running;
+ u32 flags; /* SCX_RQ_* bits from enum scx_rq_flags */
+ u32 cpuperf_target; /* [0, SCHED_CAPACITY_SCALE] */
+ bool cpu_released;
+ cpumask_var_t cpus_to_kick;
+ cpumask_var_t cpus_to_kick_if_idle;
+ cpumask_var_t cpus_to_preempt;
+ cpumask_var_t cpus_to_wait;
+ unsigned long pnt_seq;
+ struct balance_callback deferred_bal_cb;
+ struct irq_work deferred_irq_work;
+ struct irq_work kick_cpus_irq_work;
+};
+#endif /* CONFIG_SCHED_CLASS_EXT */
+
static inline int rt_bandwidth_enabled(void)
{
return sysctl_sched_rt_runtime >= 0;
@@ -1001,11 +1083,6 @@ struct uclamp_rq {
DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
#endif /* CONFIG_UCLAMP_TASK */
-struct balance_callback {
- struct balance_callback *next;
- void (*func)(struct rq *rq);
-};
-
/*
* This is the main, per-CPU runqueue data structure.
*
@@ -1048,6 +1125,9 @@ struct rq {
struct cfs_rq cfs;
struct rt_rq rt;
struct dl_rq dl;
+#ifdef CONFIG_SCHED_CLASS_EXT
+ struct scx_rq scx;
+#endif
struct sched_dl_entity fair_server;
@@ -2302,6 +2382,7 @@ struct sched_class {
void (*wakeup_preempt)(struct rq *rq, struct task_struct *p, int flags);
+ int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
struct task_struct *(*pick_task)(struct rq *rq);
/*
* Optional! When implemented pick_next_task() should be equivalent to:
@@ -2318,7 +2399,6 @@ struct sched_class {
void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first);
#ifdef CONFIG_SMP
- int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
int (*select_task_rq)(struct task_struct *p, int task_cpu, int flags);
void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
@@ -2342,8 +2422,11 @@ struct sched_class {
* cannot assume the switched_from/switched_to pair is serialized by
* rq->lock. They are however serialized by p->pi_lock.
*/
+ void (*switching_to) (struct rq *this_rq, struct task_struct *task);
void (*switched_from)(struct rq *this_rq, struct task_struct *task);
void (*switched_to) (struct rq *this_rq, struct task_struct *task);
+ void (*reweight_task)(struct rq *this_rq, struct task_struct *task,
+ const struct load_weight *lw);
void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
int oldprio);
@@ -2416,19 +2499,54 @@ const struct sched_class name##_sched_class \
extern struct sched_class __sched_class_highest[];
extern struct sched_class __sched_class_lowest[];
+extern const struct sched_class stop_sched_class;
+extern const struct sched_class dl_sched_class;
+extern const struct sched_class rt_sched_class;
+extern const struct sched_class fair_sched_class;
+extern const struct sched_class idle_sched_class;
+
+#ifdef CONFIG_SCHED_CLASS_EXT
+extern const struct sched_class ext_sched_class;
+
+DECLARE_STATIC_KEY_FALSE(__scx_ops_enabled); /* SCX BPF scheduler loaded */
+DECLARE_STATIC_KEY_FALSE(__scx_switched_all); /* all fair class tasks on SCX */
+
+#define scx_enabled() static_branch_unlikely(&__scx_ops_enabled)
+#define scx_switched_all() static_branch_unlikely(&__scx_switched_all)
+#else /* !CONFIG_SCHED_CLASS_EXT */
+#define scx_enabled() false
+#define scx_switched_all() false
+#endif /* !CONFIG_SCHED_CLASS_EXT */
+
+/*
+ * Iterate only active classes. SCX can take over all fair tasks or be
+ * completely disabled. If the former, skip fair. If the latter, skip SCX.
+ */
+static inline const struct sched_class *next_active_class(const struct sched_class *class)
+{
+	const struct sched_class *next = class + 1;
+
+#ifdef CONFIG_SCHED_CLASS_EXT
+	/* All fair class tasks are on SCX: step past fair. */
+	if (scx_switched_all() && next == &fair_sched_class)
+		next++;
+
+	/* No SCX BPF scheduler loaded: step past ext. */
+	if (!scx_enabled() && next == &ext_sched_class)
+		next++;
+#endif
+	return next;
+}
+
#define for_class_range(class, _from, _to) \
for (class = (_from); class < (_to); class++)
#define for_each_class(class) \
for_class_range(class, __sched_class_highest, __sched_class_lowest)
-#define sched_class_above(_a, _b) ((_a) < (_b))
+#define for_active_class_range(class, _from, _to) \
+ for (class = (_from); class != (_to); class = next_active_class(class))
-extern const struct sched_class stop_sched_class;
-extern const struct sched_class dl_sched_class;
-extern const struct sched_class rt_sched_class;
-extern const struct sched_class fair_sched_class;
-extern const struct sched_class idle_sched_class;
+#define for_each_active_class(class) \
+ for_active_class_range(class, __sched_class_highest, __sched_class_lowest)
+
+#define sched_class_above(_a, _b) ((_a) < (_b))
static inline bool sched_stop_runnable(struct rq *rq)
{
@@ -2467,6 +2585,19 @@ extern void sched_balance_trigger(struct rq *rq);
extern int __set_cpus_allowed_ptr(struct task_struct *p, struct affinity_context *ctx);
extern void set_cpus_allowed_common(struct task_struct *p, struct affinity_context *ctx);
+/*
+ * Can @p be placed on @cpu? The CPU must be in the task's cpumask and, for
+ * anything that is not a kernel thread, must also pass task_cpu_possible().
+ */
+static inline bool task_allowed_on_cpu(struct task_struct *p, int cpu)
+{
+	/* Outside the task's cpumask: rejected without further checks. */
+	if (!cpumask_test_cpu(cpu, p->cpus_ptr))
+		return false;
+
+	/* Kernel threads are exempt from the task_cpu_possible() test. */
+	return (p->flags & PF_KTHREAD) || task_cpu_possible(cpu, p);
+}
+
static inline cpumask_t *alloc_user_cpus_ptr(int node)
{
/*
@@ -2500,6 +2631,11 @@ extern int push_cpu_stop(void *arg);
#else /* !CONFIG_SMP: */
+static inline bool task_allowed_on_cpu(struct task_struct *p, int cpu)
+{
+ return true; /* !CONFIG_SMP stub: no restriction is applied */
+}
+
static inline int __set_cpus_allowed_ptr(struct task_struct *p,
struct affinity_context *ctx)
{
@@ -2553,8 +2689,6 @@ extern void init_sched_dl_class(void);
extern void init_sched_rt_class(void);
extern void init_sched_fair_class(void);
-extern void reweight_task(struct task_struct *p, const struct load_weight *lw);
-
extern void resched_curr(struct rq *rq);
extern void resched_cpu(int cpu);
@@ -3154,6 +3288,8 @@ static inline unsigned long cpu_util_rt(struct rq *rq)
return READ_ONCE(rq->avg_rt.util_avg);
}
+#else /* !CONFIG_SMP */
+static inline bool update_other_load_avgs(struct rq *rq)
+{
+	/* !CONFIG_SMP stub. */
+	return false;
+}
#endif /* CONFIG_SMP */
#ifdef CONFIG_UCLAMP_TASK
@@ -3664,6 +3800,8 @@ extern void set_load_weight(struct task_struct *p, bool update_load);
extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);
+extern void check_class_changing(struct rq *rq, struct task_struct *p,
+ const struct sched_class *prev_class);
extern void check_class_changed(struct rq *rq, struct task_struct *p,
const struct sched_class *prev_class,
int oldprio);
@@ -3684,4 +3822,24 @@ static inline void balance_callbacks(struct rq *rq, struct balance_callback *hea
#endif
+#ifdef CONFIG_SCHED_CLASS_EXT
+/*
+ * Used by SCX in the enable/disable paths to move tasks between sched_classes
+ * and establish invariants.
+ */
+struct sched_enq_and_set_ctx { /* passed to sched_deq_and_put_task() and then to sched_enq_and_set_task() */
+ struct task_struct *p; /* task being moved */
+ int queue_flags; /* presumably the queue_flags given to sched_deq_and_put_task() - confirm */
+ bool queued; /* NOTE(review): presumably whether @p was queued when taken off - confirm */
+ bool running; /* NOTE(review): presumably whether @p was the running task - confirm */
+};
+
+void sched_deq_and_put_task(struct task_struct *p, int queue_flags,
+ struct sched_enq_and_set_ctx *ctx);
+void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx);
+
+#endif /* CONFIG_SCHED_CLASS_EXT */
+
+#include "ext.h"
+
#endif /* _KERNEL_SCHED_SCHED_H */