diff options
| author | Tejun Heo <[email protected]> | 2024-06-18 10:09:18 -1000 |
|---|---|---|
| committer | Tejun Heo <[email protected]> | 2024-06-18 10:09:18 -1000 |
| commit | 7bb6f0810ecfb73a9d7a2ca56fb001e0201a6758 (patch) | |
| tree | 8d229f62d4cabab1cdd1c397f4383eb6772bc555 /kernel | |
| parent | 8a010b81b3a50b033fc3cddc613517abda586cbe (diff) | |
sched_ext: Allow BPF schedulers to disallow specific tasks from joining SCHED_EXT
BPF schedulers might not want to schedule certain tasks - e.g. kernel
threads. This patch adds p->scx.disallow which can be set by BPF schedulers
in such cases. The field can be changed at any time, and setting it in
ops.init_task() guarantees that the task can never be scheduled by
sched_ext.
scx_qmap is updated with the -d option to disallow a specific PID:
# echo $$
1092
# grep -E '(policy)|(ext\.enabled)' /proc/self/sched
policy : 0
ext.enabled : 0
# ./set-scx 1092
# grep -E '(policy)|(ext\.enabled)' /proc/self/sched
policy : 7
ext.enabled : 0
Run "scx_qmap -p -d 1092" in another terminal.
# cat /sys/kernel/sched_ext/nr_rejected
1
# grep -E '(policy)|(ext\.enabled)' /proc/self/sched
policy : 0
ext.enabled : 0
# ./set-scx 1092
setparam failed for 1092 (Permission denied)
- v4: Refreshed on top of tip:sched/core.
- v3: Update description to reflect /sys/kernel/sched_ext interface change.
- v2: Use atomic_long_t instead of atomic64_t for scx_nr_rejected to
accommodate 32bit archs.
Signed-off-by: Tejun Heo <[email protected]>
Suggested-by: Barret Rhoden <[email protected]>
Reviewed-by: David Vernet <[email protected]>
Acked-by: Josh Don <[email protected]>
Acked-by: Hao Luo <[email protected]>
Acked-by: Barret Rhoden <[email protected]>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/sched/ext.c | 50 | ||||
| -rw-r--r-- | kernel/sched/ext.h | 2 | ||||
| -rw-r--r-- | kernel/sched/syscalls.c | 4 |
3 files changed, 56 insertions, 0 deletions
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 3dc515b3351f..8ff30b80e862 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -483,6 +483,8 @@ struct static_key_false scx_has_op[SCX_OPI_END] = static atomic_t scx_exit_kind = ATOMIC_INIT(SCX_EXIT_DONE); static struct scx_exit_info *scx_exit_info; +static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0); + /* * The maximum amount of time in jiffies that a task may be runnable without * being scheduled on a CPU. If this timeout is exceeded, it will trigger @@ -2332,6 +2334,8 @@ static int scx_ops_init_task(struct task_struct *p, struct task_group *tg, bool { int ret; + p->scx.disallow = false; + if (SCX_HAS_OP(init_task)) { struct scx_init_task_args args = { .fork = fork, @@ -2346,6 +2350,27 @@ static int scx_ops_init_task(struct task_struct *p, struct task_group *tg, bool scx_set_task_state(p, SCX_TASK_INIT); + if (p->scx.disallow) { + struct rq *rq; + struct rq_flags rf; + + rq = task_rq_lock(p, &rf); + + /* + * We're either in fork or load path and @p->policy will be + * applied right after. Reverting @p->policy here and rejecting + * %SCHED_EXT transitions from scx_check_setscheduler() + * guarantees that if ops.init_task() sets @p->disallow, @p can + * never be in SCX. 
+ */ + if (p->policy == SCHED_EXT) { + p->policy = SCHED_NORMAL; + atomic_long_inc(&scx_nr_rejected); + } + + task_rq_unlock(rq, p, &rf); + } + p->scx.flags |= SCX_TASK_RESET_RUNNABLE_AT; return 0; } @@ -2549,6 +2574,18 @@ static void switched_from_scx(struct rq *rq, struct task_struct *p) static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p,int wake_flags) {} static void switched_to_scx(struct rq *rq, struct task_struct *p) {} +int scx_check_setscheduler(struct task_struct *p, int policy) +{ + lockdep_assert_rq_held(task_rq(p)); + + /* if disallow, reject transitioning into SCX */ + if (scx_enabled() && READ_ONCE(p->scx.disallow) && + p->policy != policy && policy == SCHED_EXT) + return -EACCES; + + return 0; +} + /* * Omitted operations: * @@ -2703,9 +2740,17 @@ static ssize_t scx_attr_switch_all_show(struct kobject *kobj, } SCX_ATTR(switch_all); +static ssize_t scx_attr_nr_rejected_show(struct kobject *kobj, + struct kobj_attribute *ka, char *buf) +{ + return sysfs_emit(buf, "%ld\n", atomic_long_read(&scx_nr_rejected)); +} +SCX_ATTR(nr_rejected); + static struct attribute *scx_global_attrs[] = { &scx_attr_state.attr, &scx_attr_switch_all.attr, + &scx_attr_nr_rejected.attr, NULL, }; @@ -3178,6 +3223,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) atomic_set(&scx_exit_kind, SCX_EXIT_NONE); scx_warned_zero_slice = false; + atomic_long_set(&scx_nr_rejected, 0); + /* * Keep CPUs stable during enable so that the BPF scheduler can track * online CPUs by watching ->on/offline_cpu() after ->init(). 
@@ -3476,6 +3523,9 @@ static int bpf_scx_btf_struct_access(struct bpf_verifier_log *log, if (off >= offsetof(struct task_struct, scx.slice) && off + size <= offsetofend(struct task_struct, scx.slice)) return SCALAR_VALUE; + if (off >= offsetof(struct task_struct, scx.disallow) && + off + size <= offsetofend(struct task_struct, scx.disallow)) + return SCALAR_VALUE; } return -EACCES; diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index 56fcdb0b2c05..33a9f7fe5832 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -35,6 +35,7 @@ void scx_pre_fork(struct task_struct *p); int scx_fork(struct task_struct *p); void scx_post_fork(struct task_struct *p); void scx_cancel_fork(struct task_struct *p); +int scx_check_setscheduler(struct task_struct *p, int policy); bool task_should_scx(struct task_struct *p); void init_sched_ext_class(void); @@ -72,6 +73,7 @@ static inline void scx_pre_fork(struct task_struct *p) {} static inline int scx_fork(struct task_struct *p) { return 0; } static inline void scx_post_fork(struct task_struct *p) {} static inline void scx_cancel_fork(struct task_struct *p) {} +static inline int scx_check_setscheduler(struct task_struct *p, int policy) { return 0; } static inline bool task_on_scx(const struct task_struct *p) { return false; } static inline void init_sched_ext_class(void) {} diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c index 18d44d180db1..4fa59c9f69ac 100644 --- a/kernel/sched/syscalls.c +++ b/kernel/sched/syscalls.c @@ -714,6 +714,10 @@ recheck: goto unlock; } + retval = scx_check_setscheduler(p, policy); + if (retval) + goto unlock; + /* * If not changing anything there's no need to proceed further, * but store a possible modification of reset_on_fork. |