diff options
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/core.c | 83 | ||||
-rw-r--r-- | kernel/sched/cpufreq_schedutil.c | 14 | ||||
-rw-r--r-- | kernel/sched/deadline.c | 8 | ||||
-rw-r--r-- | kernel/sched/fair.c | 5 | ||||
-rw-r--r-- | kernel/sched/psi.c | 12 |
5 files changed, 68 insertions, 54 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2b037f195473..df9f1fe5689b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3904,7 +3904,7 @@ void __noreturn do_task_dead(void) static inline void sched_submit_work(struct task_struct *tsk) { - if (!tsk->state || tsk_is_pi_blocked(tsk)) + if (!tsk->state) return; /* @@ -3920,6 +3920,9 @@ static inline void sched_submit_work(struct task_struct *tsk) preempt_enable_no_resched(); } + if (tsk_is_pi_blocked(tsk)) + return; + /* * If we are going to sleep and we have plugged IO queued, * make sure to submit it to avoid deadlocks. @@ -5102,37 +5105,40 @@ out_unlock: return retval; } -static int sched_read_attr(struct sched_attr __user *uattr, - struct sched_attr *attr, - unsigned int usize) +/* + * Copy the kernel size attribute structure (which might be larger + * than what user-space knows about) to user-space. + * + * Note that all cases are valid: user-space buffer can be larger or + * smaller than the kernel-space buffer. The usual case is that both + * have the same size. + */ +static int +sched_attr_copy_to_user(struct sched_attr __user *uattr, + struct sched_attr *kattr, + unsigned int usize) { - int ret; + unsigned int ksize = sizeof(*kattr); if (!access_ok(uattr, usize)) return -EFAULT; /* - * If we're handed a smaller struct than we know of, - * ensure all the unknown bits are 0 - i.e. old - * user-space does not get uncomplete information. + * sched_getattr() ABI forwards and backwards compatibility: + * + * If usize == ksize then we just copy everything to user-space and all is good. + * + * If usize < ksize then we only copy as much as user-space has space for, + * this keeps ABI compatibility as well. We skip the rest. + * + * If usize > ksize then user-space is using a newer version of the ABI, + * which part the kernel doesn't know about. Just ignore it - tooling can + * detect the kernel's knowledge of attributes from the attr->size value + * which is set to ksize in this case. */ - if (usize < sizeof(*attr)) { - unsigned char *addr; - unsigned char *end; - - addr = (void *)attr + usize; - end = (void *)attr + sizeof(*attr); + kattr->size = min(usize, ksize); - for (; addr < end; addr++) { - if (*addr) - return -EFBIG; - } - - attr->size = usize; - } - - ret = copy_to_user(uattr, attr, attr->size); - if (ret) + if (copy_to_user(uattr, kattr, kattr->size)) return -EFAULT; return 0; @@ -5142,20 +5148,18 @@ static int sched_read_attr(struct sched_attr __user *uattr, * sys_sched_getattr - similar to sched_getparam, but with sched_attr * @pid: the pid in question. * @uattr: structure containing the extended parameters. - * @size: sizeof(attr) for fwd/bwd comp. + * @usize: sizeof(attr) that user-space knows about, for forwards and backwards compatibility. * @flags: for future extension. */ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, - unsigned int, size, unsigned int, flags) + unsigned int, usize, unsigned int, flags) { - struct sched_attr attr = { - .size = sizeof(struct sched_attr), - }; + struct sched_attr kattr = { }; struct task_struct *p; int retval; - if (!uattr || pid < 0 || size > PAGE_SIZE || - size < SCHED_ATTR_SIZE_VER0 || flags) + if (!uattr || pid < 0 || usize > PAGE_SIZE || + usize < SCHED_ATTR_SIZE_VER0 || flags) return -EINVAL; rcu_read_lock(); @@ -5168,25 +5172,24 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, if (retval) goto out_unlock; - attr.sched_policy = p->policy; + kattr.sched_policy = p->policy; if (p->sched_reset_on_fork) - attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; + kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; if (task_has_dl_policy(p)) - __getparam_dl(p, &attr); + __getparam_dl(p, &kattr); else if (task_has_rt_policy(p)) - attr.sched_priority = p->rt_priority; + kattr.sched_priority = p->rt_priority; else - attr.sched_nice = task_nice(p); + kattr.sched_nice = task_nice(p); #ifdef CONFIG_UCLAMP_TASK - attr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; - attr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; + kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; + kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; #endif rcu_read_unlock(); - retval = sched_read_attr(uattr, &attr, size); - return retval; + return sched_attr_copy_to_user(uattr, &kattr, usize); out_unlock: rcu_read_unlock(); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 636ca6f88c8e..867b4bb6d4be 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -40,6 +40,7 @@ struct sugov_policy { struct task_struct *thread; bool work_in_progress; + bool limits_changed; bool need_freq_update; }; @@ -89,8 +90,11 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) !cpufreq_this_cpu_can_update(sg_policy->policy)) return false; - if (unlikely(sg_policy->need_freq_update)) + if (unlikely(sg_policy->limits_changed)) { + sg_policy->limits_changed = false; + sg_policy->need_freq_update = true; return true; + } delta_ns = time - sg_policy->last_freq_update_time; @@ -437,7 +441,7 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) { if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) - sg_policy->need_freq_update = true; + sg_policy->limits_changed = true; } static void sugov_update_single(struct update_util_data *hook, u64 time, @@ -457,7 +461,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, if (!sugov_should_update_freq(sg_policy, time)) return; - busy = sugov_cpu_is_busy(sg_cpu); + /* Limits may have changed, don't skip frequency update */ + busy = !sg_policy->need_freq_update && sugov_cpu_is_busy(sg_cpu); util = sugov_get_util(sg_cpu); max = sg_cpu->max; @@ -831,6 +836,7 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->last_freq_update_time = 0; sg_policy->next_freq = 0; sg_policy->work_in_progress = false; + sg_policy->limits_changed = false; sg_policy->need_freq_update = false; sg_policy->cached_raw_freq = 0; @@ -879,7 +885,7 @@ static void sugov_limits(struct cpufreq_policy *policy) mutex_unlock(&sg_policy->work_lock); } - sg_policy->need_freq_update = true; + sg_policy->limits_changed = true; } struct cpufreq_governor schedutil_gov = { diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index ef5b9f6b1d42..46122edd8552 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2088,17 +2088,13 @@ retry: } deactivate_task(rq, next_task, 0); - sub_running_bw(&next_task->dl, &rq->dl); - sub_rq_bw(&next_task->dl, &rq->dl); set_task_cpu(next_task, later_rq->cpu); - add_rq_bw(&next_task->dl, &later_rq->dl); /* * Update the later_rq clock here, because the clock is used * by the cpufreq_update_util() inside __add_running_bw(). */ update_rq_clock(later_rq); - add_running_bw(&next_task->dl, &later_rq->dl); activate_task(later_rq, next_task, ENQUEUE_NOCLOCK); ret = 1; @@ -2186,11 +2182,7 @@ static void pull_dl_task(struct rq *this_rq) resched = true; deactivate_task(src_rq, p, 0); - sub_running_bw(&p->dl, &src_rq->dl); - sub_rq_bw(&p->dl, &src_rq->dl); set_task_cpu(p, this_cpu); - add_rq_bw(&p->dl, &this_rq->dl); - add_running_bw(&p->dl, &this_rq->dl); activate_task(this_rq, p, 0); dmin = p->dl.deadline; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bc9cfeaac8bd..500f5db0de0b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4470,6 +4470,8 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) if (likely(cfs_rq->runtime_remaining > 0)) return; + if (cfs_rq->throttled) + return; /* * if we're unable to extend our runtime we resched so that the active * hierarchy can be throttled @@ -4673,6 +4675,9 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, if (!cfs_rq_throttled(cfs_rq)) goto next; + /* By the above check, this should never be true */ + SCHED_WARN_ON(cfs_rq->runtime_remaining > 0); + runtime = -cfs_rq->runtime_remaining + 1; if (runtime > remaining) runtime = remaining; diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 7acc632c3b82..6e52b67b420e 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1051,7 +1051,7 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, if (!rcu_access_pointer(group->poll_kworker)) { struct sched_param param = { - .sched_priority = MAX_RT_PRIO - 1, + .sched_priority = 1, }; struct kthread_worker *kworker; @@ -1061,7 +1061,7 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, mutex_unlock(&group->trigger_lock); return ERR_CAST(kworker); } - sched_setscheduler(kworker->task, SCHED_FIFO, ¶m); + sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, ¶m); kthread_init_delayed_work(&group->poll_work, psi_poll_work); rcu_assign_pointer(group->poll_kworker, kworker); @@ -1131,7 +1131,15 @@ static void psi_trigger_destroy(struct kref *ref) * deadlock while waiting for psi_poll_work to acquire trigger_lock */ if (kworker_to_destroy) { + /* + * After the RCU grace period has expired, the worker + * can no longer be found through group->poll_kworker. + * But it might have been already scheduled before + * that - deschedule it cleanly before destroying it. + */ kthread_cancel_delayed_work_sync(&group->poll_work); + atomic_set(&group->poll_scheduled, 0); + kthread_destroy_worker(kworker_to_destroy); } kfree(t); |