aboutsummaryrefslogtreecommitdiff
path: root/kernel/sched/sched.h
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r--kernel/sched/sched.h243
1 files changed, 212 insertions, 31 deletions
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3e8df6d31c1e..ec7b3e0a2b20 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2224,6 +2224,10 @@ struct sched_class {
#ifdef CONFIG_FAIR_GROUP_SCHED
void (*task_change_group)(struct task_struct *p);
#endif
+
+#ifdef CONFIG_SCHED_CORE
+ int (*task_is_throttled)(struct task_struct *p, int cpu);
+#endif
};
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
@@ -3249,61 +3253,238 @@ static inline void update_current_exec_runtime(struct task_struct *curr,
}
#ifdef CONFIG_SCHED_MM_CID
-static inline int __mm_cid_get(struct mm_struct *mm)
+
+#define SCHED_MM_CID_PERIOD_NS (100ULL * 1000000) /* 100ms */
+#define MM_CID_SCAN_DELAY 100 /* 100ms */
+
+extern raw_spinlock_t cid_lock;
+extern int use_cid_lock;
+
+extern void sched_mm_cid_migrate_from(struct task_struct *t);
+extern void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t);
+extern void task_tick_mm_cid(struct rq *rq, struct task_struct *curr);
+extern void init_sched_mm_cid(struct task_struct *t);
+
+static inline void __mm_cid_put(struct mm_struct *mm, int cid)
+{
+ if (cid < 0)
+ return;
+ cpumask_clear_cpu(cid, mm_cidmask(mm));
+}
+
+/*
+ * The per-mm/cpu cid can have the MM_CID_LAZY_PUT flag set or transition to
+ * the MM_CID_UNSET state without holding the rq lock, but the rq lock needs to
+ * be held to transition to other states.
+ *
+ * State transitions synchronized with cmpxchg or try_cmpxchg need to be
+ * consistent across cpus, which prevents use of this_cpu_cmpxchg.
+ */
+static inline void mm_cid_put_lazy(struct task_struct *t)
+{
+ struct mm_struct *mm = t->mm;
+ struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
+ int cid;
+
+ lockdep_assert_irqs_disabled();
+ cid = __this_cpu_read(pcpu_cid->cid);
+ if (!mm_cid_is_lazy_put(cid) ||
+ !try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
+ return;
+ __mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
+}
+
+static inline int mm_cid_pcpu_unset(struct mm_struct *mm)
+{
+ struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
+ int cid, res;
+
+ lockdep_assert_irqs_disabled();
+ cid = __this_cpu_read(pcpu_cid->cid);
+ for (;;) {
+ if (mm_cid_is_unset(cid))
+ return MM_CID_UNSET;
+ /*
+ * Attempt transition from valid or lazy-put to unset.
+ */
+ res = cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, cid, MM_CID_UNSET);
+ if (res == cid)
+ break;
+ cid = res;
+ }
+ return cid;
+}
+
+static inline void mm_cid_put(struct mm_struct *mm)
+{
+ int cid;
+
+ lockdep_assert_irqs_disabled();
+ cid = mm_cid_pcpu_unset(mm);
+ if (cid == MM_CID_UNSET)
+ return;
+ __mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
+}
+
+static inline int __mm_cid_try_get(struct mm_struct *mm)
{
struct cpumask *cpumask;
int cid;
cpumask = mm_cidmask(mm);
- cid = cpumask_first_zero(cpumask);
- if (cid >= nr_cpu_ids)
+ /*
+ * Retry finding first zero bit if the mask is temporarily
+ * filled. This only happens during concurrent remote-clear
+ * which owns a cid without holding a rq lock.
+ */
+ for (;;) {
+ cid = cpumask_first_zero(cpumask);
+ if (cid < nr_cpu_ids)
+ break;
+ cpu_relax();
+ }
+ if (cpumask_test_and_set_cpu(cid, cpumask))
return -1;
- __cpumask_set_cpu(cid, cpumask);
return cid;
}
-static inline void mm_cid_put(struct mm_struct *mm, int cid)
+/*
+ * Save a snapshot of the current runqueue time of this cpu
+ * with the per-cpu cid value, allowing to estimate how recently it was used.
+ */
+static inline void mm_cid_snapshot_time(struct rq *rq, struct mm_struct *mm)
+{
+ struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(rq));
+
+ lockdep_assert_rq_held(rq);
+ WRITE_ONCE(pcpu_cid->time, rq->clock);
+}
+
+static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm)
{
- lockdep_assert_irqs_disabled();
- if (cid < 0)
- return;
- raw_spin_lock(&mm->cid_lock);
- __cpumask_clear_cpu(cid, mm_cidmask(mm));
- raw_spin_unlock(&mm->cid_lock);
+ int cid;
+
+ /*
+ * All allocations (even those using the cid_lock) are lock-free. If
+ * use_cid_lock is set, hold the cid_lock to perform cid allocation to
+ * guarantee forward progress.
+ */
+ if (!READ_ONCE(use_cid_lock)) {
+ cid = __mm_cid_try_get(mm);
+ if (cid >= 0)
+ goto end;
+ raw_spin_lock(&cid_lock);
+ } else {
+ raw_spin_lock(&cid_lock);
+ cid = __mm_cid_try_get(mm);
+ if (cid >= 0)
+ goto unlock;
+ }
+
+ /*
+ * cid concurrently allocated. Retry while forcing following
+ * allocations to use the cid_lock to ensure forward progress.
+ */
+ WRITE_ONCE(use_cid_lock, 1);
+ /*
+ * Set use_cid_lock before allocation. Only care about program order
+ * because this is only required for forward progress.
+ */
+ barrier();
+ /*
+ * Retry until it succeeds. It is guaranteed to eventually succeed once
+ * all newcoming allocations observe the use_cid_lock flag set.
+ */
+ do {
+ cid = __mm_cid_try_get(mm);
+ cpu_relax();
+ } while (cid < 0);
+ /*
+ * Allocate before clearing use_cid_lock. Only care about
+ * program order because this is for forward progress.
+ */
+ barrier();
+ WRITE_ONCE(use_cid_lock, 0);
+unlock:
+ raw_spin_unlock(&cid_lock);
+end:
+ mm_cid_snapshot_time(rq, mm);
+ return cid;
}
-static inline int mm_cid_get(struct mm_struct *mm)
+static inline int mm_cid_get(struct rq *rq, struct mm_struct *mm)
{
- int ret;
+ struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
+ struct cpumask *cpumask;
+ int cid;
- lockdep_assert_irqs_disabled();
- raw_spin_lock(&mm->cid_lock);
- ret = __mm_cid_get(mm);
- raw_spin_unlock(&mm->cid_lock);
- return ret;
+ lockdep_assert_rq_held(rq);
+ cpumask = mm_cidmask(mm);
+ cid = __this_cpu_read(pcpu_cid->cid);
+ if (mm_cid_is_valid(cid)) {
+ mm_cid_snapshot_time(rq, mm);
+ return cid;
+ }
+ if (mm_cid_is_lazy_put(cid)) {
+ if (try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
+ __mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
+ }
+ cid = __mm_cid_get(rq, mm);
+ __this_cpu_write(pcpu_cid->cid, cid);
+ return cid;
}
-static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next)
+static inline void switch_mm_cid(struct rq *rq,
+ struct task_struct *prev,
+ struct task_struct *next)
{
+ /*
+ * Provide a memory barrier between rq->curr store and load of
+ * {prev,next}->mm->pcpu_cid[cpu] on rq->curr->mm transition.
+ *
+ * Should be adapted if context_switch() is modified.
+ */
+ if (!next->mm) { // to kernel
+ /*
+ * user -> kernel transition does not guarantee a barrier, but
+ * we can use the fact that it performs an atomic operation in
+ * mmgrab().
+ */
+ if (prev->mm) // from user
+ smp_mb__after_mmgrab();
+ /*
+ * kernel -> kernel transition does not change rq->curr->mm
+ * state. It stays NULL.
+ */
+ } else { // to user
+ /*
+ * kernel -> user transition does not provide a barrier
+ * between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu].
+ * Provide it here.
+ */
+ if (!prev->mm) // from kernel
+ smp_mb();
+ /*
+ * user -> user transition guarantees a memory barrier through
+ * switch_mm() when current->mm changes. If current->mm is
+ * unchanged, no barrier is needed.
+ */
+ }
if (prev->mm_cid_active) {
- if (next->mm_cid_active && next->mm == prev->mm) {
- /*
- * Context switch between threads in same mm, hand over
- * the mm_cid from prev to next.
- */
- next->mm_cid = prev->mm_cid;
- prev->mm_cid = -1;
- return;
- }
- mm_cid_put(prev->mm, prev->mm_cid);
+ mm_cid_snapshot_time(rq, prev->mm);
+ mm_cid_put_lazy(prev);
prev->mm_cid = -1;
}
if (next->mm_cid_active)
- next->mm_cid = mm_cid_get(next->mm);
+ next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next->mm);
}
#else
-static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next) { }
+static inline void switch_mm_cid(struct rq *rq, struct task_struct *prev, struct task_struct *next) { }
+static inline void sched_mm_cid_migrate_from(struct task_struct *t) { }
+static inline void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t) { }
+static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { }
+static inline void init_sched_mm_cid(struct task_struct *t) { }
#endif
#endif /* _KERNEL_SCHED_SCHED_H */