author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-27 14:03:21 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-27 14:03:21 -0700
commit | ed3b7923a816ded62dccef377c9ee346c7d3b1b4 (patch)
tree | 41d46fc399c231088a370e7b3e488a93342fa681 /include
parent | e8f75c0270d930ef675fee22d74d1a3250e96962 (diff)
parent | ebb83d84e49b54369b0db67136a5fe1087124dcc (diff)
Merge tag 'sched-core-2023-06-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
"Scheduler SMP load-balancer improvements:
- Avoid unnecessary migrations within SMT domains on hybrid systems.
Problem:
On hybrid CPU systems (processors with a mixture of
higher-frequency SMT cores and lower-frequency non-SMT cores),
under the old code, lower-priority CPUs pulled tasks from the
higher-priority cores if more than one SMT sibling was busy -
resulting in many unnecessary task migrations.
Solution:
The new code improves the load balancer to recognize SMT cores
with more than one busy sibling and allows lower-priority CPUs
to pull tasks, which avoids superfluous migrations and lets
lower-priority cores inspect all SMT siblings for the busiest
queue.
- Implement the 'runnable boosting' feature in the EAS balancer:
consider CPU contention in frequency, EAS max util & load-balance
busiest CPU selection.
This improves CPU utilization for certain workloads, while leaving
other key workloads unchanged.
Scheduler infrastructure improvements:
- Rewrite the scheduler topology setup code by consolidating it into
the build_sched_topology() helper function and building it
dynamically.
- Resolve the local_clock() vs. noinstr complications by rewriting
the code: provide separate sched_clock_noinstr() and
local_clock_noinstr() functions to be used in non-instrumentable (noinstr) code,
and make sure it is all instrumentation-safe.
Fixes:
- Fix a kthread_park() race with wait_woken()
- Fix misc wait_task_inactive() bugs unearthed by the -rt merge:
- Fix UP PREEMPT bug by unifying the SMP and UP implementations
- Fix task_struct::saved_state handling
- Fix various rq clock update bugs, unearthed by turning on the rq
clock debugging code.
- Fix the PSI WINDOW_MIN_US trigger limit, which was easy to trigger
by creating enough cgroups, by removing the warning and restricting
window size triggers to PSI file write-permission or
CAP_SYS_RESOURCE.
- Propagate SMT flags in the topology when removing a degenerate domain
- Fix grub_reclaim() calculation bug in the deadline scheduler code
- Avoid resetting the min update period when it is unnecessary, in
psi_trigger_destroy().
- Don't balance a task to its current running CPU in load_balance(),
which was possible on certain NUMA topologies with overlapping
groups.
- Fix the sched-debug printing of rq->nr_uninterruptible
Cleanups:
- Address various -Wmissing-prototype warnings, in preparation for
(maybe) enabling this warning in the future.
- Remove unused code
- Mark more functions __init
- Fix shadow-variable warnings"
* tag 'sched-core-2023-06-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (50 commits)
sched/core: Avoid multiple calling update_rq_clock() in __cfsb_csd_unthrottle()
sched/core: Avoid double calling update_rq_clock() in __balance_push_cpu_stop()
sched/core: Fixed missing rq clock update before calling set_rq_offline()
sched/deadline: Update GRUB description in the documentation
sched/deadline: Fix bandwidth reclaim equation in GRUB
sched/wait: Fix a kthread_park race with wait_woken()
sched/topology: Mark set_sched_topology() __init
sched/fair: Rename variable cpu_util eff_util
arm64/arch_timer: Fix MMIO byteswap
sched/fair, cpufreq: Introduce 'runnable boosting'
sched/fair: Refactor CPU utilization functions
cpuidle: Use local_clock_noinstr()
sched/clock: Provide local_clock_noinstr()
x86/tsc: Provide sched_clock_noinstr()
clocksource: hyper-v: Provide noinstr sched_clock()
clocksource: hyper-v: Adjust hv_read_tsc_page_tsc() to avoid special casing U64_MAX
x86/vdso: Fix gettimeofday masking
math64: Always inline u128 version of mul_u64_u64_shr()
s390/time: Provide sched_clock_noinstr()
loongarch: Provide noinstr sched_clock_read()
...
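
Several entries above ("sched/clock: Provide local_clock_noinstr()", "x86/tsc: Provide sched_clock_noinstr()", "cpuidle: Use local_clock_noinstr()") add clock accessors that are safe to call from noinstr code. A hedged, kernel-style sketch of the intended usage -- example_idle_residency() is a made-up function, not something in the tree -- might look like:

#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/sched/clock.h>

/*
 * Illustrative only: inside a noinstr (non-instrumentable) region,
 * read time via the _noinstr clock variants so the clock read itself
 * cannot be traced or kprobed.
 */
static noinstr u64 example_idle_residency(void)
{
        u64 t0 = local_clock_noinstr();

        /* ... low-level work that must stay non-instrumentable ... */

        return local_clock_noinstr() - t0;      /* elapsed nanoseconds */
}

On configurations without CONFIG_ARCH_WANTS_NO_INSTR or CONFIG_GENERIC_SCHED_CLOCK, sched_clock_noinstr() simply falls back to sched_clock(), as the sched/clock.h hunk below shows.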
Diffstat (limited to 'include')
-rw-r--r-- | include/clocksource/hyperv_timer.h | 24
-rw-r--r-- | include/linux/kthread.h | 1
-rw-r--r-- | include/linux/math64.h | 2
-rw-r--r-- | include/linux/rbtree_latch.h | 2
-rw-r--r-- | include/linux/sched.h | 7
-rw-r--r-- | include/linux/sched/clock.h | 17
-rw-r--r-- | include/linux/sched/sd_flags.h | 5
-rw-r--r-- | include/linux/sched/topology.h | 2
-rw-r--r-- | include/linux/seqlock.h | 15
9 files changed, 40 insertions, 35 deletions
diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h
index 536f897375d0..6cdc873ac907 100644
--- a/include/clocksource/hyperv_timer.h
+++ b/include/clocksource/hyperv_timer.h
@@ -38,8 +38,9 @@ extern void hv_remap_tsc_clocksource(void);
 extern unsigned long hv_get_tsc_pfn(void);
 extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
 
-static inline notrace u64
-hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc)
+static __always_inline bool
+hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
+                     u64 *cur_tsc, u64 *time)
 {
         u64 scale, offset;
         u32 sequence;
@@ -63,7 +64,7 @@ hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc)
         do {
                 sequence = READ_ONCE(tsc_pg->tsc_sequence);
                 if (!sequence)
-                        return U64_MAX;
+                        return false;
 
                 /*
                  * Make sure we read sequence before we read other values from
                  * TSC page.
@@ -82,15 +83,8 @@ hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc)
 
         } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
 
-        return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
-}
-
-static inline notrace u64
-hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
-{
-        u64 cur_tsc;
-
-        return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
+        *time = mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
+        return true;
 }
 
 #else /* CONFIG_HYPERV_TIMER */
@@ -104,10 +98,10 @@ static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
         return NULL;
 }
 
-static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
-                                       u64 *cur_tsc)
+static __always_inline bool
+hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc, u64 *time)
 {
-        return U64_MAX;
+        return false;
 }
 
 static inline int hv_stimer_cleanup(unsigned int cpu) { return 0; }
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 30e5bec81d2b..f1f95a71a4bc 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -89,6 +89,7 @@ int kthread_stop(struct task_struct *k);
 bool kthread_should_stop(void);
 bool kthread_should_park(void);
 bool __kthread_should_park(struct task_struct *k);
+bool kthread_should_stop_or_park(void);
 bool kthread_freezable_should_stop(bool *was_frozen);
 void *kthread_func(struct task_struct *k);
 void *kthread_data(struct task_struct *k);
diff --git a/include/linux/math64.h b/include/linux/math64.h
index 8b9191a2849e..bf74478926d4 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -168,7 +168,7 @@ static __always_inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
 #endif /* mul_u64_u32_shr */
 
 #ifndef mul_u64_u64_shr
-static inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift)
+static __always_inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift)
 {
         return (u64)(((unsigned __int128)a * mul) >> shift);
 }
diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h
index 3d1a9e716b80..6a0999c26c7c 100644
--- a/include/linux/rbtree_latch.h
+++ b/include/linux/rbtree_latch.h
@@ -206,7 +206,7 @@ latch_tree_find(void *key, struct latch_tree_root *root,
         do {
                 seq = raw_read_seqcount_latch(&root->seq);
                 node = __lt_find(key, root, seq & 1, ops->comp);
-        } while (read_seqcount_latch_retry(&root->seq, seq));
+        } while (raw_read_seqcount_latch_retry(&root->seq, seq));
 
         return node;
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index eed5d65b8d1f..1292d38d66cc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2006,15 +2006,12 @@ static __always_inline void scheduler_ipi(void)
          */
         preempt_fold_need_resched();
 }
-extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);
 #else
 static inline void scheduler_ipi(void) { }
-static inline unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
-{
-        return 1;
-}
 #endif
 
+extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);
+
 /*
  * Set thread flags in other task's structures.
  * See asm/thread_info.h for TIF_xxxx flags available:
diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
index ca008f7d3615..196f0ca351a2 100644
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -12,7 +12,16 @@
  *
  * Please use one of the three interfaces below.
  */
-extern unsigned long long notrace sched_clock(void);
+extern u64 sched_clock(void);
+
+#if defined(CONFIG_ARCH_WANTS_NO_INSTR) || defined(CONFIG_GENERIC_SCHED_CLOCK)
+extern u64 sched_clock_noinstr(void);
+#else
+static __always_inline u64 sched_clock_noinstr(void)
+{
+        return sched_clock();
+}
+#endif
 
 /*
  * See the comment in kernel/sched/clock.c
@@ -45,6 +54,11 @@ static inline u64 cpu_clock(int cpu)
         return sched_clock();
 }
 
+static __always_inline u64 local_clock_noinstr(void)
+{
+        return sched_clock_noinstr();
+}
+
 static __always_inline u64 local_clock(void)
 {
         return sched_clock();
@@ -79,6 +93,7 @@ static inline u64 cpu_clock(int cpu)
         return sched_clock_cpu(cpu);
 }
 
+extern u64 local_clock_noinstr(void);
 extern u64 local_clock(void);
 
 #endif
diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
index 57bde66d95f7..fad77b5172e2 100644
--- a/include/linux/sched/sd_flags.h
+++ b/include/linux/sched/sd_flags.h
@@ -132,12 +132,9 @@ SD_FLAG(SD_SERIALIZE, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
 /*
  * Place busy tasks earlier in the domain
  *
- * SHARED_CHILD: Usually set on the SMT level. Technically could be set further
- *               up, but currently assumed to be set from the base domain
- *               upwards (see update_top_cache_domain()).
  * NEEDS_GROUPS: Load balancing flag.
  */
-SD_FLAG(SD_ASYM_PACKING, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
+SD_FLAG(SD_ASYM_PACKING, SDF_NEEDS_GROUPS)
 
 /*
  * Prefer to place tasks in a sibling domain
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 816df6cc444e..67b573d5bf28 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -203,7 +203,7 @@ struct sched_domain_topology_level {
 #endif
 };
 
-extern void set_sched_topology(struct sched_domain_topology_level *tl);
+extern void __init set_sched_topology(struct sched_domain_topology_level *tl);
 
 #ifdef CONFIG_SCHED_DEBUG
 # define SD_INIT_NAME(type)            .name = #type
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 3926e9027947..987a59d977c5 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -671,9 +671,9 @@ typedef struct {
  *
  * Return: sequence counter raw value. Use the lowest bit as an index for
  * picking which data copy to read. The full counter must then be checked
- * with read_seqcount_latch_retry().
+ * with raw_read_seqcount_latch_retry().
  */
-static inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s)
+static __always_inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s)
 {
         /*
          * Pairs with the first smp_wmb() in raw_write_seqcount_latch().
@@ -683,16 +683,17 @@ static inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s)
 }
 
 /**
- * read_seqcount_latch_retry() - end a seqcount_latch_t read section
+ * raw_read_seqcount_latch_retry() - end a seqcount_latch_t read section
  * @s: Pointer to seqcount_latch_t
  * @start: count, from raw_read_seqcount_latch()
  *
  * Return: true if a read section retry is required, else false
  */
-static inline int
-read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
+static __always_inline int
+raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
 {
-        return read_seqcount_retry(&s->seqcount, start);
+        smp_rmb();
+        return unlikely(READ_ONCE(s->seqcount.sequence) != start);
 }
 
 /**
@@ -752,7 +753,7 @@ read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
  *                      entry = data_query(latch->data[idx], ...);
  *
  *                      // This includes needed smp_rmb()
- *              } while (read_seqcount_latch_retry(&latch->seq, seq));
+ *              } while (raw_read_seqcount_latch_retry(&latch->seq, seq));
  *
  *              return entry;
  *      }
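
For reference, the hyperv_timer.h change above converts hv_read_tsc_page_tsc() from returning U64_MAX on failure to returning bool, with the computed time passed back through *time. A hedged sketch of a caller after this change -- read_hv_clock_example() and its fallback are hypothetical; only the hv_get_tsc_page() and hv_read_tsc_page_tsc() signatures are taken from the diff:

#include <clocksource/hyperv_timer.h>

/* Illustrative caller; not code from this merge. */
static u64 read_hv_clock_example(void)
{
        struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();
        u64 cur_tsc, time;

        /* false means the TSC page sequence was invalid or unavailable;
         * a real caller would fall back to another clocksource here. */
        if (tsc_pg && hv_read_tsc_page_tsc(tsc_pg, &cur_tsc, &time))
                return time;

        return 0;       /* placeholder fallback for this sketch */
}

The bool return plus out-parameter removes the need to treat U64_MAX as a sentinel error value, which is what the "Adjust hv_read_tsc_page_tsc() to avoid special casing U64_MAX" commit in the shortlog refers to.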