diff options
-rw-r--r-- | include/linux/cgroup.h | 2 | ||||
-rw-r--r-- | include/trace/events/cgroup.h | 48 | ||||
-rw-r--r-- | kernel/cgroup/rstat.c | 47 |
3 files changed, 88 insertions, 9 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 34aaf0e87def..2150ca60394b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -690,7 +690,7 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) void cgroup_rstat_updated(struct cgroup *cgrp, int cpu); void cgroup_rstat_flush(struct cgroup *cgrp); void cgroup_rstat_flush_hold(struct cgroup *cgrp); -void cgroup_rstat_flush_release(void); +void cgroup_rstat_flush_release(struct cgroup *cgrp); /* * Basic resource stats. diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h index dd7d7c9efecd..13f375800135 100644 --- a/include/trace/events/cgroup.h +++ b/include/trace/events/cgroup.h @@ -204,6 +204,54 @@ DEFINE_EVENT(cgroup_event, cgroup_notify_frozen, TP_ARGS(cgrp, path, val) ); +DECLARE_EVENT_CLASS(cgroup_rstat, + + TP_PROTO(struct cgroup *cgrp, int cpu_in_loop, bool contended), + + TP_ARGS(cgrp, cpu_in_loop, contended), + + TP_STRUCT__entry( + __field( int, root ) + __field( int, level ) + __field( u64, id ) + __field( int, cpu_in_loop ) + __field( bool, contended ) + ), + + TP_fast_assign( + __entry->root = cgrp->root->hierarchy_id; + __entry->id = cgroup_id(cgrp); + __entry->level = cgrp->level; + __entry->cpu_in_loop = cpu_in_loop; + __entry->contended = contended; + ), + + TP_printk("root=%d id=%llu level=%d cpu_in_loop=%d lock contended:%d", + __entry->root, __entry->id, __entry->level, + __entry->cpu_in_loop, __entry->contended) +); + +DEFINE_EVENT(cgroup_rstat, cgroup_rstat_lock_contended, + + TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), + + TP_ARGS(cgrp, cpu, contended) +); + +DEFINE_EVENT(cgroup_rstat, cgroup_rstat_locked, + + TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), + + TP_ARGS(cgrp, cpu, contended) +); + +DEFINE_EVENT(cgroup_rstat, cgroup_rstat_unlock, + + TP_PROTO(struct cgroup *cgrp, int cpu, bool contended), + + TP_ARGS(cgrp, cpu, contended) +); + #endif /* _TRACE_CGROUP_H */ /* This part must be outside protection */ diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 07e2284bb499..ff68c904e647 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -7,6 +7,8 @@ #include <linux/btf.h> #include <linux/btf_ids.h> +#include <trace/events/cgroup.h> + static DEFINE_SPINLOCK(cgroup_rstat_lock); static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock); @@ -222,6 +224,35 @@ __weak noinline void bpf_rstat_flush(struct cgroup *cgrp, __bpf_hook_end(); +/* + * Helper functions for locking cgroup_rstat_lock. + * + * This makes it easier to diagnose locking issues and contention in + * production environments. The parameter @cpu_in_loop indicate lock + * was released and re-taken when collection data from the CPUs. The + * value -1 is used when obtaining the main lock else this is the CPU + * number processed last. + */ +static inline void __cgroup_rstat_lock(struct cgroup *cgrp, int cpu_in_loop) + __acquires(&cgroup_rstat_lock) +{ + bool contended; + + contended = !spin_trylock_irq(&cgroup_rstat_lock); + if (contended) { + trace_cgroup_rstat_lock_contended(cgrp, cpu_in_loop, contended); + spin_lock_irq(&cgroup_rstat_lock); + } + trace_cgroup_rstat_locked(cgrp, cpu_in_loop, contended); +} + +static inline void __cgroup_rstat_unlock(struct cgroup *cgrp, int cpu_in_loop) + __releases(&cgroup_rstat_lock) +{ + trace_cgroup_rstat_unlock(cgrp, cpu_in_loop, false); + spin_unlock_irq(&cgroup_rstat_lock); +} + /* see cgroup_rstat_flush() */ static void cgroup_rstat_flush_locked(struct cgroup *cgrp) __releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock) @@ -248,10 +279,10 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp) /* play nice and yield if necessary */ if (need_resched() || spin_needbreak(&cgroup_rstat_lock)) { - spin_unlock_irq(&cgroup_rstat_lock); + __cgroup_rstat_unlock(cgrp, cpu); if (!cond_resched()) cpu_relax(); - spin_lock_irq(&cgroup_rstat_lock); + __cgroup_rstat_lock(cgrp, cpu); } } } @@ -273,9 +304,9 @@ __bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp) { might_sleep(); - spin_lock_irq(&cgroup_rstat_lock); + __cgroup_rstat_lock(cgrp, -1); cgroup_rstat_flush_locked(cgrp); - spin_unlock_irq(&cgroup_rstat_lock); + __cgroup_rstat_unlock(cgrp, -1); } /** @@ -291,17 +322,17 @@ void cgroup_rstat_flush_hold(struct cgroup *cgrp) __acquires(&cgroup_rstat_lock) { might_sleep(); - spin_lock_irq(&cgroup_rstat_lock); + __cgroup_rstat_lock(cgrp, -1); cgroup_rstat_flush_locked(cgrp); } /** * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold() */ -void cgroup_rstat_flush_release(void) +void cgroup_rstat_flush_release(struct cgroup *cgrp) __releases(&cgroup_rstat_lock) { - spin_unlock_irq(&cgroup_rstat_lock); + __cgroup_rstat_unlock(cgrp, -1); } int cgroup_rstat_init(struct cgroup *cgrp) @@ -533,7 +564,7 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq) #ifdef CONFIG_SCHED_CORE forceidle_time = cgrp->bstat.forceidle_sum; #endif - cgroup_rstat_flush_release(); + cgroup_rstat_flush_release(cgrp); } else { root_cgroup_cputime(&bstat); usage = bstat.cputime.sum_exec_runtime; |