Diffstat (limited to 'kernel/cgroup/cgroup.c')
-rw-r--r--  kernel/cgroup/cgroup.c  205
1 file changed, 98 insertions(+), 107 deletions(-)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 4d42f0cbc11e..1fb7f562289d 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -57,6 +57,7 @@
#include <linux/file.h>
#include <linux/fs_parser.h>
#include <linux/sched/cputime.h>
+#include <linux/sched/deadline.h>
#include <linux/psi.h>
#include <net/sock.h>
@@ -312,8 +313,6 @@ bool cgroup_ssid_enabled(int ssid)
* masks of ancestors.
*
* - blkcg: blk-throttle becomes properly hierarchical.
- *
- * - debug: disallowed on the default hierarchy.
*/
bool cgroup_on_dfl(const struct cgroup *cgrp)
{
@@ -356,7 +355,7 @@ static bool cgroup_has_tasks(struct cgroup *cgrp)
return cgrp->nr_populated_csets;
}
-bool cgroup_is_threaded(struct cgroup *cgrp)
+static bool cgroup_is_threaded(struct cgroup *cgrp)
{
return cgrp->dom_cgrp != cgrp;
}
@@ -395,7 +394,7 @@ static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
}
/* is @cgrp root of a threaded subtree? */
-bool cgroup_is_thread_root(struct cgroup *cgrp)
+static bool cgroup_is_thread_root(struct cgroup *cgrp)
{
/* thread root should be a domain */
if (cgroup_is_threaded(cgrp))
@@ -494,28 +493,6 @@ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
}
/**
- * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem
- * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest
- *
- * Find and get @cgrp's css associated with @ss. If the css doesn't exist
- * or is offline, %NULL is returned.
- */
-static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
- struct cgroup_subsys *ss)
-{
- struct cgroup_subsys_state *css;
-
- rcu_read_lock();
- css = cgroup_css(cgrp, ss);
- if (css && !css_tryget_online(css))
- css = NULL;
- rcu_read_unlock();
-
- return css;
-}
-
-/**
* cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
* @cgrp: the cgroup of interest
* @ss: the subsystem of interest (%NULL returns @cgrp->self)
@@ -618,7 +595,7 @@ EXPORT_SYMBOL_GPL(cgroup_get_e_css);
static void cgroup_get_live(struct cgroup *cgrp)
{
WARN_ON_ONCE(cgroup_is_dead(cgrp));
- css_get(&cgrp->self);
+ cgroup_get(cgrp);
}
/**
@@ -680,7 +657,7 @@ EXPORT_SYMBOL_GPL(of_css);
* @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
* @cgrp: the target cgroup to iterate css's of
*
- * Should be called under cgroup_[tree_]mutex.
+ * Should be called under cgroup_mutex.
*/
#define for_each_css(css, ssid, cgrp) \
for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
@@ -690,21 +667,6 @@ EXPORT_SYMBOL_GPL(of_css);
else
/**
- * for_each_e_css - iterate all effective css's of a cgroup
- * @css: the iteration cursor
- * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
- * @cgrp: the target cgroup to iterate css's of
- *
- * Should be called under cgroup_[tree_]mutex.
- */
-#define for_each_e_css(css, ssid, cgrp) \
- for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
- if (!((css) = cgroup_e_css_by_mask(cgrp, \
- cgroup_subsys[(ssid)]))) \
- ; \
- else
-
-/**
* do_each_subsys_mask - filter for_each_subsys with a bitmask
* @ss: the iteration cursor
* @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
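
[Usage sketch, not part of the patch: the surviving for_each_css() iterator above walks every css attached to a cgroup, skipping subsystems that have none. A minimal, hedged example, assuming the caller already holds cgroup_mutex as the updated comment requires:]

	static void print_attached_subsystems(struct cgroup *cgrp)
	{
		struct cgroup_subsys_state *css;
		int ssid;

		lockdep_assert_held(&cgroup_mutex);
		for_each_css(css, ssid, cgrp)
			/* only subsystems with a css on @cgrp reach here */
			pr_info("%s css is attached\n", cgroup_subsys[ssid]->name);
	}
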
@@ -945,7 +907,7 @@ static void css_set_move_task(struct task_struct *task,
#define CSS_SET_HASH_BITS 7
static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);
-static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
+static unsigned long css_set_hash(struct cgroup_subsys_state **css)
{
unsigned long key = 0UL;
struct cgroup_subsys *ss;
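
[Note on this hunk and the analogous find_existing_css_set() hunk that follows: the signature change is purely cosmetic. In C, an array parameter decays to a pointer, so both spellings declare the same function; the pointer form just makes that explicit:]

	/* identical to the compiler -- array parameters decay to pointers */
	static unsigned long css_set_hash(struct cgroup_subsys_state *css[]);
	static unsigned long css_set_hash(struct cgroup_subsys_state **css);
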
@@ -1086,7 +1048,7 @@ static bool compare_css_sets(struct css_set *cset,
*/
static struct css_set *find_existing_css_set(struct css_set *old_cset,
struct cgroup *cgrp,
- struct cgroup_subsys_state *template[])
+ struct cgroup_subsys_state **template)
{
struct cgroup_root *root = cgrp->root;
struct cgroup_subsys *ss;
@@ -1752,7 +1714,7 @@ static int css_populate_dir(struct cgroup_subsys_state *css)
struct cftype *cfts, *failed_cfts;
int ret;
- if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
+ if (css->flags & CSS_VISIBLE)
return 0;
if (!css->ss) {
@@ -2393,45 +2355,6 @@ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
EXPORT_SYMBOL_GPL(cgroup_path_ns);
/**
- * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
- * @task: target task
- * @buf: the buffer to write the path into
- * @buflen: the length of the buffer
- *
- * Determine @task's cgroup on the first (the one with the lowest non-zero
- * hierarchy_id) cgroup hierarchy and copy its path into @buf. This
- * function grabs cgroup_mutex and shouldn't be used inside locks used by
- * cgroup controller callbacks.
- *
- * Return value is the same as kernfs_path().
- */
-int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
-{
- struct cgroup_root *root;
- struct cgroup *cgrp;
- int hierarchy_id = 1;
- int ret;
-
- cgroup_lock();
- spin_lock_irq(&css_set_lock);
-
- root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
-
- if (root) {
- cgrp = task_cgroup_from_root(task, root);
- ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
- } else {
- /* if no hierarchy exists, everyone is in "/" */
- ret = strscpy(buf, "/", buflen);
- }
-
- spin_unlock_irq(&css_set_lock);
- cgroup_unlock();
- return ret;
-}
-EXPORT_SYMBOL_GPL(task_cgroup_path);
-
-/**
* cgroup_attach_lock - Lock for ->attach()
* @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem
*
@@ -2554,7 +2477,7 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
/*
* This function may be called both before and
- * after cgroup_taskset_migrate(). The two cases
+ * after cgroup_migrate_execute(). The two cases
* can be distinguished by looking at whether @cset
* has its ->mg_dst_cset set.
*/
@@ -2885,9 +2808,9 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup,
struct task_struct *task;
/*
- * Prevent freeing of tasks while we take a snapshot. Tasks that are
- * already PF_EXITING could be freed from underneath us unless we
- * take an rcu_read_lock.
+ * The following thread iteration should be inside an RCU critical
+ * section to prevent tasks from being freed while taking the snapshot.
+ * spin_lock_irq() implies RCU critical section here.
*/
spin_lock_irq(&css_set_lock);
task = leader;
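
[The reworded comment relies on a consolidated-RCU guarantee: a spinlock critical section with interrupts disabled also acts as an RCU read-side critical section, so the explicit reader that the old wording implied would be redundant. A hedged sketch of the two equivalent forms:]

	/* explicit RCU reader: */
	rcu_read_lock();
	/* ... walk the thread group ... */
	rcu_read_unlock();

	/* implicit reader relied on here: the css_set_lock section with
	 * IRQs off already holds off RCU grace periods for the same span */
	spin_lock_irq(&css_set_lock);
	/* ... walk the thread group ... */
	spin_unlock_irq(&css_set_lock);
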
@@ -3709,9 +3632,32 @@ static int cgroup_stat_show(struct seq_file *seq, void *v)
return 0;
}
-static int __maybe_unused cgroup_extra_stat_show(struct seq_file *seq,
- struct cgroup *cgrp, int ssid)
+#ifdef CONFIG_CGROUP_SCHED
+/**
+ * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest
+ *
+ * Find and get @cgrp's css associated with @ss. If the css doesn't exist
+ * or is offline, %NULL is returned.
+ */
+static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
+ struct cgroup_subsys *ss)
+{
+ struct cgroup_subsys_state *css;
+
+ rcu_read_lock();
+ css = cgroup_css(cgrp, ss);
+ if (css && !css_tryget_online(css))
+ css = NULL;
+ rcu_read_unlock();
+
+ return css;
+}
+
+static int cgroup_extra_stat_show(struct seq_file *seq, int ssid)
{
+ struct cgroup *cgrp = seq_css(seq)->cgroup;
struct cgroup_subsys *ss = cgroup_subsys[ssid];
struct cgroup_subsys_state *css;
int ret;
@@ -3728,14 +3674,44 @@ static int __maybe_unused cgroup_extra_stat_show(struct seq_file *seq,
return ret;
}
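
[A hedged sketch of the calling pattern the relocated cgroup_tryget_css() is built for, mirroring the cgroup_extra_stat_show() body this hunk truncates: pin the css against offlining, treat NULL as nothing-to-report, and drop the reference when done:]

	struct cgroup_subsys_state *css;

	css = cgroup_tryget_css(cgrp, ss);
	if (!css)
		return 0;	/* css gone or offline: nothing to show */
	ret = ss->css_extra_stat_show(seq, css);
	css_put(css);
	return ret;
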
+static int cgroup_local_stat_show(struct seq_file *seq,
+ struct cgroup *cgrp, int ssid)
+{
+ struct cgroup_subsys *ss = cgroup_subsys[ssid];
+ struct cgroup_subsys_state *css;
+ int ret;
+
+ if (!ss->css_local_stat_show)
+ return 0;
+
+ css = cgroup_tryget_css(cgrp, ss);
+ if (!css)
+ return 0;
+
+ ret = ss->css_local_stat_show(seq, css);
+ css_put(css);
+ return ret;
+}
+#endif
+
static int cpu_stat_show(struct seq_file *seq, void *v)
{
- struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
int ret = 0;
cgroup_base_stat_cputime_show(seq);
#ifdef CONFIG_CGROUP_SCHED
- ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
+ ret = cgroup_extra_stat_show(seq, cpu_cgrp_id);
+#endif
+ return ret;
+}
+
+static int cpu_local_stat_show(struct seq_file *seq, void *v)
+{
+ struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
+ int ret = 0;
+
+#ifdef CONFIG_CGROUP_SCHED
+ ret = cgroup_local_stat_show(seq, cgrp, cpu_cgrp_id);
#endif
return ret;
}
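
[The cpu.stat.local file registered later in this patch exposes these per-cgroup local CPU statistics to userspace. A hypothetical reader -- the cgroup name "mygroup" is illustrative only:]

	#include <stdio.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/sys/fs/cgroup/mygroup/cpu.stat.local", "r");

		if (!f)
			return 1;	/* file absent or cgroup missing */
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}
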
@@ -3785,7 +3761,7 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
}
psi = cgroup_psi(cgrp);
- new = psi_trigger_create(psi, buf, res, of->file);
+ new = psi_trigger_create(psi, buf, res, of->file, of);
if (IS_ERR(new)) {
cgroup_put(cgrp);
return PTR_ERR(new);
@@ -3891,6 +3867,14 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
return psi_trigger_poll(&ctx->psi.trigger, of->file, pt);
}
+static int cgroup_pressure_open(struct kernfs_open_file *of)
+{
+ if (of->file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE))
+ return -EPERM;
+
+ return 0;
+}
+
static void cgroup_pressure_release(struct kernfs_open_file *of)
{
struct cgroup_file_ctx *ctx = of->priv;
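
[With the new open handler wired up to the PSI files below, an unprivileged writer now fails at open(2) time rather than at write(2) time, since FMODE_WRITE is set for both O_WRONLY and O_RDWR opens. A hedged userspace illustration (cgroup path hypothetical):]

	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>

	int main(void)
	{
		int fd = open("/sys/fs/cgroup/mygroup/cpu.pressure", O_WRONLY);

		if (fd < 0 && errno == EPERM)
			puts("PSI trigger writes need CAP_SYS_RESOURCE");
		return 0;
	}
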
@@ -4367,14 +4351,13 @@ static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
return ret;
}
-static int cgroup_rm_cftypes_locked(struct cftype *cfts)
+static void cgroup_rm_cftypes_locked(struct cftype *cfts)
{
lockdep_assert_held(&cgroup_mutex);
list_del(&cfts->node);
cgroup_apply_cftypes(cfts, false);
cgroup_exit_cftypes(cfts);
- return 0;
}
/**
@@ -4390,8 +4373,6 @@ static int cgroup_rm_cftypes_locked(struct cftype *cfts)
*/
int cgroup_rm_cftypes(struct cftype *cfts)
{
- int ret;
-
if (!cfts || cfts[0].name[0] == '\0')
return 0;
@@ -4399,9 +4380,9 @@ int cgroup_rm_cftypes(struct cftype *cfts)
return -ENOENT;
cgroup_lock();
- ret = cgroup_rm_cftypes_locked(cfts);
+ cgroup_rm_cftypes_locked(cfts);
cgroup_unlock();
- return ret;
+ return 0;
}
/**
@@ -5282,6 +5263,10 @@ static struct cftype cgroup_base_files[] = {
.name = "cpu.stat",
.seq_show = cpu_stat_show,
},
+ {
+ .name = "cpu.stat.local",
+ .seq_show = cpu_local_stat_show,
+ },
{ } /* terminate */
};
@@ -5290,6 +5275,7 @@ static struct cftype cgroup_psi_files[] = {
{
.name = "io.pressure",
.file_offset = offsetof(struct cgroup, psi_files[PSI_IO]),
+ .open = cgroup_pressure_open,
.seq_show = cgroup_io_pressure_show,
.write = cgroup_io_pressure_write,
.poll = cgroup_pressure_poll,
@@ -5298,6 +5284,7 @@ static struct cftype cgroup_psi_files[] = {
{
.name = "memory.pressure",
.file_offset = offsetof(struct cgroup, psi_files[PSI_MEM]),
+ .open = cgroup_pressure_open,
.seq_show = cgroup_memory_pressure_show,
.write = cgroup_memory_pressure_write,
.poll = cgroup_pressure_poll,
@@ -5306,6 +5293,7 @@ static struct cftype cgroup_psi_files[] = {
{
.name = "cpu.pressure",
.file_offset = offsetof(struct cgroup, psi_files[PSI_CPU]),
+ .open = cgroup_pressure_open,
.seq_show = cgroup_cpu_pressure_show,
.write = cgroup_cpu_pressure_write,
.poll = cgroup_pressure_poll,
@@ -5315,6 +5303,7 @@ static struct cftype cgroup_psi_files[] = {
{
.name = "irq.pressure",
.file_offset = offsetof(struct cgroup, psi_files[PSI_IRQ]),
+ .open = cgroup_pressure_open,
.seq_show = cgroup_irq_pressure_show,
.write = cgroup_irq_pressure_write,
.poll = cgroup_pressure_poll,
@@ -5346,7 +5335,7 @@ static struct cftype cgroup_psi_files[] = {
* RCU callback.
*
* 4. After the grace period, the css can be freed. Implemented in
- * css_free_work_fn().
+ * css_free_rwork_fn().
*
* It is actually hairier because both step 2 and 4 require process context
* and thus involve punting to css->destroy_work adding two additional
@@ -5590,8 +5579,7 @@ err_free_css:
/*
* The returned cgroup is fully initialized including its control mask, but
- * it isn't associated with its kernfs_node and doesn't have the control
- * mask applied.
+ * it doesn't have the control mask applied.
*/
static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
umode_t mode)
@@ -5917,7 +5905,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
/*
* Mark @cgrp and the associated csets dead. The former prevents
* further task migration and child creation by disabling
- * cgroup_lock_live_group(). The latter makes the csets ignored by
+ * cgroup_kn_lock_live(). The latter makes the csets ignored by
* the migration path.
*/
cgrp->self.flags &= ~CSS_ONLINE;
@@ -5939,7 +5927,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
parent->nr_threaded_children--;
spin_lock_irq(&css_set_lock);
- for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
+ for (tcgrp = parent; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
tcgrp->nr_descendants--;
tcgrp->nr_dying_descendants++;
/*
@@ -6132,8 +6120,8 @@ int __init cgroup_init(void)
continue;
if (cgroup1_ssid_disabled(ssid))
- printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
- ss->name);
+ pr_info("Disabling %s control group subsystem in v1 mounts\n",
+ ss->name);
cgrp_dfl_root.subsys_mask |= 1 << ss->id;
@@ -6696,6 +6684,9 @@ void cgroup_exit(struct task_struct *tsk)
list_add_tail(&tsk->cg_list, &cset->dying_tasks);
cset->nr_tasks--;
+ if (dl_task(tsk))
+ dec_dl_tasks_cs(tsk);
+
WARN_ON_ONCE(cgroup_task_frozen(tsk));
if (unlikely(!(tsk->flags & PF_KTHREAD) &&
test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags)))
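
[dl_task() is the scheduler's SCHED_DEADLINE predicate -- hence the new <linux/sched/deadline.h> include at the top of this patch; the hook keeps cpuset's deadline-task accounting in step when such a task exits. A simplified sketch of the predicate as defined in the scheduler headers:]

	static inline int dl_task(struct task_struct *p)
	{
		return dl_prio(p->prio);	/* prio in the deadline range */
	}
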