aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/cgroup.h23
-rw-r--r--include/uapi/linux/bpf.h11
-rw-r--r--kernel/bpf/arraymap.c2
-rw-r--r--kernel/bpf/verifier.c4
-rw-r--r--kernel/trace/bpf_trace.c30
-rw-r--r--samples/bpf/Makefile5
-rw-r--r--samples/bpf/bpf_helpers.h2
-rw-r--r--samples/bpf/test_current_task_under_cgroup_kern.c43
-rw-r--r--samples/bpf/test_current_task_under_cgroup_user.c145
9 files changed, 263 insertions, 2 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 984f73b719a9..a4414a11eea7 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -497,6 +497,23 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
}
+/**
+ * task_under_cgroup_hierarchy - check whether a task sits below a cgroup
+ * @task: task whose cgroup membership is examined
+ * @ancestor: cgroup that may be an ancestor of @task's cgroup
+ *
+ * Takes the default-hierarchy cgroup (dfl_cgrp) from @task's css_set and
+ * passes it, together with @ancestor, to cgroup_is_descendant().  The
+ * result is exactly what cgroup_is_descendant() reports, so the same rules
+ * apply and only the default hierarchy is considered.
+ */
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+					       struct cgroup *ancestor)
+{
+	struct cgroup *task_cgrp = task_css_set(task)->dfl_cgrp;
+
+	return cgroup_is_descendant(task_cgrp, ancestor);
+}
+
/* no synchronization, the result can only be used as a hint */
static inline bool cgroup_is_populated(struct cgroup *cgrp)
{
@@ -557,6 +574,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
#else /* !CONFIG_CGROUPS */
struct cgroup_subsys_state;
+struct cgroup;
static inline void css_put(struct cgroup_subsys_state *css) {}
static inline int cgroup_attach_task_all(struct task_struct *from,
@@ -574,6 +592,11 @@ static inline void cgroup_free(struct task_struct *p) {}
static inline int cgroup_init_early(void) { return 0; }
static inline int cgroup_init(void) { return 0; }
+/*
+ * Stub for the !CONFIG_CGROUPS branch: no membership test is performed and
+ * every task is reported as being under any @ancestor.
+ */
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+ struct cgroup *ancestor)
+{
+ return true;
+}
#endif /* !CONFIG_CGROUPS */
/*
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index da218fec6056..bea0c4e2830a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -375,6 +375,17 @@ enum bpf_func_id {
*/
BPF_FUNC_probe_write_user,
+ /**
+ * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
+ * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ * @index: index of the cgroup in the bpf_map
+ * Return:
+ * == 0 current failed the cgroup2 descendant test
+ * == 1 current succeeded the cgroup2 descendant test
+ * < 0 error
+ */
+ BPF_FUNC_current_task_under_cgroup,
+
__BPF_FUNC_MAX_ID,
};
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 633a650d7aeb..a2ac051c342f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
}
late_initcall(register_perf_event_array_map);
-#ifdef CONFIG_SOCK_CGROUP_DATA
+#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file /* not used */,
int fd)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7094c69ac199..d504722ebfa4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
goto error;
break;
case BPF_MAP_TYPE_CGROUP_ARRAY:
- if (func_id != BPF_FUNC_skb_in_cgroup)
+ if (func_id != BPF_FUNC_skb_in_cgroup &&
+ func_id != BPF_FUNC_current_task_under_cgroup)
goto error;
break;
default:
@@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
goto error;
break;
+ case BPF_FUNC_current_task_under_cgroup:
case BPF_FUNC_skb_in_cgroup:
if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
goto error;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b20438fdb029..6b794d6669a7 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -376,6 +376,34 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
.ret_type = RET_INTEGER,
};
+/*
+ * BPF helper body: test whether current is under the cgroup stored in a
+ * cgroup array map.
+ *
+ * r1 carries the map pointer and r2 the array index; r3-r5 are unused.
+ * Returns the result of task_under_cgroup_hierarchy() for current, or
+ * -EINVAL when called in interrupt context, -E2BIG when the index is
+ * outside the map, and -EAGAIN when the selected slot holds no cgroup.
+ */
+static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct bpf_array *cgrp_array;
+	struct cgroup *ancestor;
+	u32 slot = (u32)r2;
+
+	cgrp_array = container_of((struct bpf_map *)(long)r1,
+				  struct bpf_array, map);
+
+	if (unlikely(in_interrupt()))
+		return -EINVAL;
+	if (unlikely(slot >= cgrp_array->map.max_entries))
+		return -E2BIG;
+
+	/* Read the slot once so the NULL test and the use see the same value. */
+	ancestor = READ_ONCE(cgrp_array->ptrs[slot]);
+	if (unlikely(!ancestor))
+		return -EAGAIN;
+
+	return task_under_cgroup_hierarchy(current, ancestor);
+}
+
+/*
+ * Prototype for bpf_current_task_under_cgroup(): not restricted to GPL
+ * programs, returns an integer, takes a constant map pointer as arg1 and an
+ * unconstrained value as arg2 (the helper uses it as the array index).
+ */
+static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
+ .func = bpf_current_task_under_cgroup,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -407,6 +435,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
return &bpf_perf_event_read_proto;
case BPF_FUNC_probe_write_user:
return bpf_get_probe_write_proto();
+ case BPF_FUNC_current_task_under_cgroup:
+ return &bpf_current_task_under_cgroup_proto;
default:
return NULL;
}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 90ebf7d35c07..eb582c6264c3 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -24,6 +24,7 @@ hostprogs-y += test_overhead
hostprogs-y += test_cgrp2_array_pin
hostprogs-y += xdp1
hostprogs-y += xdp2
+hostprogs-y += test_current_task_under_cgroup
test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
@@ -49,6 +50,8 @@ test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
# reuse xdp1 source intentionally
xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
+test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \
+ test_current_task_under_cgroup_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -74,6 +77,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o
always += test_cgrp2_tc_kern.o
always += xdp1_kern.o
always += xdp2_kern.o
+always += test_current_task_under_cgroup_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
@@ -97,6 +101,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt
HOSTLOADLIBES_test_overhead += -lelf -lrt
HOSTLOADLIBES_xdp1 += -lelf
HOSTLOADLIBES_xdp2 += -lelf
+HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index cbc52df165b4..5e4c41e256b8 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -45,6 +45,8 @@ static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
(void *) BPF_FUNC_get_stackid;
static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
(void *) BPF_FUNC_probe_write_user;
+static int (*bpf_current_task_under_cgroup)(void *map, int index) =
+ (void *) BPF_FUNC_current_task_under_cgroup;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c
new file mode 100644
index 000000000000..86b28d7d6c99
--- /dev/null
+++ b/samples/bpf/test_current_task_under_cgroup_kern.c
@@ -0,0 +1,43 @@
+/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+#include <uapi/linux/utsname.h>
+
+/*
+ * One-slot cgroup array; bpf_prog1 tests current against the cgroup held
+ * at index 0.
+ */
+struct bpf_map_def SEC("maps") cgroup_map = {
+ .type = BPF_MAP_TYPE_CGROUP_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = 1,
+};
+
+/*
+ * One-entry array with 8-byte (u64) values; bpf_prog1 stores the value of
+ * bpf_get_current_pid_tgid() at index 0.
+ */
+struct bpf_map_def SEC("maps") perf_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = 1,
+};
+
+/*
+ * kprobe on sys_sync: store the caller's bpf_get_current_pid_tgid() value
+ * in perf_map at index 0, but only when the helper positively reports that
+ * current is under the cgroup held in cgroup_map at index 0.
+ *
+ * Always returns 0.
+ */
+SEC("kprobe/sys_sync")
+int bpf_prog1(struct pt_regs *ctx)
+{
+	u64 pid = bpf_get_current_pid_tgid();
+	int idx = 0;
+
+	/*
+	 * The helper returns 1 for "member", 0 for "not a member" and a
+	 * negative value on error.  Testing with !ret would treat an error
+	 * as membership, so require an explicit 1 before recording.
+	 */
+	if (bpf_current_task_under_cgroup(&cgroup_map, 0) != 1)
+		return 0;
+
+	bpf_map_update_elem(&perf_map, &idx, &pid, BPF_ANY);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c
new file mode 100644
index 000000000000..30b0bce884f9
--- /dev/null
+++ b/samples/bpf/test_current_task_under_cgroup_user.c
@@ -0,0 +1,145 @@
+/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <linux/bpf.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/limits.h>
+
+#define CGROUP_MOUNT_PATH "/mnt"
+#define CGROUP_PATH "/mnt/my-cgroup"
+
+/* clean_errno(): strerror(errno), or the string "None" when errno is 0. */
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+/*
+ * log_err(MSG, ...): print a printf-style message to stderr, prefixed with
+ * the source file, line number and the current errno text, followed by a
+ * newline.
+ */
+#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+ __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+
+/*
+ * join_cgroup - move the calling process into a cgroup
+ * @path: directory of the target cgroup
+ *
+ * Writes the caller's PID to "<path>/cgroup.procs".
+ *
+ * Returns 0 on success and 1 on failure (path too long, file cannot be
+ * opened, or the write fails); a message is logged for each failure.
+ */
+static int join_cgroup(const char *path)
+{
+	int fd, len, rc = 0;
+	pid_t pid = getpid();
+	char cgroup_path[PATH_MAX + 1];
+
+	len = snprintf(cgroup_path, sizeof(cgroup_path), "%s/cgroup.procs",
+		       path);
+	/* Refuse to open a silently truncated path. */
+	if (len < 0 || (size_t)len >= sizeof(cgroup_path)) {
+		if (len >= 0)
+			errno = ENAMETOOLONG;
+		log_err("Building Cgroup path");
+		return 1;
+	}
+
+	fd = open(cgroup_path, O_WRONLY);
+	if (fd < 0) {
+		log_err("Opening Cgroup");
+		return 1;
+	}
+
+	if (dprintf(fd, "%d\n", pid) < 0) {
+		log_err("Joining Cgroup");
+		rc = 1;
+	}
+	close(fd);
+	return rc;
+}
+
+/*
+ * Test driver for bpf_current_task_under_cgroup().
+ *
+ * Loads "<argv[0]>_kern.o", mounts a private cgroup2 hierarchy on
+ * CGROUP_MOUNT_PATH, creates CGROUP_PATH and stores its fd in the first
+ * loaded map.  It then checks two scenarios around a sync() call:
+ *   - positive: after joining CGROUP_PATH the BPF program must record our
+ *     PID in the second map;
+ *   - negative: after moving back to the root cgroup it must not.
+ *
+ * Returns 0 when both scenarios behave as expected, 1 otherwise.
+ */
+int main(int argc, char **argv)
+{
+	char filename[256];
+	int cg2 = -1, idx = 0, rc = 1;
+	/*
+	 * The PID map stores 8-byte values (value_size = sizeof(u64) in the
+	 * kernel program), so lookups and updates must use an 8-byte buffer;
+	 * a 4-byte pid_t would be overrun.
+	 */
+	__u64 pid_tgid = 0;
+	pid_t remote_pid, local_pid = getpid();
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	/*
+	 * This is to avoid interfering with existing cgroups. Unfortunately,
+	 * most people don't have cgroupv2 enabled at this point in time.
+	 * It's easier to create our own mount namespace and manage it
+	 * ourselves.
+	 */
+	if (unshare(CLONE_NEWNS)) {
+		log_err("unshare");
+		return 1;
+	}
+
+	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
+		log_err("mount fakeroot");
+		return 1;
+	}
+
+	if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) {
+		log_err("mount cgroup2");
+		return 1;
+	}
+
+	if (mkdir(CGROUP_PATH, 0777) && errno != EEXIST) {
+		log_err("mkdir cgroup");
+		return 1;
+	}
+
+	cg2 = open(CGROUP_PATH, O_RDONLY);
+	if (cg2 < 0) {
+		log_err("opening target cgroup");
+		goto cleanup_cgroup;
+	}
+
+	if (bpf_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) {
+		log_err("Adding target cgroup to map");
+		goto cleanup_cgroup;
+	}
+	if (join_cgroup(CGROUP_PATH)) {
+		log_err("Joining target cgroup");
+		goto cleanup_cgroup;
+	}
+
+	/*
+	 * The installed helper program catches the sync call, and should
+	 * write our PID to the map.
+	 */
+	sync();
+	if (bpf_lookup_elem(map_fd[1], &idx, &pid_tgid)) {
+		log_err("Reading PID from map");
+		goto leave_cgroup;
+	}
+
+	/*
+	 * Compare the low 32 bits of the stored value with getpid(); for
+	 * this single-threaded process they are expected to match.
+	 */
+	remote_pid = (pid_t)(pid_tgid & 0xffffffffULL);
+	if (local_pid != remote_pid) {
+		fprintf(stderr,
+			"BPF Helper didn't write correct PID to map, but: %d\n",
+			remote_pid);
+		goto leave_cgroup;
+	}
+
+	/* Verify the negative scenario; leave the cgroup */
+	if (join_cgroup(CGROUP_MOUNT_PATH))
+		goto leave_cgroup;
+
+	/* Clear the recorded value so a stale PID cannot mask a failure. */
+	pid_tgid = 0;
+	if (bpf_update_elem(map_fd[1], &idx, &pid_tgid, BPF_ANY)) {
+		log_err("Resetting PID in map");
+		goto cleanup_cgroup;
+	}
+
+	sync();
+	if (bpf_lookup_elem(map_fd[1], &idx, &pid_tgid)) {
+		log_err("Reading PID from map");
+		goto cleanup_cgroup;
+	}
+
+	remote_pid = (pid_t)(pid_tgid & 0xffffffffULL);
+	if (local_pid == remote_pid) {
+		fprintf(stderr, "BPF cgroup negative test did not work\n");
+		goto cleanup_cgroup;
+	}
+
+	rc = 0;
+	goto cleanup_cgroup;
+
+	/* Error while still inside the test cgroup: leave it before rmdir */
+leave_cgroup:
+	join_cgroup(CGROUP_MOUNT_PATH);
+cleanup_cgroup:
+	if (cg2 >= 0)
+		close(cg2);
+	rmdir(CGROUP_PATH);
+	return rc;
+}