9 files changed, 263 insertions, 2 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 984f73b719a9..a4414a11eea7 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -497,6 +497,23 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
 	return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
 }
 
+/**
+ * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
+ * @task: the task to be tested
+ * @ancestor: possible ancestor of @task's cgroup
+ *
+ * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
+ * It follows all the same rules as cgroup_is_descendant, and only applies
+ * to the default hierarchy.
+ */
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+					       struct cgroup *ancestor)
+{
+	struct css_set *cset = task_css_set(task);
+
+	return cgroup_is_descendant(cset->dfl_cgrp, ancestor);
+}
+
 /* no synchronization, the result can only be used as a hint */
 static inline bool cgroup_is_populated(struct cgroup *cgrp)
 {
@@ -557,6 +574,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
 #else /* !CONFIG_CGROUPS */
 
 struct cgroup_subsys_state;
+struct cgroup;
 
 static inline void css_put(struct cgroup_subsys_state *css) {}
 static inline int cgroup_attach_task_all(struct task_struct *from,
@@ -574,6 +592,11 @@ static inline void cgroup_free(struct task_struct *p) {}
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
 
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+					       struct cgroup *ancestor)
+{
+	return true;
+}
 #endif /* !CONFIG_CGROUPS */
 
 /*
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index da218fec6056..bea0c4e2830a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -375,6 +375,17 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_probe_write_user,
 
+	/**
+	 * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
+	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+	 * @index: index of the cgroup in the bpf_map
+	 * Return:
+	 *   == 0 current failed the cgroup2 descendant test
+	 *   == 1 current succeeded the cgroup2 descendant test
+	 *    < 0 error
+	 */
+	BPF_FUNC_current_task_under_cgroup,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 633a650d7aeb..a2ac051c342f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
 }
 late_initcall(register_perf_event_array_map);
 
-#ifdef CONFIG_SOCK_CGROUP_DATA
+#ifdef CONFIG_CGROUPS
 static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
 				     struct file *map_file /* not used */,
 				     int fd)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7094c69ac199..d504722ebfa4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_CGROUP_ARRAY:
-		if (func_id != BPF_FUNC_skb_in_cgroup)
+		if (func_id != BPF_FUNC_skb_in_cgroup &&
+		    func_id != BPF_FUNC_current_task_under_cgroup)
 			goto error;
 		break;
 	default:
@@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
 			goto error;
 		break;
+	case BPF_FUNC_current_task_under_cgroup:
 	case BPF_FUNC_skb_in_cgroup:
 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
 			goto error;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b20438fdb029..6b794d6669a7 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -376,6 +376,34 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct bpf_map *map = (struct bpf_map *)(long)r1;
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	struct cgroup *cgrp;
+	u32 idx = (u32)r2;
+
+	if (unlikely(in_interrupt()))
+		return -EINVAL;
+
+	if (unlikely(idx >= array->map.max_entries))
+		return -E2BIG;
+
+	cgrp = READ_ONCE(array->ptrs[idx]);
+	if (unlikely(!cgrp))
+		return -EAGAIN;
+
+	return task_under_cgroup_hierarchy(current, cgrp);
+}
+
+static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
+	.func           = bpf_current_task_under_cgroup,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_CONST_MAP_PTR,
+	.arg2_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -407,6 +435,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 		return &bpf_perf_event_read_proto;
 	case BPF_FUNC_probe_write_user:
 		return bpf_get_probe_write_proto();
+	case BPF_FUNC_current_task_under_cgroup:
+		return &bpf_current_task_under_cgroup_proto;
 	default:
 		return NULL;
 	}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 90ebf7d35c07..eb582c6264c3 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -24,6 +24,7 @@ hostprogs-y += test_overhead
 hostprogs-y += test_cgrp2_array_pin
 hostprogs-y += xdp1
 hostprogs-y += xdp2
+hostprogs-y += test_current_task_under_cgroup
 
 test_verifier-objs := test_verifier.o libbpf.o
 test_maps-objs := test_maps.o libbpf.o
@@ -49,6 +50,8 @@ test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
 xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
 # reuse xdp1 source intentionally
 xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
+test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \
+				       test_current_task_under_cgroup_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -74,6 +77,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o
 always += test_cgrp2_tc_kern.o
 always += xdp1_kern.o
 always += xdp2_kern.o
+always += test_current_task_under_cgroup_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
@@ -97,6 +101,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt
 HOSTLOADLIBES_test_overhead += -lelf -lrt
 HOSTLOADLIBES_xdp1 += -lelf
 HOSTLOADLIBES_xdp2 += -lelf
+HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index cbc52df165b4..5e4c41e256b8 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -45,6 +45,8 @@ static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
 	(void *) BPF_FUNC_get_stackid;
 static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
 	(void *) BPF_FUNC_probe_write_user;
+static int (*bpf_current_task_under_cgroup)(void *map, int index) =
+	(void *) BPF_FUNC_current_task_under_cgroup;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c
new file mode 100644
index 000000000000..86b28d7d6c99
--- /dev/null
+++ b/samples/bpf/test_current_task_under_cgroup_kern.c
@@ -0,0 +1,43 @@
+/* Copyright (c) 2016 Sargun Dhillon <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+#include <uapi/linux/utsname.h>
+
+struct bpf_map_def SEC("maps") cgroup_map = {
+	.type			= BPF_MAP_TYPE_CGROUP_ARRAY,
+	.key_size		= sizeof(u32),
+	.value_size		= sizeof(u32),
+	.max_entries	= 1,
+};
+
+struct bpf_map_def SEC("maps") perf_map = {
+	.type			= BPF_MAP_TYPE_ARRAY,
+	.key_size		= sizeof(u32),
+	.value_size		= sizeof(u64),
+	.max_entries	= 1,
+};
+
+/* Writes the last PID that called sync to a map at index 0 */
+SEC("kprobe/sys_sync")
+int bpf_prog1(struct pt_regs *ctx)
+{
+	u64 pid = bpf_get_current_pid_tgid();
+	int idx = 0;
+
+	if (!bpf_current_task_under_cgroup(&cgroup_map, 0))
+		return 0;
+
+	bpf_map_update_elem(&perf_map, &idx, &pid, BPF_ANY);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c
new file mode 100644
index 000000000000..30b0bce884f9
--- /dev/null
+++ b/samples/bpf/test_current_task_under_cgroup_user.c
@@ -0,0 +1,145 @@
+/* Copyright (c) 2016 Sargun Dhillon <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <linux/bpf.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/limits.h>
+
+#define CGROUP_MOUNT_PATH	"/mnt"
+#define CGROUP_PATH		"/mnt/my-cgroup"
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+	__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+
+static int join_cgroup(char *path)
+{
+	int fd, rc = 0;
+	pid_t pid = getpid();
+	char cgroup_path[PATH_MAX + 1];
+
+	snprintf(cgroup_path, sizeof(cgroup_path), "%s/cgroup.procs", path);
+
+	fd = open(cgroup_path, O_WRONLY);
+	if (fd < 0) {
+		log_err("Opening Cgroup");
+		return 1;
+	}
+
+	if (dprintf(fd, "%d\n", pid) < 0) {
+		log_err("Joining Cgroup");
+		rc = 1;
+	}
+	close(fd);
+	return rc;
+}
+
+int main(int argc, char **argv)
+{
+	char filename[256];
+	int cg2, idx = 0;
+	pid_t remote_pid, local_pid = getpid();
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	/*
+	 * This is to avoid interfering with existing cgroups. Unfortunately,
+	 * most people don't have cgroupv2 enabled at this point in time.
+	 * It's easier to create our own mount namespace and manage it
+	 * ourselves.
+	 */
+	if (unshare(CLONE_NEWNS)) {
+		log_err("unshare");
+		return 1;
+	}
+
+	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
+		log_err("mount fakeroot");
+		return 1;
+	}
+
+	if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) {
+		log_err("mount cgroup2");
+		return 1;
+	}
+
+	if (mkdir(CGROUP_PATH, 0777) && errno != EEXIST) {
+		log_err("mkdir cgroup");
+		return 1;
+	}
+
+	cg2 = open(CGROUP_PATH, O_RDONLY);
+	if (cg2 < 0) {
+		log_err("opening target cgroup");
+		goto cleanup_cgroup_err;
+	}
+
+	if (bpf_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) {
+		log_err("Adding target cgroup to map");
+		goto cleanup_cgroup_err;
+	}
+	if (join_cgroup("/mnt/my-cgroup")) {
+		log_err("Leaving target cgroup");
+		goto cleanup_cgroup_err;
+	}
+
+	/*
+	 * The installed helper program catched the sync call, and should
+	 * write it to the map.
+	 */
+
+	sync();
+	bpf_lookup_elem(map_fd[1], &idx, &remote_pid);
+
+	if (local_pid != remote_pid) {
+		fprintf(stderr,
+			"BPF Helper didn't write correct PID to map, but: %d\n",
+			remote_pid);
+		goto leave_cgroup_err;
+	}
+
+	/* Verify the negative scenario; leave the cgroup */
+	if (join_cgroup(CGROUP_MOUNT_PATH))
+		goto leave_cgroup_err;
+
+	remote_pid = 0;
+	bpf_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY);
+
+	sync();
+	bpf_lookup_elem(map_fd[1], &idx, &remote_pid);
+
+	if (local_pid == remote_pid) {
+		fprintf(stderr, "BPF cgroup negative test did not work\n");
+		goto cleanup_cgroup_err;
+	}
+
+	rmdir(CGROUP_PATH);
+	return 0;
+
+	/* Error condition, cleanup */
+leave_cgroup_err:
+	join_cgroup(CGROUP_MOUNT_PATH);
+cleanup_cgroup_err:
+	rmdir(CGROUP_PATH);
+	return 1;
+}