diff options
Diffstat (limited to 'kernel/bpf/inode.c')
| -rw-r--r-- | kernel/bpf/inode.c | 387 | 
1 files changed, 387 insertions, 0 deletions
| diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c new file mode 100644 index 000000000000..be6d726e31c9 --- /dev/null +++ b/kernel/bpf/inode.c @@ -0,0 +1,387 @@ +/* + * Minimal file system backend for holding eBPF maps and programs, + * used by bpf(2) object pinning. + * + * Authors: + * + *	Daniel Borkmann <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/magic.h> +#include <linux/major.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/fs.h> +#include <linux/kdev_t.h> +#include <linux/filter.h> +#include <linux/bpf.h> + +enum bpf_type { +	BPF_TYPE_UNSPEC	= 0, +	BPF_TYPE_PROG, +	BPF_TYPE_MAP, +}; + +static void *bpf_any_get(void *raw, enum bpf_type type) +{ +	switch (type) { +	case BPF_TYPE_PROG: +		atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt); +		break; +	case BPF_TYPE_MAP: +		atomic_inc(&((struct bpf_map *)raw)->refcnt); +		break; +	default: +		WARN_ON_ONCE(1); +		break; +	} + +	return raw; +} + +static void bpf_any_put(void *raw, enum bpf_type type) +{ +	switch (type) { +	case BPF_TYPE_PROG: +		bpf_prog_put(raw); +		break; +	case BPF_TYPE_MAP: +		bpf_map_put(raw); +		break; +	default: +		WARN_ON_ONCE(1); +		break; +	} +} + +static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) +{ +	void *raw; + +	*type = BPF_TYPE_MAP; +	raw = bpf_map_get(ufd); +	if (IS_ERR(raw)) { +		*type = BPF_TYPE_PROG; +		raw = bpf_prog_get(ufd); +	} + +	return raw; +} + +static const struct inode_operations bpf_dir_iops; + +static const struct inode_operations bpf_prog_iops = { }; +static const struct inode_operations bpf_map_iops  = { }; + +static struct inode *bpf_get_inode(struct super_block *sb, +				   const struct inode *dir, +				   umode_t mode) +{ +	struct inode *inode; + +	switch (mode & S_IFMT) { +	case S_IFDIR: +	case S_IFREG: +		break; +	default: +		return ERR_PTR(-EINVAL); +	} + +	inode = new_inode(sb); +	if (!inode) +		return ERR_PTR(-ENOSPC); + +	inode->i_ino = get_next_ino(); +	inode->i_atime = CURRENT_TIME; +	inode->i_mtime = inode->i_atime; +	inode->i_ctime = inode->i_atime; + +	inode_init_owner(inode, dir, mode); + +	return inode; +} + +static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) +{ +	*type = BPF_TYPE_UNSPEC; +	if (inode->i_op == &bpf_prog_iops) +		*type = BPF_TYPE_PROG; +	else if (inode->i_op == &bpf_map_iops) +		*type = BPF_TYPE_MAP; +	else +		return -EACCES; + +	return 0; +} + +static bool bpf_dname_reserved(const struct dentry *dentry) +{ +	return strchr(dentry->d_name.name, '.'); +} + +static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ +	struct inode *inode; + +	if (bpf_dname_reserved(dentry)) +		return -EPERM; + +	inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); +	if (IS_ERR(inode)) +		return PTR_ERR(inode); + +	inode->i_op = &bpf_dir_iops; +	inode->i_fop = &simple_dir_operations; + +	inc_nlink(inode); +	inc_nlink(dir); + +	d_instantiate(dentry, inode); +	dget(dentry); + +	return 0; +} + +static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, +			 umode_t mode, const struct inode_operations *iops) +{ +	struct inode *inode; + +	if (bpf_dname_reserved(dentry)) +		return -EPERM; + +	inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); +	if (IS_ERR(inode)) +		return PTR_ERR(inode); + +	inode->i_op = iops; +	inode->i_private = dentry->d_fsdata; + +	d_instantiate(dentry, inode); +	dget(dentry); + +	return 0; +} + +static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, +		     dev_t devt) +{ +	enum bpf_type type = MINOR(devt); + +	if (MAJOR(devt) != UNNAMED_MAJOR || !S_ISREG(mode) || +	    dentry->d_fsdata == NULL) +		return -EPERM; + +	switch (type) { +	case BPF_TYPE_PROG: +		return bpf_mkobj_ops(dir, dentry, mode, &bpf_prog_iops); +	case BPF_TYPE_MAP: +		return bpf_mkobj_ops(dir, dentry, mode, &bpf_map_iops); +	default: +		return -EPERM; +	} +} + +static const struct inode_operations bpf_dir_iops = { +	.lookup		= simple_lookup, +	.mknod		= bpf_mkobj, +	.mkdir		= bpf_mkdir, +	.rmdir		= simple_rmdir, +	.unlink		= simple_unlink, +}; + +static int bpf_obj_do_pin(const struct filename *pathname, void *raw, +			  enum bpf_type type) +{ +	struct dentry *dentry; +	struct inode *dir; +	struct path path; +	umode_t mode; +	dev_t devt; +	int ret; + +	dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0); +	if (IS_ERR(dentry)) +		return PTR_ERR(dentry); + +	mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); +	devt = MKDEV(UNNAMED_MAJOR, type); + +	ret = security_path_mknod(&path, dentry, mode, devt); +	if (ret) +		goto out; + +	dir = d_inode(path.dentry); +	if (dir->i_op != &bpf_dir_iops) { +		ret = -EPERM; +		goto out; +	} + +	dentry->d_fsdata = raw; +	ret = vfs_mknod(dir, dentry, mode, devt); +	dentry->d_fsdata = NULL; +out: +	done_path_create(&path, dentry); +	return ret; +} + +int bpf_obj_pin_user(u32 ufd, const char __user *pathname) +{ +	struct filename *pname; +	enum bpf_type type; +	void *raw; +	int ret; + +	pname = getname(pathname); +	if (IS_ERR(pname)) +		return PTR_ERR(pname); + +	raw = bpf_fd_probe_obj(ufd, &type); +	if (IS_ERR(raw)) { +		ret = PTR_ERR(raw); +		goto out; +	} + +	ret = bpf_obj_do_pin(pname, raw, type); +	if (ret != 0) +		bpf_any_put(raw, type); +out: +	putname(pname); +	return ret; +} + +static void *bpf_obj_do_get(const struct filename *pathname, +			    enum bpf_type *type) +{ +	struct inode *inode; +	struct path path; +	void *raw; +	int ret; + +	ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path); +	if (ret) +		return ERR_PTR(ret); + +	inode = d_backing_inode(path.dentry); +	ret = inode_permission(inode, MAY_WRITE); +	if (ret) +		goto out; + +	ret = bpf_inode_type(inode, type); +	if (ret) +		goto out; + +	raw = bpf_any_get(inode->i_private, *type); +	touch_atime(&path); + +	path_put(&path); +	return raw; +out: +	path_put(&path); +	return ERR_PTR(ret); +} + +int bpf_obj_get_user(const char __user *pathname) +{ +	enum bpf_type type = BPF_TYPE_UNSPEC; +	struct filename *pname; +	int ret = -ENOENT; +	void *raw; + +	pname = getname(pathname); +	if (IS_ERR(pname)) +		return PTR_ERR(pname); + +	raw = bpf_obj_do_get(pname, &type); +	if (IS_ERR(raw)) { +		ret = PTR_ERR(raw); +		goto out; +	} + +	if (type == BPF_TYPE_PROG) +		ret = bpf_prog_new_fd(raw); +	else if (type == BPF_TYPE_MAP) +		ret = bpf_map_new_fd(raw); +	else +		goto out; + +	if (ret < 0) +		bpf_any_put(raw, type); +out: +	putname(pname); +	return ret; +} + +static void bpf_evict_inode(struct inode *inode) +{ +	enum bpf_type type; + +	truncate_inode_pages_final(&inode->i_data); +	clear_inode(inode); + +	if (!bpf_inode_type(inode, &type)) +		bpf_any_put(inode->i_private, type); +} + +static const struct super_operations bpf_super_ops = { +	.statfs		= simple_statfs, +	.drop_inode	= generic_delete_inode, +	.evict_inode	= bpf_evict_inode, +}; + +static int bpf_fill_super(struct super_block *sb, void *data, int silent) +{ +	static struct tree_descr bpf_rfiles[] = { { "" } }; +	struct inode *inode; +	int ret; + +	ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); +	if (ret) +		return ret; + +	sb->s_op = &bpf_super_ops; + +	inode = sb->s_root->d_inode; +	inode->i_op = &bpf_dir_iops; +	inode->i_mode &= ~S_IALLUGO; +	inode->i_mode |= S_ISVTX | S_IRWXUGO; + +	return 0; +} + +static struct dentry *bpf_mount(struct file_system_type *type, int flags, +				const char *dev_name, void *data) +{ +	return mount_ns(type, flags, current->nsproxy->mnt_ns, bpf_fill_super); +} + +static struct file_system_type bpf_fs_type = { +	.owner		= THIS_MODULE, +	.name		= "bpf", +	.mount		= bpf_mount, +	.kill_sb	= kill_litter_super, +	.fs_flags	= FS_USERNS_MOUNT, +}; + +MODULE_ALIAS_FS("bpf"); + +static int __init bpf_init(void) +{ +	int ret; + +	ret = sysfs_create_mount_point(fs_kobj, "bpf"); +	if (ret) +		return ret; + +	ret = register_filesystem(&bpf_fs_type); +	if (ret) +		sysfs_remove_mount_point(fs_kobj, "bpf"); + +	return ret; +} +fs_initcall(bpf_init); |