aboutsummaryrefslogtreecommitdiff
path: root/kernel/bpf
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/btf.c2
-rw-r--r--kernel/bpf/cgroup.c13
-rw-r--r--kernel/bpf/memalloc.c88
-rw-r--r--kernel/bpf/mprog.c13
-rw-r--r--kernel/bpf/offload.c12
-rw-r--r--kernel/bpf/queue_stack_maps.c21
-rw-r--r--kernel/bpf/syscall.c21
-rw-r--r--kernel/bpf/tcx.c8
-rw-r--r--kernel/bpf/verifier.c14
9 files changed, 148 insertions, 44 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 1095bbe29859..8090d7fb11ef 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -8501,7 +8501,7 @@ bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
tname = btf_name_by_offset(btf, walk_type->name_off);
ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s", tname, suffix);
- if (ret < 0)
+ if (ret >= sizeof(safe_tname))
return false;
safe_id = btf_find_by_name_kind(btf, safe_tname, BTF_INFO_KIND(walk_type->info));
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 5b2741aa0d9b..03b3d4492980 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -785,7 +785,8 @@ found:
* to descendants
* @cgrp: The cgroup which descendants to traverse
* @link: A link for which to replace BPF program
- * @type: Type of attach operation
+ * @new_prog: &struct bpf_prog for the target BPF program with its refcnt
+ * incremented
*
* Must be called with cgroup_mutex held.
*/
@@ -1334,7 +1335,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
* __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
* @sk: The socket sending or receiving traffic
* @skb: The skb that is being sent or received
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
*
* If no socket is passed, or the socket is not of type INET or INET6,
* this function does nothing and returns 0.
@@ -1424,7 +1425,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
/**
* __cgroup_bpf_run_filter_sk() - Run a program on a sock
* @sk: sock structure to manipulate
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
*
* socket is passed is expected to be of type INET or INET6.
*
@@ -1449,7 +1450,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
* provided by user sockaddr
* @sk: sock struct that will use sockaddr
* @uaddr: sockaddr struct provided by user
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
* @t_ctx: Pointer to attach type specific context
* @flags: Pointer to u32 which contains higher bits of BPF program
* return value (OR'ed together).
@@ -1496,7 +1497,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
* @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
* sk with connection information (IP addresses, etc.) May not contain
* cgroup info if it is a req sock.
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
*
* socket passed is expected to be of type INET or INET6.
*
@@ -1670,7 +1671,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
* @ppos: value-result argument: value is position at which read from or write
* to sysctl is happening, result is new position if program overrode it,
* initial value otherwise
- * @type: type of program to be executed
+ * @atype: type of program to be executed
*
* Program is run when sysctl is being accessed, either read or written, and
* can allow or deny such access.
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 9c49ae53deaf..d93ddac283d4 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -459,8 +459,7 @@ static void notrace irq_work_raise(struct bpf_mem_cache *c)
* Typical case will be between 11K and 116K closer to 11K.
* bpf progs can and should share bpf_mem_cache when possible.
*/
-
-static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
+static void init_refill_work(struct bpf_mem_cache *c)
{
init_irq_work(&c->refill_work, bpf_mem_refill);
if (c->unit_size <= 256) {
@@ -476,7 +475,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
c->high_watermark = max(96 * 256 / c->unit_size, 3);
}
c->batch = max((c->high_watermark - c->low_watermark) / 4 * 3, 1);
+}
+static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
+{
/* To avoid consuming memory assume that 1st run of bpf
* prog won't be doing more than 4 map_update_elem from
* irq disabled region
@@ -484,6 +486,31 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false);
}
+static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
+{
+ struct llist_node *first;
+ unsigned int obj_size;
+
+ /* For per-cpu allocator, the size of free objects in free list doesn't
+ * match with unit_size and now there is no way to get the size of
+ * per-cpu pointer saved in free object, so just skip the checking.
+ */
+ if (c->percpu_size)
+ return 0;
+
+ first = c->free_llist.first;
+ if (!first)
+ return 0;
+
+ obj_size = ksize(first);
+ if (obj_size != c->unit_size) {
+ WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n",
+ idx, obj_size, c->unit_size);
+ return -EINVAL;
+ }
+ return 0;
+}
+
/* When size != 0 bpf_mem_cache for each cpu.
* This is typical bpf hash map use case when all elements have equal size.
*
@@ -494,10 +521,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
{
static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096};
+ int cpu, i, err, unit_size, percpu_size = 0;
struct bpf_mem_caches *cc, __percpu *pcc;
struct bpf_mem_cache *c, __percpu *pc;
struct obj_cgroup *objcg = NULL;
- int cpu, i, unit_size, percpu_size = 0;
if (size) {
pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
@@ -521,6 +548,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
c->objcg = objcg;
c->percpu_size = percpu_size;
c->tgt = c;
+ init_refill_work(c);
prefill_mem_cache(c, cpu);
}
ma->cache = pc;
@@ -534,6 +562,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL);
if (!pcc)
return -ENOMEM;
+ err = 0;
#ifdef CONFIG_MEMCG_KMEM
objcg = get_obj_cgroup_from_current();
#endif
@@ -544,11 +573,30 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
c->unit_size = sizes[i];
c->objcg = objcg;
c->tgt = c;
+
+ init_refill_work(c);
+ /* Another bpf_mem_cache will be used when allocating
+ * c->unit_size in bpf_mem_alloc(), so doesn't prefill
+ * for the bpf_mem_cache because these free objects will
+ * never be used.
+ */
+ if (i != bpf_mem_cache_idx(c->unit_size))
+ continue;
prefill_mem_cache(c, cpu);
+ err = check_obj_size(c, i);
+ if (err)
+ goto out;
}
}
+
+out:
ma->caches = pcc;
- return 0;
+ /* refill_work is either zeroed or initialized, so it is safe to
+ * call irq_work_sync().
+ */
+ if (err)
+ bpf_mem_alloc_destroy(ma);
+ return err;
}
static void drain_mem_cache(struct bpf_mem_cache *c)
@@ -916,3 +964,35 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
return !ret ? NULL : ret + LLIST_NODE_SZ;
}
+
+static __init int bpf_mem_cache_adjust_size(void)
+{
+ unsigned int size;
+
+ /* Adjusting the indexes in size_index() according to the object_size
+ * of underlying slab cache, so bpf_mem_alloc() will select a
+ * bpf_mem_cache with unit_size equal to the object_size of
+ * the underlying slab cache.
+ *
+ * The maximal value of KMALLOC_MIN_SIZE and __kmalloc_minalign() is
+ * 256-bytes, so only do adjustment for [8-bytes, 192-bytes].
+ */
+ for (size = 192; size >= 8; size -= 8) {
+ unsigned int kmalloc_size, index;
+
+ kmalloc_size = kmalloc_size_roundup(size);
+ if (kmalloc_size == size)
+ continue;
+
+ if (kmalloc_size <= 192)
+ index = size_index[(kmalloc_size - 1) / 8];
+ else
+ index = fls(kmalloc_size - 1) - 1;
+ /* Only overwrite if necessary */
+ if (size_index[(size - 1) / 8] != index)
+ size_index[(size - 1) / 8] = index;
+ }
+
+ return 0;
+}
+subsys_initcall(bpf_mem_cache_adjust_size);
diff --git a/kernel/bpf/mprog.c b/kernel/bpf/mprog.c
index 32d2c4829eb8..1394168062e8 100644
--- a/kernel/bpf/mprog.c
+++ b/kernel/bpf/mprog.c
@@ -253,6 +253,9 @@ int bpf_mprog_attach(struct bpf_mprog_entry *entry,
goto out;
}
idx = tidx;
+ } else if (bpf_mprog_total(entry) == bpf_mprog_max()) {
+ ret = -ERANGE;
+ goto out;
}
if (flags & BPF_F_BEFORE) {
tidx = bpf_mprog_pos_before(entry, &rtuple);
@@ -398,14 +401,16 @@ int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr,
struct bpf_mprog_cp *cp;
struct bpf_prog *prog;
const u32 flags = 0;
+ u32 id, count = 0;
+ u64 revision = 1;
int i, ret = 0;
- u32 id, count;
- u64 revision;
if (attr->query.query_flags || attr->query.attach_flags)
return -EINVAL;
- revision = bpf_mprog_revision(entry);
- count = bpf_mprog_total(entry);
+ if (entry) {
+ revision = bpf_mprog_revision(entry);
+ count = bpf_mprog_total(entry);
+ }
if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
return -EFAULT;
if (copy_to_user(&uattr->query.revision, &revision, sizeof(revision)))
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 3e4f2ec1af06..87d6693d8233 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -199,12 +199,14 @@ static int __bpf_prog_dev_bound_init(struct bpf_prog *prog, struct net_device *n
offload->netdev = netdev;
ondev = bpf_offload_find_netdev(offload->netdev);
+ /* When program is offloaded require presence of "true"
+ * bpf_offload_netdev, avoid the one created for !ondev case below.
+ */
+ if (bpf_prog_is_offloaded(prog->aux) && (!ondev || !ondev->offdev)) {
+ err = -EINVAL;
+ goto err_free;
+ }
if (!ondev) {
- if (bpf_prog_is_offloaded(prog->aux)) {
- err = -EINVAL;
- goto err_free;
- }
-
/* When only binding to the device, explicitly
* create an entry in the hashtable.
*/
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index 8d2ddcb7566b..d869f51ea93a 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -98,7 +98,12 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
int err = 0;
void *ptr;
- raw_spin_lock_irqsave(&qs->lock, flags);
+ if (in_nmi()) {
+ if (!raw_spin_trylock_irqsave(&qs->lock, flags))
+ return -EBUSY;
+ } else {
+ raw_spin_lock_irqsave(&qs->lock, flags);
+ }
if (queue_stack_map_is_empty(qs)) {
memset(value, 0, qs->map.value_size);
@@ -128,7 +133,12 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
void *ptr;
u32 index;
- raw_spin_lock_irqsave(&qs->lock, flags);
+ if (in_nmi()) {
+ if (!raw_spin_trylock_irqsave(&qs->lock, flags))
+ return -EBUSY;
+ } else {
+ raw_spin_lock_irqsave(&qs->lock, flags);
+ }
if (queue_stack_map_is_empty(qs)) {
memset(value, 0, qs->map.value_size);
@@ -193,7 +203,12 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
if (flags & BPF_NOEXIST || flags > BPF_EXIST)
return -EINVAL;
- raw_spin_lock_irqsave(&qs->lock, irq_flags);
+ if (in_nmi()) {
+ if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags))
+ return -EBUSY;
+ } else {
+ raw_spin_lock_irqsave(&qs->lock, irq_flags);
+ }
if (queue_stack_map_is_full(qs)) {
if (!replace) {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index eb01c31ed591..d77b2f8b9364 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3796,7 +3796,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
{
enum bpf_prog_type ptype;
struct bpf_prog *prog;
- u32 mask;
int ret;
if (CHECK_ATTR(BPF_PROG_ATTACH))
@@ -3805,10 +3804,16 @@ static int bpf_prog_attach(const union bpf_attr *attr)
ptype = attach_type_to_prog_type(attr->attach_type);
if (ptype == BPF_PROG_TYPE_UNSPEC)
return -EINVAL;
- mask = bpf_mprog_supported(ptype) ?
- BPF_F_ATTACH_MASK_MPROG : BPF_F_ATTACH_MASK_BASE;
- if (attr->attach_flags & ~mask)
- return -EINVAL;
+ if (bpf_mprog_supported(ptype)) {
+ if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG)
+ return -EINVAL;
+ } else {
+ if (attr->attach_flags & ~BPF_F_ATTACH_MASK_BASE)
+ return -EINVAL;
+ if (attr->relative_fd ||
+ attr->expected_revision)
+ return -EINVAL;
+ }
prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
if (IS_ERR(prog))
@@ -3878,6 +3883,10 @@ static int bpf_prog_detach(const union bpf_attr *attr)
if (IS_ERR(prog))
return PTR_ERR(prog);
}
+ } else if (attr->attach_flags ||
+ attr->relative_fd ||
+ attr->expected_revision) {
+ return -EINVAL;
}
switch (ptype) {
@@ -3913,7 +3922,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
return ret;
}
-#define BPF_PROG_QUERY_LAST_FIELD query.link_attach_flags
+#define BPF_PROG_QUERY_LAST_FIELD query.revision
static int bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
diff --git a/kernel/bpf/tcx.c b/kernel/bpf/tcx.c
index 13f0b5dc8262..1338a13a8b64 100644
--- a/kernel/bpf/tcx.c
+++ b/kernel/bpf/tcx.c
@@ -123,7 +123,6 @@ int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
{
bool ingress = attr->query.attach_type == BPF_TCX_INGRESS;
struct net *net = current->nsproxy->net_ns;
- struct bpf_mprog_entry *entry;
struct net_device *dev;
int ret;
@@ -133,12 +132,7 @@ int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
ret = -ENODEV;
goto out;
}
- entry = tcx_entry_fetch(dev, ingress);
- if (!entry) {
- ret = -ENOENT;
- goto out;
- }
- ret = bpf_mprog_query(attr, uattr, entry);
+ ret = bpf_mprog_query(attr, uattr, tcx_entry_fetch(dev, ingress));
out:
rtnl_unlock();
return ret;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bb78212fa5b2..873ade146f3d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4047,11 +4047,9 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
bitmap_from_u64(mask, bt_reg_mask(bt));
for_each_set_bit(i, mask, 32) {
reg = &st->frame[0]->regs[i];
- if (reg->type != SCALAR_VALUE) {
- bt_clear_reg(bt, i);
- continue;
- }
- reg->precise = true;
+ bt_clear_reg(bt, i);
+ if (reg->type == SCALAR_VALUE)
+ reg->precise = true;
}
return 0;
}
@@ -14481,7 +14479,7 @@ static int check_return_code(struct bpf_verifier_env *env)
struct tnum enforce_attach_type_range = tnum_unknown;
const struct bpf_prog *prog = env->prog;
struct bpf_reg_state *reg;
- struct tnum range = tnum_range(0, 1);
+ struct tnum range = tnum_range(0, 1), const_0 = tnum_const(0);
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
int err;
struct bpf_func_state *frame = env->cur_state->frame[0];
@@ -14529,8 +14527,8 @@ static int check_return_code(struct bpf_verifier_env *env)
return -EINVAL;
}
- if (!tnum_in(tnum_const(0), reg->var_off)) {
- verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
+ if (!tnum_in(const_0, reg->var_off)) {
+ verbose_invalid_scalar(env, reg, &const_0, "async callback", "R0");
return -EINVAL;
}
return 0;