aboutsummaryrefslogtreecommitdiff
path: root/kernel/bpf/verifier.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r--kernel/bpf/verifier.c1211
1 files changed, 952 insertions, 259 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a39eedecc93a..aedac2ac02b9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -187,6 +187,9 @@ struct bpf_verifier_stack_elem {
POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
+static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
+static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
+
static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
@@ -245,6 +248,7 @@ struct bpf_call_arg_meta {
struct bpf_map *map_ptr;
bool raw_mode;
bool pkt_access;
+ u8 release_regno;
int regno;
int access_size;
int mem_size;
@@ -257,6 +261,8 @@ struct bpf_call_arg_meta {
struct btf *ret_btf;
u32 ret_btf_id;
u32 subprogno;
+ struct bpf_map_value_off_desc *kptr_off_desc;
+ u8 uninit_dynptr_regno;
};
struct btf *btf_vmlinux;
@@ -452,7 +458,8 @@ static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
{
return base_type(type) == PTR_TO_SOCKET ||
base_type(type) == PTR_TO_TCP_SOCK ||
- base_type(type) == PTR_TO_MEM;
+ base_type(type) == PTR_TO_MEM ||
+ base_type(type) == PTR_TO_BTF_ID;
}
static bool type_is_rdonly_mem(u32 type)
@@ -470,17 +477,6 @@ static bool type_may_be_null(u32 type)
return type & PTR_MAYBE_NULL;
}
-/* Determine whether the function releases some resources allocated by another
- * function call. The first reference type argument will be assumed to be
- * released by release_reference().
- */
-static bool is_release_function(enum bpf_func_id func_id)
-{
- return func_id == BPF_FUNC_sk_release ||
- func_id == BPF_FUNC_ringbuf_submit ||
- func_id == BPF_FUNC_ringbuf_discard;
-}
-
static bool may_be_acquire_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_sk_lookup_tcp ||
@@ -498,7 +494,8 @@ static bool is_acquire_function(enum bpf_func_id func_id,
if (func_id == BPF_FUNC_sk_lookup_tcp ||
func_id == BPF_FUNC_sk_lookup_udp ||
func_id == BPF_FUNC_skc_lookup_tcp ||
- func_id == BPF_FUNC_ringbuf_reserve)
+ func_id == BPF_FUNC_ringbuf_reserve ||
+ func_id == BPF_FUNC_kptr_xchg)
return true;
if (func_id == BPF_FUNC_map_lookup_elem &&
@@ -516,6 +513,7 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id)
func_id == BPF_FUNC_skc_to_tcp_sock ||
func_id == BPF_FUNC_skc_to_tcp6_sock ||
func_id == BPF_FUNC_skc_to_udp6_sock ||
+ func_id == BPF_FUNC_skc_to_mptcp_sock ||
func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
func_id == BPF_FUNC_skc_to_tcp_request_sock;
}
@@ -535,10 +533,10 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn)
static const char *reg_type_str(struct bpf_verifier_env *env,
enum bpf_reg_type type)
{
- char postfix[16] = {0}, prefix[16] = {0};
+ char postfix[16] = {0}, prefix[32] = {0};
static const char * const str[] = {
[NOT_INIT] = "?",
- [SCALAR_VALUE] = "inv",
+ [SCALAR_VALUE] = "scalar",
[PTR_TO_CTX] = "ctx",
[CONST_PTR_TO_MAP] = "map_ptr",
[PTR_TO_MAP_VALUE] = "map_value",
@@ -553,7 +551,6 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
[PTR_TO_TP_BUFFER] = "tp_buffer",
[PTR_TO_XDP_SOCK] = "xdp_sock",
[PTR_TO_BTF_ID] = "ptr_",
- [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
[PTR_TO_MEM] = "mem",
[PTR_TO_BUF] = "buf",
[PTR_TO_FUNC] = "func",
@@ -561,17 +558,22 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
};
if (type & PTR_MAYBE_NULL) {
- if (base_type(type) == PTR_TO_BTF_ID ||
- base_type(type) == PTR_TO_PERCPU_BTF_ID)
+ if (base_type(type) == PTR_TO_BTF_ID)
strncpy(postfix, "or_null_", 16);
else
strncpy(postfix, "_or_null", 16);
}
if (type & MEM_RDONLY)
- strncpy(prefix, "rdonly_", 16);
+ strncpy(prefix, "rdonly_", 32);
if (type & MEM_ALLOC)
- strncpy(prefix, "alloc_", 16);
+ strncpy(prefix, "alloc_", 32);
+ if (type & MEM_USER)
+ strncpy(prefix, "user_", 32);
+ if (type & MEM_PERCPU)
+ strncpy(prefix, "percpu_", 32);
+ if (type & PTR_UNTRUSTED)
+ strncpy(prefix, "untrusted_", 32);
snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
prefix, str[base_type(type)], postfix);
@@ -583,6 +585,7 @@ static char slot_type_char[] = {
[STACK_SPILL] = 'r',
[STACK_MISC] = 'm',
[STACK_ZERO] = '0',
+ [STACK_DYNPTR] = 'd',
};
static void print_liveness(struct bpf_verifier_env *env,
@@ -598,6 +601,25 @@ static void print_liveness(struct bpf_verifier_env *env,
verbose(env, "D");
}
+static int get_spi(s32 off)
+{
+ return (-off - 1) / BPF_REG_SIZE;
+}
+
+static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
+{
+ int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
+
+ /* We need to check that slots between [spi - nr_slots + 1, spi] are
+ * within [0, allocated_stack).
+ *
+ * Please note that the spi grows downwards. For example, a dynptr
+ * takes the size of two stack slots; the first slot will be at
+ * spi and the second slot will be at spi - 1.
+ */
+ return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
+}
+
static struct bpf_func_state *func(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg)
{
@@ -649,6 +671,132 @@ static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
env->scratched_stack_slots = ~0ULL;
}
+static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
+{
+ switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
+ case DYNPTR_TYPE_LOCAL:
+ return BPF_DYNPTR_TYPE_LOCAL;
+ case DYNPTR_TYPE_RINGBUF:
+ return BPF_DYNPTR_TYPE_RINGBUF;
+ default:
+ return BPF_DYNPTR_TYPE_INVALID;
+ }
+}
+
+static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
+{
+ return type == BPF_DYNPTR_TYPE_RINGBUF;
+}
+
+static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ enum bpf_arg_type arg_type, int insn_idx)
+{
+ struct bpf_func_state *state = func(env, reg);
+ enum bpf_dynptr_type type;
+ int spi, i, id;
+
+ spi = get_spi(reg->off);
+
+ if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
+ return -EINVAL;
+
+ for (i = 0; i < BPF_REG_SIZE; i++) {
+ state->stack[spi].slot_type[i] = STACK_DYNPTR;
+ state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
+ }
+
+ type = arg_to_dynptr_type(arg_type);
+ if (type == BPF_DYNPTR_TYPE_INVALID)
+ return -EINVAL;
+
+ state->stack[spi].spilled_ptr.dynptr.first_slot = true;
+ state->stack[spi].spilled_ptr.dynptr.type = type;
+ state->stack[spi - 1].spilled_ptr.dynptr.type = type;
+
+ if (dynptr_type_refcounted(type)) {
+ /* The id is used to track proper releasing */
+ id = acquire_reference_state(env, insn_idx);
+ if (id < 0)
+ return id;
+
+ state->stack[spi].spilled_ptr.id = id;
+ state->stack[spi - 1].spilled_ptr.id = id;
+ }
+
+ return 0;
+}
+
+static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int spi, i;
+
+ spi = get_spi(reg->off);
+
+ if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
+ return -EINVAL;
+
+ for (i = 0; i < BPF_REG_SIZE; i++) {
+ state->stack[spi].slot_type[i] = STACK_INVALID;
+ state->stack[spi - 1].slot_type[i] = STACK_INVALID;
+ }
+
+ /* Invalidate any slices associated with this dynptr */
+ if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
+ release_reference(env, state->stack[spi].spilled_ptr.id);
+ state->stack[spi].spilled_ptr.id = 0;
+ state->stack[spi - 1].spilled_ptr.id = 0;
+ }
+
+ state->stack[spi].spilled_ptr.dynptr.first_slot = false;
+ state->stack[spi].spilled_ptr.dynptr.type = 0;
+ state->stack[spi - 1].spilled_ptr.dynptr.type = 0;
+
+ return 0;
+}
+
+static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int spi = get_spi(reg->off);
+ int i;
+
+ if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
+ return true;
+
+ for (i = 0; i < BPF_REG_SIZE; i++) {
+ if (state->stack[spi].slot_type[i] == STACK_DYNPTR ||
+ state->stack[spi - 1].slot_type[i] == STACK_DYNPTR)
+ return false;
+ }
+
+ return true;
+}
+
+static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ enum bpf_arg_type arg_type)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int spi = get_spi(reg->off);
+ int i;
+
+ if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
+ !state->stack[spi].spilled_ptr.dynptr.first_slot)
+ return false;
+
+ for (i = 0; i < BPF_REG_SIZE; i++) {
+ if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
+ state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
+ return false;
+ }
+
+ /* ARG_PTR_TO_DYNPTR takes any type of dynptr */
+ if (arg_type == ARG_PTR_TO_DYNPTR)
+ return true;
+
+ return state->stack[spi].spilled_ptr.dynptr.type == arg_to_dynptr_type(arg_type);
+}
+
/* The reg state of a pointer or a bounded scalar was saved when
* it was spilled to the stack.
*/
@@ -682,74 +830,79 @@ static void print_verifier_state(struct bpf_verifier_env *env,
continue;
verbose(env, " R%d", i);
print_liveness(env, reg->live);
- verbose(env, "=%s", reg_type_str(env, t));
+ verbose(env, "=");
if (t == SCALAR_VALUE && reg->precise)
verbose(env, "P");
if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
tnum_is_const(reg->var_off)) {
/* reg->off should be 0 for SCALAR_VALUE */
+ verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
verbose(env, "%lld", reg->var_off.value + reg->off);
} else {
- if (base_type(t) == PTR_TO_BTF_ID ||
- base_type(t) == PTR_TO_PERCPU_BTF_ID)
+ const char *sep = "";
+
+ verbose(env, "%s", reg_type_str(env, t));
+ if (base_type(t) == PTR_TO_BTF_ID)
verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
- verbose(env, "(id=%d", reg->id);
- if (reg_type_may_be_refcounted_or_null(t))
- verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
+ verbose(env, "(");
+/*
+ * _a stands for append, was shortened to avoid multiline statements below.
+ * This macro is used to output a comma separated list of attributes.
+ */
+#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
+
+ if (reg->id)
+ verbose_a("id=%d", reg->id);
+ if (reg_type_may_be_refcounted_or_null(t) && reg->ref_obj_id)
+ verbose_a("ref_obj_id=%d", reg->ref_obj_id);
if (t != SCALAR_VALUE)
- verbose(env, ",off=%d", reg->off);
+ verbose_a("off=%d", reg->off);
if (type_is_pkt_pointer(t))
- verbose(env, ",r=%d", reg->range);
+ verbose_a("r=%d", reg->range);
else if (base_type(t) == CONST_PTR_TO_MAP ||
base_type(t) == PTR_TO_MAP_KEY ||
base_type(t) == PTR_TO_MAP_VALUE)
- verbose(env, ",ks=%d,vs=%d",
- reg->map_ptr->key_size,
- reg->map_ptr->value_size);
+ verbose_a("ks=%d,vs=%d",
+ reg->map_ptr->key_size,
+ reg->map_ptr->value_size);
if (tnum_is_const(reg->var_off)) {
/* Typically an immediate SCALAR_VALUE, but
* could be a pointer whose offset is too big
* for reg->off
*/
- verbose(env, ",imm=%llx", reg->var_off.value);
+ verbose_a("imm=%llx", reg->var_off.value);
} else {
if (reg->smin_value != reg->umin_value &&
reg->smin_value != S64_MIN)
- verbose(env, ",smin_value=%lld",
- (long long)reg->smin_value);
+ verbose_a("smin=%lld", (long long)reg->smin_value);
if (reg->smax_value != reg->umax_value &&
reg->smax_value != S64_MAX)
- verbose(env, ",smax_value=%lld",
- (long long)reg->smax_value);
+ verbose_a("smax=%lld", (long long)reg->smax_value);
if (reg->umin_value != 0)
- verbose(env, ",umin_value=%llu",
- (unsigned long long)reg->umin_value);
+ verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
if (reg->umax_value != U64_MAX)
- verbose(env, ",umax_value=%llu",
- (unsigned long long)reg->umax_value);
+ verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
if (!tnum_is_unknown(reg->var_off)) {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, ",var_off=%s", tn_buf);
+ verbose_a("var_off=%s", tn_buf);
}
if (reg->s32_min_value != reg->smin_value &&
reg->s32_min_value != S32_MIN)
- verbose(env, ",s32_min_value=%d",
- (int)(reg->s32_min_value));
+ verbose_a("s32_min=%d", (int)(reg->s32_min_value));
if (reg->s32_max_value != reg->smax_value &&
reg->s32_max_value != S32_MAX)
- verbose(env, ",s32_max_value=%d",
- (int)(reg->s32_max_value));
+ verbose_a("s32_max=%d", (int)(reg->s32_max_value));
if (reg->u32_min_value != reg->umin_value &&
reg->u32_min_value != U32_MIN)
- verbose(env, ",u32_min_value=%d",
- (int)(reg->u32_min_value));
+ verbose_a("u32_min=%d", (int)(reg->u32_min_value));
if (reg->u32_max_value != reg->umax_value &&
reg->u32_max_value != U32_MAX)
- verbose(env, ",u32_max_value=%d",
- (int)(reg->u32_max_value));
+ verbose_a("u32_max=%d", (int)(reg->u32_max_value));
}
+#undef verbose_a
+
verbose(env, ")");
}
}
@@ -774,7 +927,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
if (is_spilled_reg(&state->stack[i])) {
reg = &state->stack[i].spilled_ptr;
t = reg->type;
- verbose(env, "=%s", reg_type_str(env, t));
+ verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
if (t == SCALAR_VALUE && reg->precise)
verbose(env, "P");
if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
@@ -1546,14 +1699,15 @@ static void mark_reg_not_init(struct bpf_verifier_env *env,
static void mark_btf_ld_reg(struct bpf_verifier_env *env,
struct bpf_reg_state *regs, u32 regno,
enum bpf_reg_type reg_type,
- struct btf *btf, u32 btf_id)
+ struct btf *btf, u32 btf_id,
+ enum bpf_type_flag flag)
{
if (reg_type == SCALAR_VALUE) {
mark_reg_unknown(env, regs, regno);
return;
}
mark_reg_known_zero(env, regs, regno);
- regs[regno].type = PTR_TO_BTF_ID;
+ regs[regno].type = PTR_TO_BTF_ID | flag;
regs[regno].btf = btf;
regs[regno].btf_id = btf_id;
}
@@ -1743,7 +1897,7 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
}
static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
- s16 offset, struct module **btf_modp)
+ s16 offset)
{
struct bpf_kfunc_btf kf_btf = { .offset = offset };
struct bpf_kfunc_btf_tab *tab;
@@ -1797,8 +1951,6 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
kfunc_btf_cmp_by_off, NULL);
}
- if (btf_modp)
- *btf_modp = b->module;
return b->btf;
}
@@ -1814,9 +1966,7 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
kfree(tab);
}
-static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
- u32 func_id, s16 offset,
- struct module **btf_modp)
+static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
{
if (offset) {
if (offset < 0) {
@@ -1827,7 +1977,7 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
return ERR_PTR(-EINVAL);
}
- return __find_kfunc_desc_btf(env, offset, btf_modp);
+ return __find_kfunc_desc_btf(env, offset);
}
return btf_vmlinux ?: ERR_PTR(-ENOENT);
}
@@ -1841,6 +1991,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
struct bpf_kfunc_desc *desc;
const char *func_name;
struct btf *desc_btf;
+ unsigned long call_imm;
unsigned long addr;
int err;
@@ -1890,7 +2041,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
prog_aux->kfunc_btf_tab = btf_tab;
}
- desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL);
+ desc_btf = find_kfunc_desc_btf(env, offset);
if (IS_ERR(desc_btf)) {
verbose(env, "failed to find BTF for kernel function\n");
return PTR_ERR(desc_btf);
@@ -1925,9 +2076,17 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
return -EINVAL;
}
+ call_imm = BPF_CALL_IMM(addr);
+ /* Check whether or not the relative offset overflows desc->imm */
+ if ((unsigned long)(s32)call_imm != call_imm) {
+ verbose(env, "address of kernel function %s is out of range\n",
+ func_name);
+ return -EINVAL;
+ }
+
desc = &tab->descs[tab->nr_descs++];
desc->func_id = func_id;
- desc->imm = BPF_CALL_IMM(addr);
+ desc->imm = call_imm;
desc->offset = offset;
err = btf_distill_func_proto(&env->log, desc_btf,
func_proto, func_name,
@@ -2351,7 +2510,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
return NULL;
- desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL);
+ desc_btf = find_kfunc_desc_btf(data, insn->off);
if (IS_ERR(desc_btf))
return "<error>";
@@ -2767,7 +2926,6 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_XDP_SOCK:
case PTR_TO_BTF_ID:
case PTR_TO_BUF:
- case PTR_TO_PERCPU_BTF_ID:
case PTR_TO_MEM:
case PTR_TO_FUNC:
case PTR_TO_MAP_KEY:
@@ -3197,7 +3355,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
return 0;
}
-enum stack_access_src {
+enum bpf_access_src {
ACCESS_DIRECT = 1, /* the access is performed by an instruction */
ACCESS_HELPER = 2, /* the access is performed by a helper */
};
@@ -3205,7 +3363,7 @@ enum stack_access_src {
static int check_stack_range_initialized(struct bpf_verifier_env *env,
int regno, int off, int access_size,
bool zero_size_allowed,
- enum stack_access_src type,
+ enum bpf_access_src type,
struct bpf_call_arg_meta *meta);
static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
@@ -3455,9 +3613,175 @@ static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
return 0;
}
+static int __check_ptr_off_reg(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, int regno,
+ bool fixed_off_ok)
+{
+ /* Access to this pointer-typed register or passing it to a helper
+ * is only allowed in its original, unmodified form.
+ */
+
+ if (reg->off < 0) {
+ verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
+ reg_type_str(env, reg->type), regno, reg->off);
+ return -EACCES;
+ }
+
+ if (!fixed_off_ok && reg->off) {
+ verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
+ reg_type_str(env, reg->type), regno, reg->off);
+ return -EACCES;
+ }
+
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env, "variable %s access var_off=%s disallowed\n",
+ reg_type_str(env, reg->type), tn_buf);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+int check_ptr_off_reg(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, int regno)
+{
+ return __check_ptr_off_reg(env, reg, regno, false);
+}
+
+static int map_kptr_match_type(struct bpf_verifier_env *env,
+ struct bpf_map_value_off_desc *off_desc,
+ struct bpf_reg_state *reg, u32 regno)
+{
+ const char *targ_name = kernel_type_name(off_desc->kptr.btf, off_desc->kptr.btf_id);
+ int perm_flags = PTR_MAYBE_NULL;
+ const char *reg_name = "";
+
+ /* Only unreferenced case accepts untrusted pointers */
+ if (off_desc->type == BPF_KPTR_UNREF)
+ perm_flags |= PTR_UNTRUSTED;
+
+ if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
+ goto bad_type;
+
+ if (!btf_is_kernel(reg->btf)) {
+ verbose(env, "R%d must point to kernel BTF\n", regno);
+ return -EINVAL;
+ }
+ /* We need to verify reg->type and reg->btf, before accessing reg->btf */
+ reg_name = kernel_type_name(reg->btf, reg->btf_id);
+
+ /* For ref_ptr case, release function check should ensure we get one
+ * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
+ * normal store of unreferenced kptr, we must ensure var_off is zero.
+ * Since ref_ptr cannot be accessed directly by BPF insns, checks for
+ * reg->off and reg->ref_obj_id are not needed here.
+ */
+ if (__check_ptr_off_reg(env, reg, regno, true))
+ return -EACCES;
+
+ /* A full type match is needed, as BTF can be vmlinux or module BTF, and
+ * we also need to take into account the reg->off.
+ *
+ * We want to support cases like:
+ *
+ * struct foo {
+ * struct bar br;
+ * struct baz bz;
+ * };
+ *
+ * struct foo *v;
+ * v = func(); // PTR_TO_BTF_ID
+ * val->foo = v; // reg->off is zero, btf and btf_id match type
+ * val->bar = &v->br; // reg->off is still zero, but we need to retry with
+ * // first member type of struct after comparison fails
+ * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
+ * // to match type
+ *
+ * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
+ * is zero. We must also ensure that btf_struct_ids_match does not walk
+ * the struct to match type against first member of struct, i.e. reject
+ * second case from above. Hence, when type is BPF_KPTR_REF, we set
+ * strict mode to true for type match.
+ */
+ if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
+ off_desc->kptr.btf, off_desc->kptr.btf_id,
+ off_desc->type == BPF_KPTR_REF))
+ goto bad_type;
+ return 0;
+bad_type:
+ verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
+ reg_type_str(env, reg->type), reg_name);
+ verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
+ if (off_desc->type == BPF_KPTR_UNREF)
+ verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
+ targ_name);
+ else
+ verbose(env, "\n");
+ return -EINVAL;
+}
+
+static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
+ int value_regno, int insn_idx,
+ struct bpf_map_value_off_desc *off_desc)
+{
+ struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
+ int class = BPF_CLASS(insn->code);
+ struct bpf_reg_state *val_reg;
+
+ /* Things we already checked for in check_map_access and caller:
+ * - Reject cases where variable offset may touch kptr
+ * - size of access (must be BPF_DW)
+ * - tnum_is_const(reg->var_off)
+ * - off_desc->offset == off + reg->var_off.value
+ */
+ /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
+ if (BPF_MODE(insn->code) != BPF_MEM) {
+ verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
+ return -EACCES;
+ }
+
+ /* We only allow loading referenced kptr, since it will be marked as
+ * untrusted, similar to unreferenced kptr.
+ */
+ if (class != BPF_LDX && off_desc->type == BPF_KPTR_REF) {
+ verbose(env, "store to referenced kptr disallowed\n");
+ return -EACCES;
+ }
+
+ if (class == BPF_LDX) {
+ val_reg = reg_state(env, value_regno);
+ /* We can simply mark the value_regno receiving the pointer
+ * value from map as PTR_TO_BTF_ID, with the correct type.
+ */
+ mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, off_desc->kptr.btf,
+ off_desc->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED);
+ /* For mark_ptr_or_null_reg */
+ val_reg->id = ++env->id_gen;
+ } else if (class == BPF_STX) {
+ val_reg = reg_state(env, value_regno);
+ if (!register_is_null(val_reg) &&
+ map_kptr_match_type(env, off_desc, val_reg, value_regno))
+ return -EACCES;
+ } else if (class == BPF_ST) {
+ if (insn->imm) {
+ verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
+ off_desc->offset);
+ return -EACCES;
+ }
+ } else {
+ verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
+ return -EACCES;
+ }
+ return 0;
+}
+
/* check read/write into a map element with possible variable offset */
static int check_map_access(struct bpf_verifier_env *env, u32 regno,
- int off, int size, bool zero_size_allowed)
+ int off, int size, bool zero_size_allowed,
+ enum bpf_access_src src)
{
struct bpf_verifier_state *vstate = env->cur_state;
struct bpf_func_state *state = vstate->frame[vstate->curframe];
@@ -3493,16 +3817,41 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
return -EACCES;
}
}
+ if (map_value_has_kptrs(map)) {
+ struct bpf_map_value_off *tab = map->kptr_off_tab;
+ int i;
+
+ for (i = 0; i < tab->nr_off; i++) {
+ u32 p = tab->off[i].offset;
+
+ if (reg->smin_value + off < p + sizeof(u64) &&
+ p < reg->umax_value + off + size) {
+ if (src != ACCESS_DIRECT) {
+ verbose(env, "kptr cannot be accessed indirectly by helper\n");
+ return -EACCES;
+ }
+ if (!tnum_is_const(reg->var_off)) {
+ verbose(env, "kptr access cannot have variable offset\n");
+ return -EACCES;
+ }
+ if (p != off + reg->var_off.value) {
+ verbose(env, "kptr access misaligned expected=%u off=%llu\n",
+ p, off + reg->var_off.value);
+ return -EACCES;
+ }
+ if (size != bpf_size_to_bytes(BPF_DW)) {
+ verbose(env, "kptr access size must be BPF_DW\n");
+ return -EACCES;
+ }
+ break;
+ }
+ }
+ }
return err;
}
#define MAX_PACKET_OFF 0xffff
-static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
-{
- return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
-}
-
static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
const struct bpf_call_arg_meta *meta,
enum bpf_access_type t)
@@ -3971,38 +4320,6 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
}
#endif
-static int __check_ptr_off_reg(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg, int regno,
- bool fixed_off_ok)
-{
- /* Access to this pointer-typed register or passing it to a helper
- * is only allowed in its original, unmodified form.
- */
-
- if (!fixed_off_ok && reg->off) {
- verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
- reg_type_str(env, reg->type), regno, reg->off);
- return -EACCES;
- }
-
- if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
- char tn_buf[48];
-
- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "variable %s access var_off=%s disallowed\n",
- reg_type_str(env, reg->type), tn_buf);
- return -EACCES;
- }
-
- return 0;
-}
-
-int check_ptr_off_reg(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg, int regno)
-{
- return __check_ptr_off_reg(env, reg, regno, false);
-}
-
static int __check_buffer_access(struct bpf_verifier_env *env,
const char *buf_info,
const struct bpf_reg_state *reg,
@@ -4047,9 +4364,9 @@ static int check_buffer_access(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
int regno, int off, int size,
bool zero_size_allowed,
- const char *buf_info,
u32 *max_access)
{
+ const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
int err;
err = __check_buffer_access(env, buf_info, reg, regno, off, size);
@@ -4159,6 +4476,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
struct bpf_reg_state *reg = regs + regno;
const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
const char *tname = btf_name_by_offset(reg->btf, t->name_off);
+ enum bpf_type_flag flag = 0;
u32 btf_id;
int ret;
@@ -4178,9 +4496,23 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
return -EACCES;
}
+ if (reg->type & MEM_USER) {
+ verbose(env,
+ "R%d is ptr_%s access user memory: off=%d\n",
+ regno, tname, off);
+ return -EACCES;
+ }
+
+ if (reg->type & MEM_PERCPU) {
+ verbose(env,
+ "R%d is ptr_%s access percpu memory: off=%d\n",
+ regno, tname, off);
+ return -EACCES;
+ }
+
if (env->ops->btf_struct_access) {
ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
- off, size, atype, &btf_id);
+ off, size, atype, &btf_id, &flag);
} else {
if (atype != BPF_READ) {
verbose(env, "only read is supported\n");
@@ -4188,14 +4520,20 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
}
ret = btf_struct_access(&env->log, reg->btf, t, off, size,
- atype, &btf_id);
+ atype, &btf_id, &flag);
}
if (ret < 0)
return ret;
+ /* If this is an untrusted pointer, all pointers formed by walking it
+ * also inherit the untrusted flag.
+ */
+ if (type_flag(reg->type) & PTR_UNTRUSTED)
+ flag |= PTR_UNTRUSTED;
+
if (atype == BPF_READ && value_regno >= 0)
- mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id);
+ mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
return 0;
}
@@ -4208,6 +4546,7 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
{
struct bpf_reg_state *reg = regs + regno;
struct bpf_map *map = reg->map_ptr;
+ enum bpf_type_flag flag = 0;
const struct btf_type *t;
const char *tname;
u32 btf_id;
@@ -4245,12 +4584,12 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
return -EACCES;
}
- ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id);
+ ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &flag);
if (ret < 0)
return ret;
if (value_regno >= 0)
- mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id);
+ mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
return 0;
}
@@ -4285,7 +4624,7 @@ static int check_stack_slot_within_bounds(int off,
static int check_stack_access_within_bounds(
struct bpf_verifier_env *env,
int regno, int off, int access_size,
- enum stack_access_src src, enum bpf_access_type type)
+ enum bpf_access_src src, enum bpf_access_type type)
{
struct bpf_reg_state *regs = cur_regs(env);
struct bpf_reg_state *reg = regs + regno;
@@ -4381,6 +4720,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_MAP_VALUE) {
+ struct bpf_map_value_off_desc *kptr_off_desc = NULL;
+
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
verbose(env, "R%d leaks addr into map\n", value_regno);
@@ -4389,8 +4730,16 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_map_access_type(env, regno, off, size, t);
if (err)
return err;
- err = check_map_access(env, regno, off, size, false);
- if (!err && t == BPF_READ && value_regno >= 0) {
+ err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
+ if (err)
+ return err;
+ if (tnum_is_const(reg->var_off))
+ kptr_off_desc = bpf_map_kptr_off_contains(reg->map_ptr,
+ off + reg->var_off.value);
+ if (kptr_off_desc) {
+ err = check_map_kptr_access(env, regno, value_regno, insn_idx,
+ kptr_off_desc);
+ } else if (t == BPF_READ && value_regno >= 0) {
struct bpf_map *map = reg->map_ptr;
/* if map is read-only, track its contents as scalars */
@@ -4451,7 +4800,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err < 0)
return err;
- err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf, &btf_id);
+ err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
+ &btf_id);
if (err)
verbose_linfo(env, insn_idx, "; ");
if (!err && t == BPF_READ && value_regno >= 0) {
@@ -4535,7 +4885,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_tp_buffer_access(env, reg, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
- } else if (reg->type == PTR_TO_BTF_ID) {
+ } else if (base_type(reg->type) == PTR_TO_BTF_ID &&
+ !type_may_be_null(reg->type)) {
err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
value_regno);
} else if (reg->type == CONST_PTR_TO_MAP) {
@@ -4543,7 +4894,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
value_regno);
} else if (base_type(reg->type) == PTR_TO_BUF) {
bool rdonly_mem = type_is_rdonly_mem(reg->type);
- const char *buf_info;
u32 *max_access;
if (rdonly_mem) {
@@ -4552,15 +4902,13 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
regno, reg_type_str(env, reg->type));
return -EACCES;
}
- buf_info = "rdonly";
max_access = &env->prog->aux->max_rdonly_access;
} else {
- buf_info = "rdwr";
max_access = &env->prog->aux->max_rdwr_access;
}
err = check_buffer_access(env, reg, regno, off, size, false,
- buf_info, max_access);
+ max_access);
if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
mark_reg_unknown(env, regs, value_regno);
@@ -4694,7 +5042,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
static int check_stack_range_initialized(
struct bpf_verifier_env *env, int regno, int off,
int access_size, bool zero_size_allowed,
- enum stack_access_src type, struct bpf_call_arg_meta *meta)
+ enum bpf_access_src type, struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *reg = reg_state(env, regno);
struct bpf_func_state *state = func(env, reg);
@@ -4781,7 +5129,7 @@ static int check_stack_range_initialized(
}
if (is_spilled_reg(&state->stack[spi]) &&
- state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
+ base_type(state->stack[spi].spilled_ptr.type) == PTR_TO_BTF_ID)
goto mark;
if (is_spilled_reg(&state->stack[spi]) &&
@@ -4823,7 +5171,6 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
- const char *buf_info;
u32 *max_access;
switch (base_type(reg->type)) {
@@ -4832,6 +5179,11 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
return check_packet_access(env, regno, reg->off, access_size,
zero_size_allowed);
case PTR_TO_MAP_KEY:
+ if (meta && meta->raw_mode) {
+ verbose(env, "R%d cannot write into %s\n", regno,
+ reg_type_str(env, reg->type));
+ return -EACCES;
+ }
return check_mem_region_access(env, regno, reg->off, access_size,
reg->map_ptr->key_size, false);
case PTR_TO_MAP_VALUE:
@@ -4840,25 +5192,33 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
BPF_READ))
return -EACCES;
return check_map_access(env, regno, reg->off, access_size,
- zero_size_allowed);
+ zero_size_allowed, ACCESS_HELPER);
case PTR_TO_MEM:
+ if (type_is_rdonly_mem(reg->type)) {
+ if (meta && meta->raw_mode) {
+ verbose(env, "R%d cannot write into %s\n", regno,
+ reg_type_str(env, reg->type));
+ return -EACCES;
+ }
+ }
return check_mem_region_access(env, regno, reg->off,
access_size, reg->mem_size,
zero_size_allowed);
case PTR_TO_BUF:
if (type_is_rdonly_mem(reg->type)) {
- if (meta && meta->raw_mode)
+ if (meta && meta->raw_mode) {
+ verbose(env, "R%d cannot write into %s\n", regno,
+ reg_type_str(env, reg->type));
return -EACCES;
+ }
- buf_info = "rdonly";
max_access = &env->prog->aux->max_rdonly_access;
} else {
- buf_info = "rdwr";
max_access = &env->prog->aux->max_rdwr_access;
}
return check_buffer_access(env, reg, regno, reg->off,
access_size, zero_size_allowed,
- buf_info, max_access);
+ max_access);
case PTR_TO_STACK:
return check_stack_range_initialized(
env,
@@ -4877,27 +5237,119 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
}
}
+static int check_mem_size_reg(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ bool zero_size_allowed,
+ struct bpf_call_arg_meta *meta)
+{
+ int err;
+
+ /* This is used to refine r0 return value bounds for helpers
+ * that enforce this value as an upper bound on return values.
+ * See do_refine_retval_range() for helpers that can refine
+ * the return value. C type of helper is u32 so we pull register
+ * bound from umax_value however, if negative verifier errors
+ * out. Only upper bounds can be learned because retval is an
+ * int type and negative retvals are allowed.
+ */
+ meta->msize_max_value = reg->umax_value;
+
+ /* The register is SCALAR_VALUE; the access check
+ * happens using its boundaries.
+ */
+ if (!tnum_is_const(reg->var_off))
+ /* For unprivileged variable accesses, disable raw
+ * mode so that the program is required to
+ * initialize all the memory that the helper could
+ * just partially fill up.
+ */
+ meta = NULL;
+
+ if (reg->smin_value < 0) {
+ verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
+ regno);
+ return -EACCES;
+ }
+
+ if (reg->umin_value == 0) {
+ err = check_helper_mem_access(env, regno - 1, 0,
+ zero_size_allowed,
+ meta);
+ if (err)
+ return err;
+ }
+
+ if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
+ verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
+ regno);
+ return -EACCES;
+ }
+ err = check_helper_mem_access(env, regno - 1,
+ reg->umax_value,
+ zero_size_allowed, meta);
+ if (!err)
+ err = mark_chain_precision(env, regno);
+ return err;
+}
+
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size)
{
+ bool may_be_null = type_may_be_null(reg->type);
+ struct bpf_reg_state saved_reg;
+ struct bpf_call_arg_meta meta;
+ int err;
+
if (register_is_null(reg))
return 0;
- if (type_may_be_null(reg->type)) {
- /* Assuming that the register contains a value check if the memory
- * access is safe. Temporarily save and restore the register's state as
- * the conversion shouldn't be visible to a caller.
- */
- const struct bpf_reg_state saved_reg = *reg;
- int rv;
-
+ memset(&meta, 0, sizeof(meta));
+ /* Assuming that the register contains a value check if the memory
+ * access is safe. Temporarily save and restore the register's state as
+ * the conversion shouldn't be visible to a caller.
+ */
+ if (may_be_null) {
+ saved_reg = *reg;
mark_ptr_not_null_reg(reg);
- rv = check_helper_mem_access(env, regno, mem_size, true, NULL);
+ }
+
+ err = check_helper_mem_access(env, regno, mem_size, true, &meta);
+ /* Check access for BPF_WRITE */
+ meta.raw_mode = true;
+ err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
+
+ if (may_be_null)
*reg = saved_reg;
- return rv;
+
+ return err;
+}
+
+int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ u32 regno)
+{
+ struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
+ bool may_be_null = type_may_be_null(mem_reg->type);
+ struct bpf_reg_state saved_reg;
+ struct bpf_call_arg_meta meta;
+ int err;
+
+ WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
+
+ memset(&meta, 0, sizeof(meta));
+
+ if (may_be_null) {
+ saved_reg = *mem_reg;
+ mark_ptr_not_null_reg(mem_reg);
}
- return check_helper_mem_access(env, regno, mem_size, true, NULL);
+ err = check_mem_size_reg(env, reg, regno, true, &meta);
+ /* Check access for BPF_WRITE */
+ meta.raw_mode = true;
+ err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
+
+ if (may_be_null)
+ *mem_reg = saved_reg;
+ return err;
}
/* Implementation details:
@@ -5029,10 +5481,51 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno,
return 0;
}
-static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
+static int process_kptr_func(struct bpf_verifier_env *env, int regno,
+ struct bpf_call_arg_meta *meta)
{
- return base_type(type) == ARG_PTR_TO_MEM ||
- base_type(type) == ARG_PTR_TO_UNINIT_MEM;
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ struct bpf_map_value_off_desc *off_desc;
+ struct bpf_map *map_ptr = reg->map_ptr;
+ u32 kptr_off;
+ int ret;
+
+ if (!tnum_is_const(reg->var_off)) {
+ verbose(env,
+ "R%d doesn't have constant offset. kptr has to be at the constant offset\n",
+ regno);
+ return -EINVAL;
+ }
+ if (!map_ptr->btf) {
+ verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
+ map_ptr->name);
+ return -EINVAL;
+ }
+ if (!map_value_has_kptrs(map_ptr)) {
+ ret = PTR_ERR_OR_ZERO(map_ptr->kptr_off_tab);
+ if (ret == -E2BIG)
+ verbose(env, "map '%s' has more than %d kptr\n", map_ptr->name,
+ BPF_MAP_VALUE_OFF_MAX);
+ else if (ret == -EEXIST)
+ verbose(env, "map '%s' has repeating kptr BTF tags\n", map_ptr->name);
+ else
+ verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
+ return -EINVAL;
+ }
+
+ meta->map_ptr = map_ptr;
+ kptr_off = reg->off + reg->var_off.value;
+ off_desc = bpf_map_kptr_off_contains(map_ptr, kptr_off);
+ if (!off_desc) {
+ verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
+ return -EACCES;
+ }
+ if (off_desc->type != BPF_KPTR_REF) {
+ verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
+ return -EACCES;
+ }
+ meta->kptr_off_desc = off_desc;
+ return 0;
}
static bool arg_type_is_mem_size(enum bpf_arg_type type)
@@ -5052,6 +5545,16 @@ static bool arg_type_is_int_ptr(enum bpf_arg_type type)
type == ARG_PTR_TO_LONG;
}
+static bool arg_type_is_release(enum bpf_arg_type type)
+{
+ return type & OBJ_RELEASE;
+}
+
+static bool arg_type_is_dynptr(enum bpf_arg_type type)
+{
+ return base_type(type) == ARG_PTR_TO_DYNPTR;
+}
+
static int int_ptr_type_to_size(enum bpf_arg_type type)
{
if (type == ARG_PTR_TO_INT)
@@ -5159,16 +5662,16 @@ static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | ME
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
-static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
+static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } };
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
+static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
[ARG_PTR_TO_MAP_VALUE] = &map_key_value_types,
- [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types,
[ARG_CONST_SIZE] = &scalar_types,
[ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
[ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
@@ -5182,7 +5685,6 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
[ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
[ARG_PTR_TO_MEM] = &mem_types,
- [ARG_PTR_TO_UNINIT_MEM] = &mem_types,
[ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
[ARG_PTR_TO_INT] = &int_ptr_types,
[ARG_PTR_TO_LONG] = &int_ptr_types,
@@ -5191,11 +5693,14 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_STACK] = &stack_ptr_types,
[ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
[ARG_PTR_TO_TIMER] = &timer_types,
+ [ARG_PTR_TO_KPTR] = &kptr_types,
+ [ARG_PTR_TO_DYNPTR] = &stack_ptr_types,
};
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
enum bpf_arg_type arg_type,
- const u32 *arg_btf_id)
+ const u32 *arg_btf_id,
+ struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
enum bpf_reg_type expected, type = reg->type;
@@ -5240,6 +5745,13 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
found:
if (reg->type == PTR_TO_BTF_ID) {
+ /* For bpf_sk_release, it needs to match against first member
+ * 'struct sock_common', hence make an exception for it. This
+ * allows bpf_sk_release to work for multiple socket types.
+ */
+ bool strict_type_match = arg_type_is_release(arg_type) &&
+ meta->func_id != BPF_FUNC_sk_release;
+
if (!arg_btf_id) {
if (!compatible->btf_id) {
verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
@@ -5248,8 +5760,12 @@ found:
arg_btf_id = compatible->btf_id;
}
- if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
- btf_vmlinux, *arg_btf_id)) {
+ if (meta->func_id == BPF_FUNC_kptr_xchg) {
+ if (map_kptr_match_type(env, meta->kptr_off_desc, reg, regno))
+ return -EACCES;
+ } else if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
+ btf_vmlinux, *arg_btf_id,
+ strict_type_match)) {
verbose(env, "R%d is of type %s but %s is expected\n",
regno, kernel_type_name(reg->btf, reg->btf_id),
kernel_type_name(btf_vmlinux, *arg_btf_id));
@@ -5260,6 +5776,70 @@ found:
return 0;
}
+int check_func_arg_reg_off(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, int regno,
+ enum bpf_arg_type arg_type)
+{
+ enum bpf_reg_type type = reg->type;
+ bool fixed_off_ok = false;
+
+ switch ((u32)type) {
+ /* Pointer types where reg offset is explicitly allowed: */
+ case PTR_TO_STACK:
+ if (arg_type_is_dynptr(arg_type) && reg->off % BPF_REG_SIZE) {
+ verbose(env, "cannot pass in dynptr at an offset\n");
+ return -EINVAL;
+ }
+ fallthrough;
+ case PTR_TO_PACKET:
+ case PTR_TO_PACKET_META:
+ case PTR_TO_MAP_KEY:
+ case PTR_TO_MAP_VALUE:
+ case PTR_TO_MEM:
+ case PTR_TO_MEM | MEM_RDONLY:
+ case PTR_TO_MEM | MEM_ALLOC:
+ case PTR_TO_BUF:
+ case PTR_TO_BUF | MEM_RDONLY:
+ case SCALAR_VALUE:
+ /* Some of the argument types nevertheless require a
+ * zero register offset.
+ */
+ if (base_type(arg_type) != ARG_PTR_TO_ALLOC_MEM)
+ return 0;
+ break;
+ /* All the rest must be rejected, except PTR_TO_BTF_ID which allows
+ * fixed offset.
+ */
+ case PTR_TO_BTF_ID:
+ /* When referenced PTR_TO_BTF_ID is passed to release function,
+ * it's fixed offset must be 0. In the other cases, fixed offset
+ * can be non-zero.
+ */
+ if (arg_type_is_release(arg_type) && reg->off) {
+ verbose(env, "R%d must have zero offset when passed to release func\n",
+ regno);
+ return -EINVAL;
+ }
+ /* For arg is release pointer, fixed_off_ok must be false, but
+ * we already checked and rejected reg->off != 0 above, so set
+ * to true to allow fixed offset for all other cases.
+ */
+ fixed_off_ok = true;
+ break;
+ default:
+ break;
+ }
+ return __check_ptr_off_reg(env, reg, regno, fixed_off_ok);
+}
+
+static u32 stack_slot_get_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int spi = get_spi(reg->off);
+
+ return state->stack[spi].spilled_ptr.id;
+}
+
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
const struct bpf_func_proto *fn)
@@ -5292,8 +5872,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EACCES;
}
- if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE ||
- base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
+ if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
err = resolve_map_arg_type(env, meta, &arg_type);
if (err)
return err;
@@ -5305,40 +5884,37 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
*/
goto skip_type_check;
- err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
+ err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg], meta);
if (err)
return err;
- switch ((u32)type) {
- case SCALAR_VALUE:
- /* Pointer types where reg offset is explicitly allowed: */
- case PTR_TO_PACKET:
- case PTR_TO_PACKET_META:
- case PTR_TO_MAP_KEY:
- case PTR_TO_MAP_VALUE:
- case PTR_TO_MEM:
- case PTR_TO_MEM | MEM_RDONLY:
- case PTR_TO_MEM | MEM_ALLOC:
- case PTR_TO_BUF:
- case PTR_TO_BUF | MEM_RDONLY:
- case PTR_TO_STACK:
- /* Some of the argument types nevertheless require a
- * zero register offset.
- */
- if (arg_type == ARG_PTR_TO_ALLOC_MEM)
- goto force_off_check;
- break;
- /* All the rest must be rejected: */
- default:
-force_off_check:
- err = __check_ptr_off_reg(env, reg, regno,
- type == PTR_TO_BTF_ID);
- if (err < 0)
- return err;
- break;
- }
+ err = check_func_arg_reg_off(env, reg, regno, arg_type);
+ if (err)
+ return err;
skip_type_check:
+ if (arg_type_is_release(arg_type)) {
+ if (arg_type_is_dynptr(arg_type)) {
+ struct bpf_func_state *state = func(env, reg);
+ int spi = get_spi(reg->off);
+
+ if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
+ !state->stack[spi].spilled_ptr.id) {
+ verbose(env, "arg %d is an unacquired reference\n", regno);
+ return -EINVAL;
+ }
+ } else if (!reg->ref_obj_id && !register_is_null(reg)) {
+ verbose(env, "R%d must be referenced when passed to release function\n",
+ regno);
+ return -EINVAL;
+ }
+ if (meta->release_regno) {
+ verbose(env, "verifier internal error: more than one release argument\n");
+ return -EFAULT;
+ }
+ meta->release_regno = regno;
+ }
+
if (reg->ref_obj_id) {
if (meta->ref_obj_id) {
verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
@@ -5391,8 +5967,7 @@ skip_type_check:
err = check_helper_mem_access(env, regno,
meta->map_ptr->key_size, false,
NULL);
- } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE ||
- base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
+ } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
if (type_may_be_null(arg_type) && register_is_null(reg))
return 0;
@@ -5404,7 +5979,7 @@ skip_type_check:
verbose(env, "invalid map_ptr to access map->value\n");
return -EACCES;
}
- meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
+ meta->raw_mode = arg_type & MEM_UNINIT;
err = check_helper_mem_access(env, regno,
meta->map_ptr->value_size, false,
meta);
@@ -5431,59 +6006,49 @@ skip_type_check:
return -EACCES;
} else if (arg_type == ARG_PTR_TO_FUNC) {
meta->subprogno = reg->subprogno;
- } else if (arg_type_is_mem_ptr(arg_type)) {
+ } else if (base_type(arg_type) == ARG_PTR_TO_MEM) {
/* The access to this pointer is only checked when we hit the
* next is_mem_size argument below.
*/
- meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
+ meta->raw_mode = arg_type & MEM_UNINIT;
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
- /* This is used to refine r0 return value bounds for helpers
- * that enforce this value as an upper bound on return values.
- * See do_refine_retval_range() for helpers that can refine
- * the return value. C type of helper is u32 so we pull register
- * bound from umax_value however, if negative verifier errors
- * out. Only upper bounds can be learned because retval is an
- * int type and negative retvals are allowed.
- */
- meta->msize_max_value = reg->umax_value;
+ err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
+ } else if (arg_type_is_dynptr(arg_type)) {
+ if (arg_type & MEM_UNINIT) {
+ if (!is_dynptr_reg_valid_uninit(env, reg)) {
+ verbose(env, "Dynptr has to be an uninitialized dynptr\n");
+ return -EINVAL;
+ }
- /* The register is SCALAR_VALUE; the access check
- * happens using its boundaries.
- */
- if (!tnum_is_const(reg->var_off))
- /* For unprivileged variable accesses, disable raw
- * mode so that the program is required to
- * initialize all the memory that the helper could
- * just partially fill up.
+ /* We only support one dynptr being uninitialized at the moment,
+ * which is sufficient for the helper functions we have right now.
*/
- meta = NULL;
+ if (meta->uninit_dynptr_regno) {
+ verbose(env, "verifier internal error: multiple uninitialized dynptr args\n");
+ return -EFAULT;
+ }
- if (reg->smin_value < 0) {
- verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
- regno);
- return -EACCES;
- }
+ meta->uninit_dynptr_regno = regno;
+ } else if (!is_dynptr_reg_valid_init(env, reg, arg_type)) {
+ const char *err_extra = "";
- if (reg->umin_value == 0) {
- err = check_helper_mem_access(env, regno - 1, 0,
- zero_size_allowed,
- meta);
- if (err)
- return err;
- }
+ switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
+ case DYNPTR_TYPE_LOCAL:
+ err_extra = "local ";
+ break;
+ case DYNPTR_TYPE_RINGBUF:
+ err_extra = "ringbuf ";
+ break;
+ default:
+ break;
+ }
- if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
- verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
- regno);
- return -EACCES;
+ verbose(env, "Expected an initialized %sdynptr as arg #%d\n",
+ err_extra, arg + 1);
+ return -EINVAL;
}
- err = check_helper_mem_access(env, regno - 1,
- reg->umax_value,
- zero_size_allowed, meta);
- if (!err)
- err = mark_chain_precision(env, regno);
} else if (arg_type_is_alloc_size(arg_type)) {
if (!tnum_is_const(reg->var_off)) {
verbose(env, "R%d is not a known constant'\n",
@@ -5520,7 +6085,8 @@ skip_type_check:
}
err = check_map_access(env, regno, reg->off,
- map->value_size - reg->off, false);
+ map->value_size - reg->off, false,
+ ACCESS_HELPER);
if (err)
return err;
@@ -5536,6 +6102,9 @@ skip_type_check:
verbose(env, "string is not zero-terminated\n");
return -EINVAL;
}
+ } else if (arg_type == ARG_PTR_TO_KPTR) {
+ if (process_kptr_func(env, regno, meta))
+ return -EACCES;
}
return err;
@@ -5601,7 +6170,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_RINGBUF:
if (func_id != BPF_FUNC_ringbuf_output &&
func_id != BPF_FUNC_ringbuf_reserve &&
- func_id != BPF_FUNC_ringbuf_query)
+ func_id != BPF_FUNC_ringbuf_query &&
+ func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
+ func_id != BPF_FUNC_ringbuf_submit_dynptr &&
+ func_id != BPF_FUNC_ringbuf_discard_dynptr)
goto error;
break;
case BPF_MAP_TYPE_STACK_TRACE:
@@ -5717,6 +6289,9 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_ringbuf_output:
case BPF_FUNC_ringbuf_reserve:
case BPF_FUNC_ringbuf_query:
+ case BPF_FUNC_ringbuf_reserve_dynptr:
+ case BPF_FUNC_ringbuf_submit_dynptr:
+ case BPF_FUNC_ringbuf_discard_dynptr:
if (map->map_type != BPF_MAP_TYPE_RINGBUF)
goto error;
break;
@@ -5771,6 +6346,12 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
goto error;
break;
+ case BPF_FUNC_map_lookup_percpu_elem:
+ if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
+ map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
+ map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
+ goto error;
+ break;
case BPF_FUNC_sk_storage_get:
case BPF_FUNC_sk_storage_delete:
if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
@@ -5822,10 +6403,8 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
enum bpf_arg_type arg_next)
{
- return (arg_type_is_mem_ptr(arg_curr) &&
- !arg_type_is_mem_size(arg_next)) ||
- (!arg_type_is_mem_ptr(arg_curr) &&
- arg_type_is_mem_size(arg_next));
+ return (base_type(arg_curr) == ARG_PTR_TO_MEM) !=
+ arg_type_is_mem_size(arg_next);
}
static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
@@ -5836,7 +6415,7 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
* helper function specification.
*/
if (arg_type_is_mem_size(fn->arg1_type) ||
- arg_type_is_mem_ptr(fn->arg5_type) ||
+ base_type(fn->arg5_type) == ARG_PTR_TO_MEM ||
check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
@@ -5878,17 +6457,18 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn)
int i;
for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
- if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
+ if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
return false;
- if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
+ if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
return false;
}
return true;
}
-static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
+static int check_func_proto(const struct bpf_func_proto *fn, int func_id,
+ struct bpf_call_arg_meta *meta)
{
return check_raw_mode_ok(fn) &&
check_arg_pair_ok(fn) &&
@@ -6383,7 +6963,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
func_id != BPF_FUNC_map_pop_elem &&
func_id != BPF_FUNC_map_peek_elem &&
func_id != BPF_FUNC_for_each_map_elem &&
- func_id != BPF_FUNC_redirect_map)
+ func_id != BPF_FUNC_redirect_map &&
+ func_id != BPF_FUNC_map_lookup_percpu_elem)
return 0;
if (map == NULL) {
@@ -6572,7 +7153,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
memset(&meta, 0, sizeof(meta));
meta.pkt_access = fn->pkt_access;
- err = check_func_proto(fn, func_id);
+ err = check_func_proto(fn, func_id, &meta);
if (err) {
verbose(env, "kernel subsystem misconfigured func %s#%d\n",
func_id_name(func_id), func_id);
@@ -6605,8 +7186,35 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return err;
}
- if (is_release_function(func_id)) {
- err = release_reference(env, meta.ref_obj_id);
+ regs = cur_regs(env);
+
+ if (meta.uninit_dynptr_regno) {
+ /* we write BPF_DW bits (8 bytes) at a time */
+ for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
+ err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno,
+ i, BPF_DW, BPF_WRITE, -1, false);
+ if (err)
+ return err;
+ }
+
+ err = mark_stack_slots_dynptr(env, &regs[meta.uninit_dynptr_regno],
+ fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1],
+ insn_idx);
+ if (err)
+ return err;
+ }
+
+ if (meta.release_regno) {
+ err = -EINVAL;
+ if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1]))
+ err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
+ else if (meta.ref_obj_id)
+ err = release_reference(env, meta.ref_obj_id);
+ /* meta.ref_obj_id can only be 0 if register that is meant to be
+ * released is NULL, which must be > R0.
+ */
+ else if (register_is_null(&regs[meta.release_regno]))
+ err = 0;
if (err) {
verbose(env, "func %s#%d reference has not been acquired before\n",
func_id_name(func_id), func_id);
@@ -6614,8 +7222,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
}
}
- regs = cur_regs(env);
-
switch (func_id) {
case BPF_FUNC_tail_call:
err = check_reference_leak(env);
@@ -6652,6 +7258,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_loop_callback_state);
break;
+ case BPF_FUNC_dynptr_from_mem:
+ if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
+ verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
+ reg_type_str(env, regs[BPF_REG_1].type));
+ return -EACCES;
+ }
}
if (err)
@@ -6739,21 +7351,25 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs[BPF_REG_0].btf_id = meta.ret_btf_id;
}
} else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
+ struct btf *ret_btf;
int ret_btf_id;
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
- ret_btf_id = *fn->ret_btf_id;
+ if (func_id == BPF_FUNC_kptr_xchg) {
+ ret_btf = meta.kptr_off_desc->kptr.btf;
+ ret_btf_id = meta.kptr_off_desc->kptr.btf_id;
+ } else {
+ ret_btf = btf_vmlinux;
+ ret_btf_id = *fn->ret_btf_id;
+ }
if (ret_btf_id == 0) {
verbose(env, "invalid return type %u of func %s#%d\n",
base_type(ret_type), func_id_name(func_id),
func_id);
return -EINVAL;
}
- /* current BPF helper definitions are only coming from
- * built-in code with type IDs from vmlinux BTF
- */
- regs[BPF_REG_0].btf = btf_vmlinux;
+ regs[BPF_REG_0].btf = ret_btf;
regs[BPF_REG_0].btf_id = ret_btf_id;
} else {
verbose(env, "unknown return type %u of func %s#%d\n",
@@ -6776,6 +7392,21 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs[BPF_REG_0].id = id;
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = id;
+ } else if (func_id == BPF_FUNC_dynptr_data) {
+ int dynptr_id = 0, i;
+
+ /* Find the id of the dynptr we're acquiring a reference to */
+ for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
+ if (arg_type_is_dynptr(fn->arg_type[i])) {
+ if (dynptr_id) {
+ verbose(env, "verifier internal error: multiple dynptr args in func\n");
+ return -EFAULT;
+ }
+ dynptr_id = stack_slot_get_id(env, &regs[BPF_REG_1 + i]);
+ }
+ }
+ /* For release_reference() */
+ regs[BPF_REG_0].ref_obj_id = dynptr_id;
}
do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
@@ -6842,22 +7473,23 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
}
}
-static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
+static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ int *insn_idx_p)
{
const struct btf_type *t, *func, *func_proto, *ptr_type;
struct bpf_reg_state *regs = cur_regs(env);
const char *func_name, *ptr_type_name;
u32 i, nargs, func_id, ptr_type_id;
- struct module *btf_mod = NULL;
+ int err, insn_idx = *insn_idx_p;
const struct btf_param *args;
struct btf *desc_btf;
- int err;
+ bool acq;
/* skip for now, but return error when we find this in fixup_kfunc_call */
if (!insn->imm)
return 0;
- desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod);
+ desc_btf = find_kfunc_desc_btf(env, insn->off);
if (IS_ERR(desc_btf))
return PTR_ERR(desc_btf);
@@ -6866,23 +7498,43 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
func_name = btf_name_by_offset(desc_btf, func->name_off);
func_proto = btf_type_by_id(desc_btf, func->type);
- if (!env->ops->check_kfunc_call ||
- !env->ops->check_kfunc_call(func_id, btf_mod)) {
+ if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_CHECK, func_id)) {
verbose(env, "calling kernel function %s is not allowed\n",
func_name);
return -EACCES;
}
+ acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_ACQUIRE, func_id);
+
/* Check the arguments */
err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
- if (err)
+ if (err < 0)
return err;
+ /* In case of release function, we get register number of refcounted
+ * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now
+ */
+ if (err) {
+ err = release_reference(env, regs[err].ref_obj_id);
+ if (err) {
+ verbose(env, "kfunc %s#%d reference has not been acquired before\n",
+ func_name, func_id);
+ return err;
+ }
+ }
for (i = 0; i < CALLER_SAVED_REGS; i++)
mark_reg_not_init(env, regs, caller_saved[i]);
/* Check return type */
t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
+
+ if (acq && !btf_type_is_ptr(t)) {
+ verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
+ return -EINVAL;
+ }
+
if (btf_type_is_scalar(t)) {
mark_reg_unknown(env, regs, BPF_REG_0);
mark_btf_func_reg_size(env, BPF_REG_0, t->size);
@@ -6901,7 +7553,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
regs[BPF_REG_0].btf = desc_btf;
regs[BPF_REG_0].type = PTR_TO_BTF_ID;
regs[BPF_REG_0].btf_id = ptr_type_id;
+ if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+ BTF_KFUNC_TYPE_RET_NULL, func_id)) {
+ regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
+ /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
+ regs[BPF_REG_0].id = ++env->id_gen;
+ }
mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
+ if (acq) {
+ int id = acquire_reference_state(env, insn_idx);
+
+ if (id < 0)
+ return id;
+ regs[BPF_REG_0].id = id;
+ regs[BPF_REG_0].ref_obj_id = id;
+ }
} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
nargs = btf_type_vlen(func_proto);
@@ -7305,7 +7971,7 @@ static int sanitize_check_bounds(struct bpf_verifier_env *env,
return -EACCES;
break;
case PTR_TO_MAP_VALUE:
- if (check_map_access(env, dst, dst_reg->off, 1, false)) {
+ if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
verbose(env, "R%d pointer arithmetic of map value goes out of range, "
"prohibited for !root\n", dst);
return -EACCES;
@@ -9548,7 +10214,6 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
dst_reg->mem_size = aux->btf_var.mem_size;
break;
case PTR_TO_BTF_ID:
- case PTR_TO_PERCPU_BTF_ID:
dst_reg->btf = aux->btf_var.btf;
dst_reg->btf_id = aux->btf_var.btf_id;
break;
@@ -10273,8 +10938,7 @@ static void adjust_btf_func(struct bpf_verifier_env *env)
aux->func_info[i].insn_off = env->subprog_info[i].start;
}
-#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \
- sizeof(((struct bpf_line_info *)(0))->line_col))
+#define MIN_BPF_LINEINFO_SIZE offsetofend(struct bpf_line_info, line_col)
#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
static int check_btf_line(struct bpf_verifier_env *env,
@@ -11549,7 +12213,7 @@ static int do_check(struct bpf_verifier_env *env)
if (insn->src_reg == BPF_PSEUDO_CALL)
err = check_func_call(env, insn, &env->insn_idx);
else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
- err = check_kfunc_call(env, insn);
+ err = check_kfunc_call(env, insn, &env->insn_idx);
else
err = check_helper_call(env, insn, &env->insn_idx);
if (err)
@@ -11748,7 +12412,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
type = t->type;
t = btf_type_skip_modifiers(btf, type, NULL);
if (percpu) {
- aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
+ aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
aux->btf_var.btf = btf;
aux->btf_var.btf_id = type;
} else if (!btf_type_is_struct(t)) {
@@ -12696,7 +13360,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
if (!ctx_access)
continue;
- switch (env->insn_aux_data[i + delta].ptr_type) {
+ switch ((int)env->insn_aux_data[i + delta].ptr_type) {
case PTR_TO_CTX:
if (!ops->convert_ctx_access)
continue;
@@ -12713,6 +13377,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
break;
case PTR_TO_BTF_ID:
+ case PTR_TO_BTF_ID | PTR_UNTRUSTED:
if (type == BPF_READ) {
insn->code = BPF_LDX | BPF_PROBE_MEM |
BPF_SIZE((insn)->code);
@@ -12897,6 +13562,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->name[0] = 'F';
func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
func[i]->jit_requested = 1;
+ func[i]->blinding_requested = prog->blinding_requested;
func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
func[i]->aux->linfo = prog->aux->linfo;
@@ -12992,6 +13658,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
prog->jited = 1;
prog->bpf_func = func[0]->bpf_func;
+ prog->jited_len = func[0]->jited_len;
prog->aux->func = func;
prog->aux->func_cnt = env->subprog_cnt;
bpf_prog_jit_attempt_done(prog);
@@ -13019,6 +13686,7 @@ out_free:
out_undo_insn:
/* cleanup main prog to be interpreted */
prog->jit_requested = 0;
+ prog->blinding_requested = 0;
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
if (!bpf_pseudo_call(insn))
continue;
@@ -13112,7 +13780,6 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
enum bpf_attach_type eatype = prog->expected_attach_type;
- bool expect_blinding = bpf_jit_blinding_enabled(prog);
enum bpf_prog_type prog_type = resolve_prog_type(prog);
struct bpf_insn *insn = prog->insnsi;
const struct bpf_func_proto *fn;
@@ -13276,7 +13943,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
insn->code = BPF_JMP | BPF_TAIL_CALL;
aux = &env->insn_aux_data[i + delta];
- if (env->bpf_capable && !expect_blinding &&
+ if (env->bpf_capable && !prog->blinding_requested &&
prog->jit_requested &&
!bpf_map_key_poisoned(aux) &&
!bpf_map_ptr_poisoned(aux) &&
@@ -13364,6 +14031,26 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
goto patch_call_imm;
}
+ if (insn->imm == BPF_FUNC_task_storage_get ||
+ insn->imm == BPF_FUNC_sk_storage_get ||
+ insn->imm == BPF_FUNC_inode_storage_get) {
+ if (env->prog->aux->sleepable)
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
+ else
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
+ insn_buf[1] = *insn;
+ cnt = 2;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto patch_call_imm;
+ }
+
/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
* and other inlining handlers are currently limited to 64 bit
* only.
@@ -13376,7 +14063,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
insn->imm == BPF_FUNC_map_pop_elem ||
insn->imm == BPF_FUNC_map_peek_elem ||
insn->imm == BPF_FUNC_redirect_map ||
- insn->imm == BPF_FUNC_for_each_map_elem)) {
+ insn->imm == BPF_FUNC_for_each_map_elem ||
+ insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
aux = &env->insn_aux_data[i + delta];
if (bpf_map_ptr_poisoned(aux))
goto patch_call_imm;
@@ -13425,6 +14113,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
bpf_callback_t callback_fn,
void *callback_ctx,
u64 flags))NULL));
+ BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
+ (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
patch_map_ops_generic:
switch (insn->imm) {
@@ -13452,6 +14142,9 @@ patch_map_ops_generic:
case BPF_FUNC_for_each_map_elem:
insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
continue;
+ case BPF_FUNC_map_lookup_percpu_elem:
+ insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
+ continue;
}
goto patch_call_imm;