From c501bf55c88b834adefda870c7c092ec9052a437 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 2 Mar 2023 09:42:59 -1000 Subject: bpf: Make bpf_get_current_[ancestor_]cgroup_id() available for all program types These helpers are safe to call from any context and there's no reason to restrict access to them. Remove them from bpf_trace and filter lists and add to bpf_base_func_proto() under perfmon_capable(). v2: After consulting with Andrii, relocated in bpf_base_func_proto() so that they require bpf_capable() but not perfmon_capable(), as they don't read from or affect others on the system. Signed-off-by: Tejun Heo Cc: Andrii Nakryiko Link: https://lore.kernel.org/r/ZAD8QyoszMZiTzBY@slm.duckdns.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/cgroup.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'kernel/bpf/cgroup.c') diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index bf2fdb33fb31..a4ae422b8f12 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -2529,10 +2529,6 @@ cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_current_pid_tgid_proto; case BPF_FUNC_get_current_comm: return &bpf_get_current_comm_proto; - case BPF_FUNC_get_current_cgroup_id: - return &bpf_get_current_cgroup_id_proto; - case BPF_FUNC_get_current_ancestor_cgroup_id: - return &bpf_get_current_ancestor_cgroup_id_proto; #ifdef CONFIG_CGROUP_NET_CLASSID case BPF_FUNC_get_cgroup_classid: return &bpf_get_cgroup_classid_curr_proto; -- cgit From 0d80a619c113d0e216dbffa56b2d5ccc079ee520 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sat, 4 Mar 2023 03:12:45 +0200 Subject: bpf: allow ctx writes using BPF_ST_MEM instruction Lift the verifier restriction on using BPF_ST_MEM instructions to write to context data structures. This requires the following changes: - verifier.c:do_check() for BPF_ST updated to: - no longer forbid writes to registers of type PTR_TO_CTX; - track dst_reg type in the env->insn_aux_data[...].ptr_type field (same way it is done for BPF_STX and BPF_LDX instructions). - verifier.c:convert_ctx_access() and various callbacks invoked by it are updated to handle the BPF_ST instruction alongside BPF_STX.
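As a concrete illustration of what this lifts (a sketch, not part of the patch; it assumes a program type for which skb->mark is a writable 4-byte context field, e.g. SCHED_CLS), a program like the one below previously failed verification with "BPF_ST stores into R1 ctx is not allowed" and is now converted the same way as the equivalent BPF_STX register store:

	/* Sketch: store an immediate into ctx via BPF_ST; r1 holds the
	 * __sk_buff context pointer on program entry.
	 */
	struct bpf_insn prog[] = {
		/* *(u32 *)(r1 + offsetof(struct __sk_buff, mark)) = 42 */
		BPF_ST_MEM(BPF_W, BPF_REG_1, offsetof(struct __sk_buff, mark), 42),
		BPF_MOV64_IMM(BPF_REG_0, 0),	/* return 0 */
		BPF_EXIT_INSN(),
	};
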
Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230304011247.566040-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/cgroup.c | 49 ++++++++----- kernel/bpf/verifier.c | 110 ++++++++++++++--------------- net/core/filter.c | 79 ++++++++++++--------- tools/testing/selftests/bpf/verifier/ctx.c | 11 --- 4 files changed, 126 insertions(+), 123 deletions(-) (limited to 'kernel/bpf/cgroup.c') diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index a4ae422b8f12..53edb8ad2471 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -2223,10 +2223,12 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type, BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), treg, si->dst_reg, offsetof(struct bpf_sysctl_kern, ppos)); - *insn++ = BPF_STX_MEM( - BPF_SIZEOF(u32), treg, si->src_reg, + *insn++ = BPF_RAW_INSN( + BPF_CLASS(si->code) | BPF_MEM | BPF_SIZEOF(u32), + treg, si->src_reg, bpf_ctx_narrow_access_offset( - 0, sizeof(u32), sizeof(loff_t))); + 0, sizeof(u32), sizeof(loff_t)), + si->imm); *insn++ = BPF_LDX_MEM( BPF_DW, treg, si->dst_reg, offsetof(struct bpf_sysctl_kern, tmp_reg)); @@ -2376,10 +2378,17 @@ static bool cg_sockopt_is_valid_access(int off, int size, return true; } -#define CG_SOCKOPT_ACCESS_FIELD(T, F) \ - T(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \ - si->dst_reg, si->src_reg, \ - offsetof(struct bpf_sockopt_kern, F)) +#define CG_SOCKOPT_READ_FIELD(F) \ + BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \ + si->dst_reg, si->src_reg, \ + offsetof(struct bpf_sockopt_kern, F)) + +#define CG_SOCKOPT_WRITE_FIELD(F) \ + BPF_RAW_INSN((BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F) | \ + BPF_MEM | BPF_CLASS(si->code)), \ + si->dst_reg, si->src_reg, \ + offsetof(struct bpf_sockopt_kern, F), \ + si->imm) static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, @@ -2391,25 +2400,25 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, switch (si->off) { case offsetof(struct bpf_sockopt, sk): - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, sk); + *insn++ = CG_SOCKOPT_READ_FIELD(sk); break; case offsetof(struct bpf_sockopt, level): if (type == BPF_WRITE) - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, level); + *insn++ = CG_SOCKOPT_WRITE_FIELD(level); else - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, level); + *insn++ = CG_SOCKOPT_READ_FIELD(level); break; case offsetof(struct bpf_sockopt, optname): if (type == BPF_WRITE) - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optname); + *insn++ = CG_SOCKOPT_WRITE_FIELD(optname); else - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optname); + *insn++ = CG_SOCKOPT_READ_FIELD(optname); break; case offsetof(struct bpf_sockopt, optlen): if (type == BPF_WRITE) - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optlen); + *insn++ = CG_SOCKOPT_WRITE_FIELD(optlen); else - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen); + *insn++ = CG_SOCKOPT_READ_FIELD(optlen); break; case offsetof(struct bpf_sockopt, retval): BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0); @@ -2429,9 +2438,11 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx), treg, treg, offsetof(struct task_struct, bpf_ctx)); - *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval), - treg, si->src_reg, - offsetof(struct bpf_cg_run_ctx, retval)); + *insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_MEM | + BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval), + treg, 
si->src_reg, + offsetof(struct bpf_cg_run_ctx, retval), + si->imm); *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg, offsetof(struct bpf_sockopt_kern, tmp_reg)); } else { @@ -2447,10 +2458,10 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, } break; case offsetof(struct bpf_sockopt, optval): - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval); + *insn++ = CG_SOCKOPT_READ_FIELD(optval); break; case offsetof(struct bpf_sockopt, optval_end): - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval_end); + *insn++ = CG_SOCKOPT_READ_FIELD(optval_end); break; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c2adf3c24c64..4c5d2b5e25c8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14813,6 +14813,44 @@ static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) !reg_type_mismatch_ok(prev)); } +static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type, + bool allow_trust_missmatch) +{ + enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type; + + if (*prev_type == NOT_INIT) { + /* Saw a valid insn + * dst_reg = *(u32 *)(src_reg + off) + * save type to validate intersecting paths + */ + *prev_type = type; + } else if (reg_type_mismatch(type, *prev_type)) { + /* Abuser program is trying to use the same insn + * dst_reg = *(u32*) (src_reg + off) + * with different pointer types: + * src_reg == ctx in one branch and + * src_reg == stack|map in some other branch. + * Reject it. + */ + if (allow_trust_missmatch && + base_type(type) == PTR_TO_BTF_ID && + base_type(*prev_type) == PTR_TO_BTF_ID) { + /* + * Have to support a use case when one path through + * the program yields TRUSTED pointer while another + * is UNTRUSTED. Fallback to UNTRUSTED to generate + * BPF_PROBE_MEM. + */ + *prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED; + } else { + verbose(env, "same insn cannot be used with different pointers\n"); + return -EINVAL; + } + } + + return 0; +} + static int do_check(struct bpf_verifier_env *env) { bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); @@ -14922,7 +14960,7 @@ static int do_check(struct bpf_verifier_env *env) return err; } else if (class == BPF_LDX) { - enum bpf_reg_type *prev_src_type, src_reg_type; + enum bpf_reg_type src_reg_type; /* check for reserved fields is already done */ @@ -14946,43 +14984,11 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; - prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type; - - if (*prev_src_type == NOT_INIT) { - /* saw a valid insn - * dst_reg = *(u32 *)(src_reg + off) - * save type to validate intersecting paths - */ - *prev_src_type = src_reg_type; - - } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) { - /* ABuser program is trying to use the same insn - * dst_reg = *(u32*) (src_reg + off) - * with different pointer types: - * src_reg == ctx in one branch and - * src_reg == stack|map in some other branch. - * Reject it. - */ - if (base_type(src_reg_type) == PTR_TO_BTF_ID && - base_type(*prev_src_type) == PTR_TO_BTF_ID) { - /* - * Have to support a use case when one path through - * the program yields TRUSTED pointer while another - * is UNTRUSTED. Fallback to UNTRUSTED to generate - * BPF_PROBE_MEM. 
- */ - *prev_src_type = PTR_TO_BTF_ID | PTR_UNTRUSTED; - } else { - verbose(env, - "The same insn cannot be used with different pointers: %s", - reg_type_str(env, src_reg_type)); - verbose(env, " != %s\n", reg_type_str(env, *prev_src_type)); - return -EINVAL; - } - } - + err = save_aux_ptr_type(env, src_reg_type, true); + if (err) + return err; } else if (class == BPF_STX) { - enum bpf_reg_type *prev_dst_type, dst_reg_type; + enum bpf_reg_type dst_reg_type; if (BPF_MODE(insn->code) == BPF_ATOMIC) { err = check_atomic(env, env->insn_idx, insn); @@ -15015,16 +15021,12 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; - prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type; - - if (*prev_dst_type == NOT_INIT) { - *prev_dst_type = dst_reg_type; - } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) { - verbose(env, "same insn cannot be used with different pointers\n"); - return -EINVAL; - } - + err = save_aux_ptr_type(env, dst_reg_type, false); + if (err) + return err; } else if (class == BPF_ST) { + enum bpf_reg_type dst_reg_type; + if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) { verbose(env, "BPF_ST uses reserved fields\n"); @@ -15035,12 +15037,7 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; - if (is_ctx_reg(env, insn->dst_reg)) { - verbose(env, "BPF_ST stores into R%d %s is not allowed\n", - insn->dst_reg, - reg_type_str(env, reg_state(env, insn->dst_reg)->type)); - return -EACCES; - } + dst_reg_type = regs[insn->dst_reg].type; /* check that memory (dst_reg + off) is writeable */ err = check_mem_access(env, env->insn_idx, insn->dst_reg, @@ -15049,6 +15046,9 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; + err = save_aux_ptr_type(env, dst_reg_type, false); + if (err) + return err; } else if (class == BPF_JMP || class == BPF_JMP32) { u8 opcode = BPF_OP(insn->code); @@ -16157,14 +16157,12 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { bpf_convert_ctx_access_t convert_ctx_access; - bool ctx_access; if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || insn->code == (BPF_LDX | BPF_MEM | BPF_H) || insn->code == (BPF_LDX | BPF_MEM | BPF_W) || insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) { type = BPF_READ; - ctx_access = true; } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || insn->code == (BPF_STX | BPF_MEM | BPF_H) || insn->code == (BPF_STX | BPF_MEM | BPF_W) || @@ -16174,7 +16172,6 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) insn->code == (BPF_ST | BPF_MEM | BPF_W) || insn->code == (BPF_ST | BPF_MEM | BPF_DW)) { type = BPF_WRITE; - ctx_access = BPF_CLASS(insn->code) == BPF_STX; } else { continue; } @@ -16197,9 +16194,6 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) continue; } - if (!ctx_access) - continue; - switch ((int)env->insn_aux_data[i + delta].ptr_type) { case PTR_TO_CTX: if (!ops->convert_ctx_access) diff --git a/net/core/filter.c b/net/core/filter.c index a2dc44e70ea0..50f649f1b4a9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9279,11 +9279,15 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, #endif /* : skb->tstamp = tstamp */ - *insn++ = BPF_STX_MEM(BPF_DW, skb_reg, value_reg, - offsetof(struct sk_buff, tstamp)); + *insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_DW | BPF_MEM, + skb_reg, value_reg, offsetof(struct sk_buff, tstamp), si->imm); return insn; } +#define BPF_EMIT_STORE(size, si, off) \ + BPF_RAW_INSN(BPF_CLASS((si)->code) 
| (size) | BPF_MEM, \ + (si)->dst_reg, (si)->src_reg, (off), (si)->imm) + static u32 bpf_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, @@ -9313,9 +9317,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, priority): if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - bpf_target_off(struct sk_buff, priority, 4, - target_size)); + *insn++ = BPF_EMIT_STORE(BPF_W, si, + bpf_target_off(struct sk_buff, priority, 4, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, priority, 4, @@ -9346,9 +9350,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, mark): if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - bpf_target_off(struct sk_buff, mark, 4, - target_size)); + *insn++ = BPF_EMIT_STORE(BPF_W, si, + bpf_target_off(struct sk_buff, mark, 4, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, mark, 4, @@ -9367,11 +9371,16 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, queue_mapping): if (type == BPF_WRITE) { - *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1); - *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, - bpf_target_off(struct sk_buff, - queue_mapping, - 2, target_size)); + u32 off = bpf_target_off(struct sk_buff, queue_mapping, 2, target_size); + + if (BPF_CLASS(si->code) == BPF_ST && si->imm >= NO_QUEUE_MAPPING) { + *insn++ = BPF_JMP_A(0); /* noop */ + break; + } + + if (BPF_CLASS(si->code) == BPF_STX) + *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1); + *insn++ = BPF_EMIT_STORE(BPF_H, si, off); } else { *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, @@ -9407,8 +9416,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, off += offsetof(struct sk_buff, cb); off += offsetof(struct qdisc_skb_cb, data); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg, - si->src_reg, off); + *insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off); else *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, si->src_reg, off); @@ -9423,8 +9431,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, off += offsetof(struct qdisc_skb_cb, tc_classid); *target_size = 2; if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, - si->src_reg, off); + *insn++ = BPF_EMIT_STORE(BPF_H, si, off); else *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, off); @@ -9457,9 +9464,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, tc_index): #ifdef CONFIG_NET_SCHED if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, - bpf_target_off(struct sk_buff, tc_index, 2, - target_size)); + *insn++ = BPF_EMIT_STORE(BPF_H, si, + bpf_target_off(struct sk_buff, tc_index, 2, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, bpf_target_off(struct sk_buff, tc_index, 2, @@ -9660,8 +9667,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != 4); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sock, sk_bound_dev_if)); + *insn++ = BPF_EMIT_STORE(BPF_W, si, + offsetof(struct sock, sk_bound_dev_if)); else *insn++ = BPF_LDX_MEM(BPF_W, 
si->dst_reg, si->src_reg, offsetof(struct sock, sk_bound_dev_if)); @@ -9671,8 +9678,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != 4); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sock, sk_mark)); + *insn++ = BPF_EMIT_STORE(BPF_W, si, + offsetof(struct sock, sk_mark)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, sk_mark)); @@ -9682,8 +9689,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != 4); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sock, sk_priority)); + *insn++ = BPF_EMIT_STORE(BPF_W, si, + offsetof(struct sock, sk_priority)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, sk_priority)); @@ -9948,10 +9955,12 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, offsetof(S, TF)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \ si->dst_reg, offsetof(S, F)); \ - *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \ + *insn++ = BPF_RAW_INSN(SIZE | BPF_MEM | BPF_CLASS(si->code), \ + tmp_reg, si->src_reg, \ bpf_target_off(NS, NF, sizeof_field(NS, NF), \ target_size) \ - + OFF); \ + + OFF, \ + si->imm); \ *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \ offsetof(S, TF)); \ } while (0) @@ -10186,9 +10195,11 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, struct bpf_sock_ops_kern, sk),\ reg, si->dst_reg, \ offsetof(struct bpf_sock_ops_kern, sk));\ - *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \ - reg, si->src_reg, \ - offsetof(OBJ, OBJ_FIELD)); \ + *insn++ = BPF_RAW_INSN(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD) | \ + BPF_MEM | BPF_CLASS(si->code), \ + reg, si->src_reg, \ + offsetof(OBJ, OBJ_FIELD), \ + si->imm); \ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \ offsetof(struct bpf_sock_ops_kern, \ temp)); \ @@ -10220,8 +10231,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, off -= offsetof(struct bpf_sock_ops, replylong[0]); off += offsetof(struct bpf_sock_ops_kern, replylong[0]); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - off); + *insn++ = BPF_EMIT_STORE(BPF_W, si, off); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); @@ -10578,8 +10588,7 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, off += offsetof(struct sk_buff, cb); off += offsetof(struct sk_skb_cb, data); if (type == BPF_WRITE) - *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg, - si->src_reg, off); + *insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off); else *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, si->src_reg, off); diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c index c8eaf0536c24..2fd31612c0b8 100644 --- a/tools/testing/selftests/bpf/verifier/ctx.c +++ b/tools/testing/selftests/bpf/verifier/ctx.c @@ -1,14 +1,3 @@ -{ - "context stores via ST", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0), - BPF_EXIT_INSN(), - }, - .errstr = "BPF_ST stores into R1 ctx is not allowed", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, { "context stores via BPF_ATOMIC", .insns = { -- cgit From 4cdb91b0dea7d7f59fa84a13c7753cd434fdedcf Mon Sep 17 00:00:00 2001 From: Kamalesh Babulal Date: Fri, 3 Mar 2023 15:23:10 +0530 Subject: cgroup: bpf: use 
cgroup_lock()/cgroup_unlock() wrappers Replace mutex_[un]lock() with cgroup_[un]lock() wrappers to stay consistent across cgroup core and other subsystem code, while operating on the cgroup_mutex. Signed-off-by: Kamalesh Babulal Acked-by: Alexei Starovoitov Reviewed-by: Christian Brauner Signed-off-by: Tejun Heo --- kernel/bpf/cgroup.c | 38 ++++++++++++++--------------- kernel/bpf/cgroup_iter.c | 4 ++-- kernel/bpf/local_storage.c | 4 ++-- kernel/cgroup/cgroup-v1.c | 16 ++++++------- kernel/cgroup/cgroup.c | 60 +++++++++++++++++++++++----------------------- 5 files changed, 61 insertions(+), 61 deletions(-) (limited to 'kernel/bpf/cgroup.c') diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index bf2fdb33fb31..3458701dcb28 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -173,11 +173,11 @@ void bpf_cgroup_atype_put(int cgroup_atype) { int i = cgroup_atype - CGROUP_LSM_START; - mutex_lock(&cgroup_mutex); + cgroup_lock(); if (--cgroup_lsm_atype[i].refcnt <= 0) cgroup_lsm_atype[i].attach_btf_id = 0; WARN_ON_ONCE(cgroup_lsm_atype[i].refcnt < 0); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); } #else static enum cgroup_bpf_attach_type @@ -282,7 +282,7 @@ static void cgroup_bpf_release(struct work_struct *work) unsigned int atype; - mutex_lock(&cgroup_mutex); + cgroup_lock(); for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) { struct hlist_head *progs = &cgrp->bpf.progs[atype]; @@ -315,7 +315,7 @@ static void cgroup_bpf_release(struct work_struct *work) bpf_cgroup_storage_free(storage); } - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p)) cgroup_bpf_put(p); @@ -729,9 +729,9 @@ static int cgroup_bpf_attach(struct cgroup *cgrp, { int ret; - mutex_lock(&cgroup_mutex); + cgroup_lock(); ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); return ret; } @@ -831,7 +831,7 @@ static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog, cg_link = container_of(link, struct bpf_cgroup_link, link); - mutex_lock(&cgroup_mutex); + cgroup_lock(); /* link might have been auto-released by dying cgroup, so fail */ if (!cg_link->cgroup) { ret = -ENOLINK; @@ -843,7 +843,7 @@ static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog, } ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog); out_unlock: - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); return ret; } @@ -1009,9 +1009,9 @@ static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, { int ret; - mutex_lock(&cgroup_mutex); + cgroup_lock(); ret = __cgroup_bpf_detach(cgrp, prog, NULL, type); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); return ret; } @@ -1120,9 +1120,9 @@ static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, { int ret; - mutex_lock(&cgroup_mutex); + cgroup_lock(); ret = __cgroup_bpf_query(cgrp, attr, uattr); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); return ret; } @@ -1189,11 +1189,11 @@ static void bpf_cgroup_link_release(struct bpf_link *link) if (!cg_link->cgroup) return; - mutex_lock(&cgroup_mutex); + cgroup_lock(); /* re-check cgroup under lock again */ if (!cg_link->cgroup) { - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); return; } @@ -1205,7 +1205,7 @@ static void bpf_cgroup_link_release(struct bpf_link *link) cg = cg_link->cgroup; cg_link->cgroup = NULL; - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); cgroup_put(cg); } @@ -1232,10 +1232,10 @@ static void 
bpf_cgroup_link_show_fdinfo(const struct bpf_link *link, container_of(link, struct bpf_cgroup_link, link); u64 cg_id = 0; - mutex_lock(&cgroup_mutex); + cgroup_lock(); if (cg_link->cgroup) cg_id = cgroup_id(cg_link->cgroup); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); seq_printf(seq, "cgroup_id:\t%llu\n" @@ -1251,10 +1251,10 @@ static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link, container_of(link, struct bpf_cgroup_link, link); u64 cg_id = 0; - mutex_lock(&cgroup_mutex); + cgroup_lock(); if (cg_link->cgroup) cg_id = cgroup_id(cg_link->cgroup); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); info->cgroup.cgroup_id = cg_id; info->cgroup.attach_type = cg_link->type; diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c index 06989d278846..810378f04fbc 100644 --- a/kernel/bpf/cgroup_iter.c +++ b/kernel/bpf/cgroup_iter.c @@ -58,7 +58,7 @@ static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos) { struct cgroup_iter_priv *p = seq->private; - mutex_lock(&cgroup_mutex); + cgroup_lock(); /* cgroup_iter doesn't support read across multiple sessions. */ if (*pos > 0) { @@ -89,7 +89,7 @@ static void cgroup_iter_seq_stop(struct seq_file *seq, void *v) { struct cgroup_iter_priv *p = seq->private; - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); /* pass NULL to the prog for post-processing */ if (!v) { diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index e90d9f63edc5..9bceefee14d5 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -333,14 +333,14 @@ static void cgroup_storage_map_free(struct bpf_map *_map) struct list_head *storages = &map->list; struct bpf_cgroup_storage *storage, *stmp; - mutex_lock(&cgroup_mutex); + cgroup_lock(); list_for_each_entry_safe(storage, stmp, storages, list_map) { bpf_cgroup_storage_unlink(storage); bpf_cgroup_storage_free(storage); } - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); WARN_ON(!RB_EMPTY_ROOT(&map->root)); WARN_ON(!list_empty(&map->list)); diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 52bb5a74a23b..aeef06c465ef 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -58,7 +58,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) struct cgroup_root *root; int retval = 0; - mutex_lock(&cgroup_mutex); + cgroup_lock(); cgroup_attach_lock(true); for_each_root(root) { struct cgroup *from_cgrp; @@ -72,7 +72,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) break; } cgroup_attach_unlock(true); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); return retval; } @@ -106,7 +106,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) if (ret) return ret; - mutex_lock(&cgroup_mutex); + cgroup_lock(); percpu_down_write(&cgroup_threadgroup_rwsem); @@ -145,7 +145,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) out_err: cgroup_migrate_finish(&mgctx); percpu_up_write(&cgroup_threadgroup_rwsem); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); return ret; } @@ -847,13 +847,13 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent kernfs_break_active_protection(new_parent); kernfs_break_active_protection(kn); - mutex_lock(&cgroup_mutex); + cgroup_lock(); ret = kernfs_rename(kn, new_parent, new_name_str); if (!ret) TRACE_CGROUP_PATH(rename, cgrp); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); kernfs_unbreak_active_protection(kn); kernfs_unbreak_active_protection(new_parent); @@ -1119,7 +1119,7 @@ int 
cgroup1_reconfigure(struct fs_context *fc) trace_cgroup_remount(root); out_unlock: - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); return ret; } @@ -1246,7 +1246,7 @@ int cgroup1_get_tree(struct fs_context *fc) if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt)) ret = 1; /* restart */ - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); if (!ret) ret = cgroup_do_get_tree(fc); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 935e8121b21e..83ea13f2ccb1 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1391,7 +1391,7 @@ static void cgroup_destroy_root(struct cgroup_root *root) cgroup_favor_dynmods(root, false); cgroup_exit_root_id(root); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); cgroup_rstat_exit(cgrp); kernfs_destroy_root(root->kf_root); @@ -1625,7 +1625,7 @@ void cgroup_kn_unlock(struct kernfs_node *kn) else cgrp = kn->parent->priv; - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); kernfs_unbreak_active_protection(kn); cgroup_put(cgrp); @@ -1670,7 +1670,7 @@ struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline) if (drain_offline) cgroup_lock_and_drain_offline(cgrp); else - mutex_lock(&cgroup_mutex); + cgroup_lock(); if (!cgroup_is_dead(cgrp)) return cgrp; @@ -2167,13 +2167,13 @@ int cgroup_do_get_tree(struct fs_context *fc) struct super_block *sb = fc->root->d_sb; struct cgroup *cgrp; - mutex_lock(&cgroup_mutex); + cgroup_lock(); spin_lock_irq(&css_set_lock); cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root); spin_unlock_irq(&css_set_lock); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); nsdentry = kernfs_node_dentry(cgrp->kn, sb); dput(fc->root); @@ -2356,13 +2356,13 @@ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, { int ret; - mutex_lock(&cgroup_mutex); + cgroup_lock(); spin_lock_irq(&css_set_lock); ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns); spin_unlock_irq(&css_set_lock); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); return ret; } @@ -2388,7 +2388,7 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) int hierarchy_id = 1; int ret; - mutex_lock(&cgroup_mutex); + cgroup_lock(); spin_lock_irq(&css_set_lock); root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id); @@ -2402,7 +2402,7 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) } spin_unlock_irq(&css_set_lock); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); return ret; } EXPORT_SYMBOL_GPL(task_cgroup_path); @@ -3111,7 +3111,7 @@ void cgroup_lock_and_drain_offline(struct cgroup *cgrp) int ssid; restart: - mutex_lock(&cgroup_mutex); + cgroup_lock(); cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) { for_each_subsys(ss, ssid) { @@ -3125,7 +3125,7 @@ restart: prepare_to_wait(&dsct->offline_waitq, &wait, TASK_UNINTERRUPTIBLE); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); schedule(); finish_wait(&dsct->offline_waitq, &wait); @@ -4374,9 +4374,9 @@ int cgroup_rm_cftypes(struct cftype *cfts) if (!(cfts[0].flags & __CFTYPE_ADDED)) return -ENOENT; - mutex_lock(&cgroup_mutex); + cgroup_lock(); ret = cgroup_rm_cftypes_locked(cfts); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); return ret; } @@ -4408,14 +4408,14 @@ static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) if (ret) return ret; - mutex_lock(&cgroup_mutex); + cgroup_lock(); list_add_tail(&cfts->node, &ss->cfts); ret = cgroup_apply_cftypes(cfts, true); if (ret) cgroup_rm_cftypes_locked(cfts); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); return ret; } @@ -5385,7 
+5385,7 @@ static void css_release_work_fn(struct work_struct *work) struct cgroup_subsys *ss = css->ss; struct cgroup *cgrp = css->cgroup; - mutex_lock(&cgroup_mutex); + cgroup_lock(); css->flags |= CSS_RELEASED; list_del_rcu(&css->sibling); @@ -5426,7 +5426,7 @@ static void css_release_work_fn(struct work_struct *work) NULL); } - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); @@ -5774,7 +5774,7 @@ static void css_killed_work_fn(struct work_struct *work) struct cgroup_subsys_state *css = container_of(work, struct cgroup_subsys_state, destroy_work); - mutex_lock(&cgroup_mutex); + cgroup_lock(); do { offline_css(css); @@ -5783,7 +5783,7 @@ static void css_killed_work_fn(struct work_struct *work) css = css->parent; } while (css && atomic_dec_and_test(&css->online_cnt)); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); } /* css kill confirmation processing requires process context, bounce */ @@ -5967,7 +5967,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) pr_debug("Initializing cgroup subsys %s\n", ss->name); - mutex_lock(&cgroup_mutex); + cgroup_lock(); idr_init(&ss->css_idr); INIT_LIST_HEAD(&ss->cfts); @@ -6011,7 +6011,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) BUG_ON(online_css(css)); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); } /** @@ -6071,7 +6071,7 @@ int __init cgroup_init(void) get_user_ns(init_cgroup_ns.user_ns); - mutex_lock(&cgroup_mutex); + cgroup_lock(); /* * Add init_css_set to the hash table so that dfl_root can link to @@ -6082,7 +6082,7 @@ int __init cgroup_init(void) BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0)); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); for_each_subsys(ss, ssid) { if (ss->early_init) { @@ -6134,9 +6134,9 @@ int __init cgroup_init(void) if (ss->bind) ss->bind(init_css_set.subsys[ssid]); - mutex_lock(&cgroup_mutex); + cgroup_lock(); css_populate_dir(init_css_set.subsys[ssid]); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); } /* init_css_set.subsys[] has been updated, re-hash */ @@ -6241,7 +6241,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, if (!buf) goto out; - mutex_lock(&cgroup_mutex); + cgroup_lock(); spin_lock_irq(&css_set_lock); for_each_root(root) { @@ -6296,7 +6296,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, retval = 0; out_unlock: spin_unlock_irq(&css_set_lock); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); kfree(buf); out: return retval; @@ -6380,7 +6380,7 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) struct file *f; if (kargs->flags & CLONE_INTO_CGROUP) - mutex_lock(&cgroup_mutex); + cgroup_lock(); cgroup_threadgroup_change_begin(current); @@ -6455,7 +6455,7 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) err: cgroup_threadgroup_change_end(current); - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); if (f) fput(f); if (dst_cgrp) @@ -6482,7 +6482,7 @@ static void cgroup_css_set_put_fork(struct kernel_clone_args *kargs) struct cgroup *cgrp = kargs->cgrp; struct css_set *cset = kargs->cset; - mutex_unlock(&cgroup_mutex); + cgroup_unlock(); if (cset) { put_css_set(cset); -- cgit From 00e74ae0863827d944e36e56a4ce1e77e50edb91 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 18 Apr 2023 15:53:38 -0700 Subject: bpf: Don't EFAULT for getsockopt with optval=NULL Some socket options do getsockopt with optval=NULL to estimate the size of the final buffer 
(which is returned via optlen). This breaks BPF getsockopt assumptions about permitted optval buffer size. Let's enforce these assumptions only when non-NULL optval is provided. Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Reported-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/ZD7Js4fj5YyI2oLd@google.com/T/#mb68daf700f87a9244a15d01d00c3f0e5b08f49f7 Link: https://lore.kernel.org/bpf/20230418225343.553806-2-sdf@google.com --- kernel/bpf/cgroup.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/bpf/cgroup.c') diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 53edb8ad2471..a06e118a9be5 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1921,14 +1921,17 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, if (ret < 0) goto out; - if (ctx.optlen > max_optlen || ctx.optlen < 0) { + if (optval && (ctx.optlen > max_optlen || ctx.optlen < 0)) { ret = -EFAULT; goto out; } if (ctx.optlen != 0) { - if (copy_to_user(optval, ctx.optval, ctx.optlen) || - put_user(ctx.optlen, optlen)) { + if (optval && copy_to_user(optval, ctx.optval, ctx.optlen)) { + ret = -EFAULT; + goto out; + } + if (put_user(ctx.optlen, optlen)) { ret = -EFAULT; goto out; } -- cgit
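
For context on the calling pattern this fix accommodates, here is a sketch of the optval == NULL probe described above (fd, level and optname are placeholders; which socket options honor this convention, and what the probe call returns, is option-specific):

	#include <stdlib.h>
	#include <sys/socket.h>

	/* Ask the kernel for the required size first, then allocate and fetch. */
	static void *getsockopt_alloc(int fd, int level, int optname, socklen_t *len)
	{
		void *buf;

		*len = 0;
		getsockopt(fd, level, optname, NULL, len);	/* probe: optval == NULL */
		if (!*len)
			return NULL;
		buf = malloc(*len);
		if (buf && getsockopt(fd, level, optname, buf, len)) {
			free(buf);
			return NULL;
		}
		return buf;
	}

With a cgroup getsockopt program attached, the probe call could fail with -EFAULT whenever the program enlarged ctx.optlen beyond the (empty) user buffer; after this change the ctx.optlen bounds check and the copy_to_user() of optval are skipped when optval is NULL, while put_user(ctx.optlen, optlen) still reports the size back to userspace.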