diff options
author | Alexei Starovoitov <ast@kernel.org> | 2022-09-21 19:25:26 -0700 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2022-09-21 19:25:27 -0700 |
commit | 2d863b14fbd9795c627074b4f344141ef3621b81 (patch) | |
tree | 398875d4e0a357355669b1a14851612ff4c8fb8e | |
parent | 1d8b82c613297f24354b4d750413a7456b5cd92c (diff) | |
parent | b06b45e82b59b69f5ac6b3916ac5dbd0294efc95 (diff) |
Merge branch 'Introduce bpf_ct_set_nat_info kfunc helper'
Lorenzo Bianconi says:
====================
Introduce bpf_ct_set_nat_info kfunc helper in order to set source and
destination nat addresses/ports in a new allocated ct entry not inserted
in the connection tracking table yet.
Introduce support for per-parameter trusted args.
Changes since v2:
- use int instead of a pointer for port in bpf_ct_set_nat_info signature
- modify KF_TRUSTED_ARGS definition in order to referenced pointer constraint
just for PTR_TO_BTF_ID
- drop patch 2/4
Changes since v1:
- enable CONFIG_NF_NAT in tools/testing/selftests/bpf/config
Kumar Kartikeya Dwivedi (1):
bpf: Tweak definition of KF_TRUSTED_ARGS
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r-- | Documentation/bpf/kfuncs.rst | 24 | ||||
-rw-r--r-- | kernel/bpf/btf.c | 18 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_bpf.c | 47 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/config | 1 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 10 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/test_bpf_nf.c | 27 |
6 files changed, 110 insertions, 17 deletions
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 781731749e55..0f858156371d 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -137,14 +137,22 @@ KF_ACQUIRE and KF_RET_NULL flags. -------------------------- The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It -indicates that the all pointer arguments will always be refcounted, and have -their offset set to 0. It can be used to enforce that a pointer to a refcounted -object acquired from a kfunc or BPF helper is passed as an argument to this -kfunc without any modifications (e.g. pointer arithmetic) such that it is -trusted and points to the original object. This flag is often used for kfuncs -that operate (change some property, perform some operation) on an object that -was obtained using an acquire kfunc. Such kfuncs need an unchanged pointer to -ensure the integrity of the operation being performed on the expected object. +indicates that the all pointer arguments will always have a guaranteed lifetime, +and pointers to kernel objects are always passed to helpers in their unmodified +form (as obtained from acquire kfuncs). + +It can be used to enforce that a pointer to a refcounted object acquired from a +kfunc or BPF helper is passed as an argument to this kfunc without any +modifications (e.g. pointer arithmetic) such that it is trusted and points to +the original object. + +Meanwhile, it is also allowed pass pointers to normal memory to such kfuncs, +but those can have a non-zero offset. + +This flag is often used for kfuncs that operate (change some property, perform +some operation) on an object that was obtained using an acquire kfunc. Such +kfuncs need an unchanged pointer to ensure the integrity of the operation being +performed on the expected object. 2.4.6 KF_SLEEPABLE flag ----------------------- diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 13faede0f2b4..a44ad4b347ff 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6227,7 +6227,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, bool processing_call) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); - bool rel = false, kptr_get = false, trusted_arg = false; + bool rel = false, kptr_get = false, trusted_args = false; bool sleepable = false; struct bpf_verifier_log *log = &env->log; u32 i, nargs, ref_id, ref_obj_id = 0; @@ -6265,7 +6265,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, /* Only kfunc can be release func */ rel = kfunc_meta->flags & KF_RELEASE; kptr_get = kfunc_meta->flags & KF_KPTR_GET; - trusted_arg = kfunc_meta->flags & KF_TRUSTED_ARGS; + trusted_args = kfunc_meta->flags & KF_TRUSTED_ARGS; sleepable = kfunc_meta->flags & KF_SLEEPABLE; } @@ -6276,6 +6276,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, enum bpf_arg_type arg_type = ARG_DONTCARE; u32 regno = i + 1; struct bpf_reg_state *reg = ®s[regno]; + bool obj_ptr = false; t = btf_type_skip_modifiers(btf, args[i].type, NULL); if (btf_type_is_scalar(t)) { @@ -6323,10 +6324,17 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return -EINVAL; } + /* These register types have special constraints wrt ref_obj_id + * and offset checks. The rest of trusted args don't. + */ + obj_ptr = reg->type == PTR_TO_CTX || reg->type == PTR_TO_BTF_ID || + reg2btf_ids[base_type(reg->type)]; + /* Check if argument must be a referenced pointer, args + i has * been verified to be a pointer (after skipping modifiers). + * PTR_TO_CTX is ok without having non-zero ref_obj_id. */ - if (is_kfunc && trusted_arg && !reg->ref_obj_id) { + if (is_kfunc && trusted_args && (obj_ptr && reg->type != PTR_TO_CTX) && !reg->ref_obj_id) { bpf_log(log, "R%d must be referenced\n", regno); return -EINVAL; } @@ -6335,7 +6343,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, ref_tname = btf_name_by_offset(btf, ref_t->name_off); /* Trusted args have the same offset checks as release arguments */ - if (trusted_arg || (rel && reg->ref_obj_id)) + if ((trusted_args && obj_ptr) || (rel && reg->ref_obj_id)) arg_type |= OBJ_RELEASE; ret = check_func_arg_reg_off(env, reg, regno, arg_type); if (ret < 0) @@ -6435,7 +6443,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, reg_ref_t->name_off); if (!btf_struct_ids_match(log, reg_btf, reg_ref_id, reg->off, btf, ref_id, - trusted_arg || (rel && reg->ref_obj_id))) { + trusted_args || (rel && reg->ref_obj_id))) { bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", func_name, i, btf_type_str(ref_t), ref_tname, diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index 67df64283aef..756ea818574e 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -17,6 +17,7 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_bpf.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_nat.h> /* bpf_ct_opts - Options for CT lookup helpers * @@ -137,7 +138,6 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple, memset(&ct->proto, 0, sizeof(ct->proto)); __nf_ct_set_timeout(ct, timeout * HZ); - ct->status |= IPS_CONFIRMED; out: if (opts->netns_id >= 0) @@ -390,6 +390,7 @@ struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) struct nf_conn *nfct = (struct nf_conn *)nfct_i; int err; + nfct->status |= IPS_CONFIRMED; err = nf_conntrack_hash_check_insert(nfct); if (err < 0) { nf_conntrack_free(nfct); @@ -475,6 +476,49 @@ int bpf_ct_change_status(struct nf_conn *nfct, u32 status) return nf_ct_change_status_common(nfct, status); } +/* bpf_ct_set_nat_info - Set source or destination nat address + * + * Set source or destination nat address of the newly allocated + * nf_conn before insertion. This must be invoked for referenced + * PTR_TO_BTF_ID to nf_conn___init. + * + * Parameters: + * @nfct - Pointer to referenced nf_conn object, obtained using + * bpf_xdp_ct_alloc or bpf_skb_ct_alloc. + * @addr - Nat source/destination address + * @port - Nat source/destination port. Non-positive values are + * interpreted as select a random port. + * @manip - NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST + */ +int bpf_ct_set_nat_info(struct nf_conn___init *nfct, + union nf_inet_addr *addr, int port, + enum nf_nat_manip_type manip) +{ +#if ((IS_MODULE(CONFIG_NF_NAT) && IS_MODULE(CONFIG_NF_CONNTRACK)) || \ + IS_BUILTIN(CONFIG_NF_NAT)) + struct nf_conn *ct = (struct nf_conn *)nfct; + u16 proto = nf_ct_l3num(ct); + struct nf_nat_range2 range; + + if (proto != NFPROTO_IPV4 && proto != NFPROTO_IPV6) + return -EINVAL; + + memset(&range, 0, sizeof(struct nf_nat_range2)); + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = *addr; + range.max_addr = range.min_addr; + if (port > 0) { + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + range.min_proto.all = cpu_to_be16(port); + range.max_proto.all = range.min_proto.all; + } + + return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0; +#else + return -EOPNOTSUPP; +#endif +} + __diag_pop() BTF_SET8_START(nf_ct_kfunc_set) @@ -488,6 +532,7 @@ BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS) BTF_SET8_END(nf_ct_kfunc_set) static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = { diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 905a9be8d0a2..9213565c0311 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -63,6 +63,7 @@ CONFIG_NF_CONNTRACK=y CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_DEFRAG_IPV6=y +CONFIG_NF_NAT=y CONFIG_RC_CORE=y CONFIG_SECURITY=y CONFIG_SECURITYFS=y diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index 0677a51694c9..8a838ea8bdf3 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -26,7 +26,10 @@ enum { TEST_TC_BPF, }; -#define TIMEOUT_MS 3000 +#define TIMEOUT_MS 3000 +#define IPS_STATUS_MASK (IPS_CONFIRMED | IPS_SEEN_REPLY | \ + IPS_SRC_NAT_DONE | IPS_DST_NAT_DONE | \ + IPS_SRC_NAT | IPS_DST_NAT) static int connect_to_server(int srv_fd) { @@ -114,10 +117,11 @@ static void test_bpf_nf_ct(int mode) ASSERT_GT(skel->bss->test_delta_timeout, 8, "Test for min ct timeout update"); ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update"); ASSERT_EQ(skel->bss->test_insert_lookup_mark, 77, "Test for insert and lookup mark value"); - ASSERT_EQ(skel->bss->test_status, IPS_CONFIRMED | IPS_SEEN_REPLY, - "Test for ct status update "); + ASSERT_EQ(skel->bss->test_status, IPS_STATUS_MASK, "Test for ct status update "); ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup"); ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark"); + ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting"); + ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting"); end: if (srv_client_fd != -1) close(srv_client_fd); diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index 88842da86ddc..227e85e85dda 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <vmlinux.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #define EAFNOSUPPORT 97 #define EPROTO 71 @@ -24,6 +25,8 @@ int test_succ_lookup = -ENOENT; u32 test_delta_timeout = 0; u32 test_status = 0; u32 test_insert_lookup_mark = 0; +int test_snat_addr = -EINVAL; +int test_dnat_addr = -EINVAL; __be32 saddr = 0; __be16 sport = 0; __be32 daddr = 0; @@ -54,6 +57,8 @@ void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym; int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym; int bpf_ct_set_status(struct nf_conn *, u32) __ksym; int bpf_ct_change_status(struct nf_conn *, u32) __ksym; +int bpf_ct_set_nat_info(struct nf_conn *, union nf_inet_addr *, + int port, enum nf_nat_manip_type) __ksym; static __always_inline void nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, @@ -141,11 +146,22 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); if (ct) { + __u16 sport = bpf_get_prandom_u32(); + __u16 dport = bpf_get_prandom_u32(); + union nf_inet_addr saddr = {}; + union nf_inet_addr daddr = {}; struct nf_conn *ct_ins; bpf_ct_set_timeout(ct, 10000); ct->mark = 77; + /* snat */ + saddr.ip = bpf_get_prandom_u32(); + bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC); + /* dnat */ + daddr.ip = bpf_get_prandom_u32(); + bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST); + ct_ins = bpf_ct_insert_entry(ct); if (ct_ins) { struct nf_conn *ct_lk; @@ -153,6 +169,17 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); if (ct_lk) { + struct nf_conntrack_tuple *tuple; + + /* check snat and dnat addresses */ + tuple = &ct_lk->tuplehash[IP_CT_DIR_REPLY].tuple; + if (tuple->dst.u3.ip == saddr.ip && + tuple->dst.u.all == bpf_htons(sport)) + test_snat_addr = 0; + if (tuple->src.u3.ip == daddr.ip && + tuple->src.u.all == bpf_htons(dport)) + test_dnat_addr = 0; + /* update ct entry timeout */ bpf_ct_change_timeout(ct_lk, 10000); test_delta_timeout = ct_lk->timeout - bpf_jiffies64(); |