aboutsummaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
author: Daniel Borkmann <[email protected]> 2018-10-03 02:53:49 +0200
committer: Daniel Borkmann <[email protected]> 2018-10-03 02:53:50 +0200
commit: 33d9a7fd675678e7ea739ccb104abc1139caf19e (patch)
tree: 09c60a0021239cf3718da9d91bbe59b2e2e6361b /include/linux
parent: 940656fb3f94b90b92a3fc5994614870b14866b9 (diff)
parent: a610b665ec9ec9ba076e6dd8703750999c81eae4 (diff)
Merge branch 'bpf-sk-lookup'
Joe Stringer says:

====================
This series proposes a new helper for the BPF API which allows BPF programs to perform lookups for sockets in a network namespace. This would allow programs to determine early on in processing whether the stack is expecting to receive the packet, and perform some action (e.g. drop, forward somewhere) based on this information.

The series is structured roughly into:
* Misc refactor
* Add the socket pointer type
* Add reference tracking to ensure that socket references are freed
* Extend the BPF API to add sk_lookup_xxx() / sk_release() functions
* Add tests/documentation

The helper proposed in this series includes a parameter for a tuple which must be filled in by the caller to determine the socket to look up. The simplest case would be filling with the contents of the packet, i.e. mapping the packet's 5-tuple into the parameter. In common cases, it may alternatively be useful to reverse the direction of the tuple and perform a lookup, to find the socket that initiates this connection; and if the BPF program ever performs a form of IP address translation, it may further be useful to be able to look up arbitrary tuples that are not based upon the packet, but instead based on state held in BPF maps or hardcoded in the BPF program.

Currently, access to the socket's fields is limited to those which are otherwise already accessible, and is restricted to read-only access.

Changes since v3:
* New patch: "bpf: Reuse canonical string formatter for ctx errs"
* Add PTR_TO_SOCKET to is_ctx_reg().
* Add a few new checks to prevent mixing of socket/non-socket pointers.
* Swap order of checks in sock_filter_is_valid_access().
* Prefix register spill macros with "bpf_".
* Add acks from previous round
* Rebase

Changes since v2:
* New patch: "selftests/bpf: Generalize dummy program types". This enables adding verifier tests for socket lookup with tail calls.
* Define the semantics of the new helpers more clearly in uAPI header.
* Fix release of caller_net when netns is not specified.
* Use skb->sk to find caller net when skb->dev is unavailable.
* Fix build with !CONFIG_NET.
* Replace ptr_id defensive coding when releasing reference state with an internal error (-EFAULT).
* Remove flags argument to sk_release().
* Add several new assembly tests suggested by Daniel.
* Add a few new C tests.
* Fix typo in verifier error message.

Changes since v1:
* Limit netns_id field to 32 bits
* Reuse reg_type_mismatch() in more places
* Reduce the number of passes at convert_ctx_access()
* Replace ptr_id defensive coding when releasing reference state with an internal error (-EFAULT)
* Rework 'struct bpf_sock_tuple' to allow passing a packet pointer
* Allow direct packet access from helper
* Fix compile error with CONFIG_IPV6 enabled
* Improve commit messages

Changes since RFC:
* Split up sk_lookup() into sk_lookup_tcp(), sk_lookup_udp().
* Only take references on the socket when necessary.
  * Make sk_release() only free the socket reference in this case.
* Fix some runtime reference leaks:
  * Disallow BPF_LD_[ABS|IND] instructions while holding a reference.
  * Disallow bpf_tail_call() while holding a reference.
  * Prevent the same instruction being used for reference and other pointer type.
* Simplify locating copies of a reference during helper calls by caching the pointer id from the caller.
* Fix kbuild compilation warnings with particular configs.
* Improve code comments describing the new verifier pieces.
* Tested by Nitin
====================

Signed-off-by: Daniel Borkmann <[email protected]>
Diffstat (limited to 'include/linux')
-rw-r--r-- include/linux/bpf.h 34
-rw-r--r-- include/linux/bpf_verifier.h 37
2 files changed, 68 insertions, 3 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 018299a595c8..027697b6a22f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -154,6 +154,7 @@ enum bpf_arg_type {
ARG_PTR_TO_CTX, /* pointer to context */
ARG_ANYTHING, /* any (initialized) argument is ok */
+ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */
};
/* type of values returned from helper functions */
@@ -162,6 +163,7 @@ enum bpf_return_type {
RET_VOID, /* function doesn't return anything */
RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */
RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
+ RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */
};
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -213,6 +215,8 @@ enum bpf_reg_type {
PTR_TO_PACKET, /* reg points to skb->data */
PTR_TO_PACKET_END, /* skb->data + headlen */
PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */
+ PTR_TO_SOCKET, /* reg points to struct bpf_sock */
+ PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */
};
/* The information passed from prog-specific *_is_valid_access
@@ -343,6 +347,11 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
unsigned long off, unsigned long len);
+typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
+ const struct bpf_insn *src,
+ struct bpf_insn *dst,
+ struct bpf_prog *prog,
+ u32 *target_size);
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
@@ -836,4 +845,29 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto;
void bpf_user_rnd_init_once(void);
u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+#if defined(CONFIG_NET)
+bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+ struct bpf_insn_access_aux *info);
+u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size);
+#else
+static inline bool bpf_sock_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+{
+ return false;
+}
+static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
+{
+ return 0;
+}
+#endif
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index b42b60a83e19..7b6fd2ab3263 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -58,6 +58,8 @@ struct bpf_reg_state {
* offset, so they can share range knowledge.
* For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
* came from, when one is tested for != NULL.
+ * For PTR_TO_SOCKET this is used to share which pointers retain the
+ * same reference to the socket, to determine proper reference freeing.
*/
u32 id;
/* For scalar types (SCALAR_VALUE), this represents our knowledge of
@@ -102,6 +104,17 @@ struct bpf_stack_state {
u8 slot_type[BPF_REG_SIZE];
};
+struct bpf_reference_state {
+ /* Track each reference created with a unique id, even if the same
+ * instruction creates the reference multiple times (eg, via CALL).
+ */
+ int id;
+ /* Instruction where the allocation of this reference occurred. This
+ * is used purely to inform the user of a reference leak.
+ */
+ int insn_idx;
+};
+
/* state of the program:
* type of all registers and stack info
*/
@@ -119,7 +132,9 @@ struct bpf_func_state {
*/
u32 subprogno;
- /* should be second to last. See copy_func_state() */
+ /* The following fields should be last. See copy_func_state() */
+ int acquired_refs;
+ struct bpf_reference_state *refs;
int allocated_stack;
struct bpf_stack_state *stack;
};
@@ -131,6 +146,17 @@ struct bpf_verifier_state {
u32 curframe;
};
+#define bpf_get_spilled_reg(slot, frame) \
+ (((slot < frame->allocated_stack / BPF_REG_SIZE) && \
+ (frame->stack[slot].slot_type[0] == STACK_SPILL)) \
+ ? &frame->stack[slot].spilled_ptr : NULL)
+
+/* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */
+#define bpf_for_each_spilled_reg(iter, frame, reg) \
+ for (iter = 0, reg = bpf_get_spilled_reg(iter, frame); \
+ iter < frame->allocated_stack / BPF_REG_SIZE; \
+ iter++, reg = bpf_get_spilled_reg(iter, frame))
+
/* linked list of verifier states used to prune search */
struct bpf_verifier_state_list {
struct bpf_verifier_state state;
@@ -204,11 +230,16 @@ __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
const char *fmt, ...);
-static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
+static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
{
struct bpf_verifier_state *cur = env->cur_state;
- return cur->frame[cur->curframe]->regs;
+ return cur->frame[cur->curframe];
+}
+
+static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
+{
+ return cur_func(env)->regs;
}
int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env);