aboutsummaryrefslogtreecommitdiff
path: root/kernel/trace/bpf_trace.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/bpf_trace.c')
-rw-r--r--kernel/trace/bpf_trace.c174
1 files changed, 98 insertions, 76 deletions
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index fa77311dadb2..37385193a608 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -76,8 +76,8 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
.func = bpf_probe_read,
.gpl_only = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_RAW_STACK,
- .arg2_type = ARG_CONST_STACK_SIZE,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_ANYTHING,
};
@@ -96,7 +96,7 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
if (unlikely(in_interrupt() ||
current->flags & (PF_KTHREAD | PF_EXITING)))
return -EPERM;
- if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
+ if (unlikely(uaccess_kernel()))
return -EPERM;
if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
return -EPERM;
@@ -109,8 +109,8 @@ static const struct bpf_func_proto bpf_probe_write_user_proto = {
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
- .arg2_type = ARG_PTR_TO_STACK,
- .arg3_type = ARG_CONST_STACK_SIZE,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
};
static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
@@ -122,8 +122,8 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
}
/*
- * limited trace_printk()
- * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
+ * Only limited trace_printk() conversion specifiers allowed:
+ * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
*/
BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
u64, arg2, u64, arg3)
@@ -198,7 +198,8 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
i++;
}
- if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x')
+ if (fmt[i] != 'i' && fmt[i] != 'd' &&
+ fmt[i] != 'u' && fmt[i] != 'x')
return -EINVAL;
fmt_cnt++;
}
@@ -213,8 +214,8 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
.func = bpf_trace_printk,
.gpl_only = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_STACK,
- .arg2_type = ARG_CONST_STACK_SIZE,
+ .arg1_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_CONST_SIZE,
};
const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
@@ -234,7 +235,8 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
unsigned int cpu = smp_processor_id();
u64 index = flags & BPF_F_INDEX_MASK;
struct bpf_event_entry *ee;
- struct perf_event *event;
+ u64 value = 0;
+ int err;
if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
return -EINVAL;
@@ -247,21 +249,14 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
if (!ee)
return -ENOENT;
- event = ee->event;
- if (unlikely(event->attr.type != PERF_TYPE_HARDWARE &&
- event->attr.type != PERF_TYPE_RAW))
- return -EINVAL;
-
- /* make sure event is local and doesn't have pmu::count */
- if (unlikely(event->oncpu != cpu || event->pmu->count))
- return -EINVAL;
-
+ err = perf_event_read_local(ee->event, &value);
/*
- * we don't know if the function is run successfully by the
- * return value. It can be judged in other places, such as
- * eBPF programs.
+ * this api is ugly since we miss [-22..-2] range of valid
+ * counter values, but that's uapi
*/
- return perf_event_read_local(event);
+ if (err)
+ return err;
+ return value;
}
static const struct bpf_func_proto bpf_perf_event_read_proto = {
@@ -272,14 +267,16 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = {
.arg2_type = ARG_ANYTHING,
};
+static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd);
+
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
u64 flags, struct perf_raw_record *raw)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
+ struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd);
unsigned int cpu = smp_processor_id();
u64 index = flags & BPF_F_INDEX_MASK;
- struct perf_sample_data sample_data;
struct bpf_event_entry *ee;
struct perf_event *event;
@@ -300,9 +297,9 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
if (unlikely(event->oncpu != cpu))
return -EOPNOTSUPP;
- perf_sample_data_init(&sample_data, 0, 0);
- sample_data.raw = raw;
- perf_event_output(event, &sample_data, regs);
+ perf_sample_data_init(sd, 0, 0);
+ sd->raw = raw;
+ perf_event_output(event, sd, regs);
return 0;
}
@@ -329,8 +326,8 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_STACK,
- .arg5_type = ARG_CONST_STACK_SIZE,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE,
};
static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
@@ -395,6 +392,36 @@ static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
+ const void *, unsafe_ptr)
+{
+ int ret;
+
+ /*
+ * The strncpy_from_unsafe() call will likely not fill the entire
+ * buffer, but that's okay in this circumstance as we're probing
+ * arbitrary memory anyway similar to bpf_probe_read() and might
+ * as well probe the stack. Thus, memory is explicitly cleared
+ * only in error case, so that improper users ignoring return
+ * code altogether don't copy garbage; otherwise length of string
+ * is returned that can be used for bpf_perf_event_output() et al.
+ */
+ ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_probe_read_str_proto = {
+ .func = bpf_probe_read_str,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -432,6 +459,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
return &bpf_current_task_under_cgroup_proto;
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
+ case BPF_FUNC_probe_read_str:
+ return &bpf_probe_read_str_proto;
default:
return NULL;
}
@@ -451,7 +480,7 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
/* bpf+kprobe programs can access fields of 'struct pt_regs' */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
- enum bpf_reg_type *reg_type)
+ struct bpf_insn_access_aux *info)
{
if (off < 0 || off >= sizeof(struct pt_regs))
return false;
@@ -459,19 +488,21 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type
return false;
if (off % size != 0)
return false;
+ /*
+ * Assertion for 32 bit to make sure last 8 byte access
+ * (BPF_DW) to the last 4 byte member is disallowed.
+ */
+ if (off + size > sizeof(struct pt_regs))
+ return false;
+
return true;
}
-static const struct bpf_verifier_ops kprobe_prog_ops = {
+const struct bpf_verifier_ops kprobe_prog_ops = {
.get_func_proto = kprobe_prog_func_proto,
.is_valid_access = kprobe_prog_is_valid_access,
};
-static struct bpf_prog_type_list kprobe_tl = {
- .ops = &kprobe_prog_ops,
- .type = BPF_PROG_TYPE_KPROBE,
-};
-
BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
u64, flags, void *, data, u64, size)
{
@@ -492,8 +523,8 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_STACK,
- .arg5_type = ARG_CONST_STACK_SIZE,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE,
};
BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
@@ -532,7 +563,7 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
}
static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
- enum bpf_reg_type *reg_type)
+ struct bpf_insn_access_aux *info)
{
if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
return false;
@@ -540,82 +571,73 @@ static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type
return false;
if (off % size != 0)
return false;
+
+ BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
return true;
}
-static const struct bpf_verifier_ops tracepoint_prog_ops = {
+const struct bpf_verifier_ops tracepoint_prog_ops = {
.get_func_proto = tp_prog_func_proto,
.is_valid_access = tp_prog_is_valid_access,
};
-static struct bpf_prog_type_list tracepoint_tl = {
- .ops = &tracepoint_prog_ops,
- .type = BPF_PROG_TYPE_TRACEPOINT,
-};
-
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
- enum bpf_reg_type *reg_type)
+ struct bpf_insn_access_aux *info)
{
+ const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
+ sample_period);
+
if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
return false;
if (type != BPF_READ)
return false;
if (off % size != 0)
return false;
- if (off == offsetof(struct bpf_perf_event_data, sample_period)) {
- if (size != sizeof(u64))
+
+ switch (off) {
+ case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
+ bpf_ctx_record_field_size(info, size_sp);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
return false;
- } else {
+ break;
+ default:
if (size != sizeof(long))
return false;
}
+
return true;
}
-static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg,
- int src_reg, int ctx_off,
+static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
struct bpf_insn *insn_buf,
- struct bpf_prog *prog)
+ struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
- switch (ctx_off) {
+ switch (si->off) {
case offsetof(struct bpf_perf_event_data, sample_period):
- BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64));
-
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
- data), dst_reg, src_reg,
+ data), si->dst_reg, si->src_reg,
offsetof(struct bpf_perf_event_data_kern, data));
- *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg,
- offsetof(struct perf_sample_data, period));
+ *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct perf_sample_data, period, 8,
+ target_size));
break;
default:
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
- regs), dst_reg, src_reg,
+ regs), si->dst_reg, si->src_reg,
offsetof(struct bpf_perf_event_data_kern, regs));
- *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), dst_reg, dst_reg, ctx_off);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
+ si->off);
break;
}
return insn - insn_buf;
}
-static const struct bpf_verifier_ops perf_event_prog_ops = {
+const struct bpf_verifier_ops perf_event_prog_ops = {
.get_func_proto = tp_prog_func_proto,
.is_valid_access = pe_prog_is_valid_access,
.convert_ctx_access = pe_prog_convert_ctx_access,
};
-
-static struct bpf_prog_type_list perf_event_tl = {
- .ops = &perf_event_prog_ops,
- .type = BPF_PROG_TYPE_PERF_EVENT,
-};
-
-static int __init register_kprobe_prog_ops(void)
-{
- bpf_register_prog_type(&kprobe_tl);
- bpf_register_prog_type(&tracepoint_tl);
- bpf_register_prog_type(&perf_event_tl);
- return 0;
-}
-late_initcall(register_kprobe_prog_ops);