-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c | 110
-rw-r--r--  drivers/net/bonding/bond_main.c | 27
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 22
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 56
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h | 2
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 62
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 5
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h | 5
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c | 71
-rw-r--r--  drivers/net/ethernet/freescale/fec_ptp.c | 6
-rw-r--r--  drivers/net/ethernet/ibm/ibmvnic.c | 223
-rw-r--r--  drivers/net/ethernet/ibm/ibmvnic.h | 2
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.c | 26
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.h | 2
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 21
-rw-r--r--  drivers/net/ethernet/netronome/nfp/Makefile | 1
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/jit.c | 410
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.h | 28
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/offload.c | 2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 8
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/action.c | 131
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/cmsg.c | 11
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 14
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/lag_conf.c | 726
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/main.c | 61
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/main.h | 52
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/offload.c | 2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_asm.h | 18
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 1
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h | 2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c | 43
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_l2.c | 37
-rw-r--r--  drivers/net/ethernet/qlogic/qede/qede.h | 1
-rw-r--r--  drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 1
-rw-r--r--  drivers/net/ethernet/qlogic/qede/qede_filter.c | 656
-rw-r--r--  drivers/net/ethernet/qlogic/qede/qede_main.c | 1
-rw-r--r--  drivers/net/ethernet/realtek/8139too.c | 2
-rw-r--r--  drivers/net/ethernet/sfc/tx.c | 33
-rw-r--r--  drivers/net/hyperv/rndis_filter.c | 5
-rw-r--r--  drivers/net/team/team.c | 1
-rw-r--r--  drivers/net/tun.c | 37
-rw-r--r--  drivers/net/virtio_net.c | 66
-rw-r--r--  include/linux/bpf.h | 24
-rw-r--r--  include/linux/bpf_types.h | 5
-rw-r--r--  include/linux/filter.h | 1
-rw-r--r--  include/linux/if_bridge.h | 1
-rw-r--r--  include/linux/netdevice.h | 25
-rw-r--r--  include/linux/perf_event.h | 5
-rw-r--r--  include/linux/qed/qed_eth_if.h | 4
-rw-r--r--  include/linux/qed/qed_if.h | 1
-rw-r--r--  include/linux/trace_events.h | 17
-rw-r--r--  include/net/addrconf.h | 2
-rw-r--r--  include/net/ip6_fib.h | 6
-rw-r--r--  include/net/ip6_route.h | 3
-rw-r--r--  include/net/ip_fib.h | 2
-rw-r--r--  include/net/page_pool.h | 5
-rw-r--r--  include/net/pkt_cls.h | 2
-rw-r--r--  include/net/seg6.h | 7
-rw-r--r--  include/net/seg6_local.h | 32
-rw-r--r--  include/net/xdp.h | 1
-rw-r--r--  include/net/xdp_sock.h | 13
-rw-r--r--  include/trace/events/fib.h | 107
-rw-r--r--  include/trace/events/fib6.h | 29
-rw-r--r--  include/trace/events/xdp.h | 50
-rw-r--r--  include/uapi/linux/bpf.h | 143
-rw-r--r--  include/uapi/linux/btf.h | 37
-rw-r--r--  include/uapi/linux/if_link.h | 1
-rw-r--r--  include/uapi/linux/if_xdp.h | 59
-rw-r--r--  include/uapi/linux/seg6_local.h | 12
-rw-r--r--  kernel/bpf/arraymap.c | 2
-rw-r--r--  kernel/bpf/btf.c | 334
-rw-r--r--  kernel/bpf/cpumap.c | 2
-rw-r--r--  kernel/bpf/devmap.c | 131
-rw-r--r--  kernel/bpf/sockmap.c | 4
-rw-r--r--  kernel/bpf/syscall.c | 245
-rw-r--r--  kernel/bpf/verifier.c | 23
-rw-r--r--  kernel/bpf/xskmap.c | 9
-rw-r--r--  kernel/events/core.c | 8
-rw-r--r--  kernel/trace/bpf_trace.c | 48
-rw-r--r--  kernel/trace/trace_kprobe.c | 29
-rw-r--r--  kernel/trace/trace_uprobe.c | 22
-rw-r--r--  net/bridge/br_forward.c | 3
-rw-r--r--  net/bridge/br_input.c | 1
-rw-r--r--  net/bridge/br_netlink.c | 9
-rw-r--r--  net/bridge/br_private.h | 9
-rw-r--r--  net/bridge/br_sysfs_if.c | 2
-rw-r--r--  net/core/filter.c | 572
-rw-r--r--  net/core/net-traces.c | 4
-rw-r--r--  net/core/xdp.c | 20
-rw-r--r--  net/ipv4/fib_frontend.c | 2
-rw-r--r--  net/ipv4/fib_trie.c | 14
-rw-r--r--  net/ipv4/route.c | 31
-rw-r--r--  net/ipv6/Kconfig | 5
-rw-r--r--  net/ipv6/addrconf_core.c | 8
-rw-r--r--  net/ipv6/af_inet6.c | 1
-rw-r--r--  net/ipv6/route.c | 57
-rw-r--r--  net/ipv6/seg6_local.c | 190
-rw-r--r--  net/sched/cls_api.c | 5
-rw-r--r--  net/sched/cls_basic.c | 24
-rw-r--r--  net/sched/cls_bpf.c | 22
-rw-r--r--  net/sched/cls_cgroup.c | 23
-rw-r--r--  net/sched/cls_flow.c | 24
-rw-r--r--  net/sched/cls_flower.c | 40
-rw-r--r--  net/sched/cls_fw.c | 24
-rw-r--r--  net/sched/cls_matchall.c | 21
-rw-r--r--  net/sched/cls_route.c | 23
-rw-r--r--  net/sched/cls_rsvp.h | 20
-rw-r--r--  net/sched/cls_tcindex.c | 41
-rw-r--r--  net/sched/cls_u32.c | 37
-rw-r--r--  net/xdp/Makefile | 1
-rw-r--r--  net/xdp/xdp_umem.c | 96
-rw-r--r--  net/xdp/xdp_umem.h | 18
-rw-r--r--  net/xdp/xdp_umem_props.h | 13
-rw-r--r--  net/xdp/xsk.c | 150
-rw-r--r--  net/xdp/xsk_queue.c | 12
-rw-r--r--  net/xdp/xsk_queue.h | 34
-rw-r--r--  samples/bpf/Makefile | 4
-rw-r--r--  samples/bpf/task_fd_query_kern.c | 19
-rw-r--r--  samples/bpf/task_fd_query_user.c | 382
-rw-r--r--  samples/bpf/xdp_monitor_kern.c | 49
-rw-r--r--  samples/bpf/xdp_monitor_user.c | 69
-rw-r--r--  samples/bpf/xdpsock_user.c | 135
-rwxr-xr-x  scripts/bpf_helpers_doc.py | 8
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-perf.rst | 81
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool.rst | 5
-rw-r--r--  tools/bpf/bpftool/bash-completion/bpftool | 9
-rw-r--r--  tools/bpf/bpftool/main.c | 3
-rw-r--r--  tools/bpf/bpftool/main.h | 1
-rw-r--r--  tools/bpf/bpftool/perf.c | 246
-rw-r--r--  tools/bpf/bpftool/prog.c | 97
-rw-r--r--  tools/bpf/bpftool/xlated_dumper.c | 14
-rw-r--r--  tools/bpf/bpftool/xlated_dumper.h | 3
-rw-r--r--  tools/include/uapi/linux/bpf.h | 143
-rw-r--r--  tools/include/uapi/linux/btf.h | 37
-rw-r--r--  tools/lib/bpf/bpf.c | 27
-rw-r--r--  tools/lib/bpf/bpf.h | 7
-rw-r--r--  tools/lib/bpf/btf.c | 5
-rw-r--r--  tools/lib/bpf/libbpf.c | 43
-rw-r--r--  tools/lib/bpf/libbpf.h | 4
-rw-r--r--  tools/testing/selftests/bpf/Makefile | 16
-rw-r--r--  tools/testing/selftests/bpf/bpf_helpers.h | 12
-rw-r--r--  tools/testing/selftests/bpf/test_btf.c | 521
-rw-r--r--  tools/testing/selftests/bpf/test_lwt_seg6local.c | 437
-rwxr-xr-x  tools/testing/selftests/bpf/test_lwt_seg6local.sh | 140
-rw-r--r--  tools/testing/selftests/bpf/test_progs.c | 158
-rw-r--r--  tools/testing/selftests/bpf/test_verifier.c | 115
-rw-r--r--  tools/testing/selftests/bpf/trace_helpers.c | 12
-rw-r--r--  tools/testing/selftests/bpf/trace_helpers.h | 1
-rw-r--r--  tools/testing/selftests/net/forwarding/lib.sh | 52
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre.sh | 2
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh | 109
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_changes.sh | 70
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_flower.sh | 8
-rw-r--r--  tools/testing/selftests/net/forwarding/mirror_gre_lib.sh | 61
-rw-r--r--  tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh | 53
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh | 92
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh | 140
-rw-r--r--  tools/testing/selftests/net/forwarding/mirror_lib.sh | 54
-rw-r--r--  tools/testing/selftests/net/forwarding/mirror_topo_lib.sh | 101
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_vlan.sh | 169
161 files changed, 8200 insertions, 1514 deletions
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 1bdb1aff0619..f1c95779843b 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -167,25 +167,37 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
{
+ unsigned int i, ctx_idx = ctx->idx;
+
+ /* Load function address into r12 */
+ PPC_LI64(12, func);
+
+ /* For bpf-to-bpf function calls, the callee's address is unknown
+ * until the last extra pass. As seen above, we use PPC_LI64() to
+ * load the callee's address, but this may optimize the number of
+ * instructions required based on the nature of the address.
+ *
+ * Since we don't want the number of instructions emitted to change,
+ * we pad the optimized PPC_LI64() call with NOPs to guarantee that
+ * we always have a five-instruction sequence, which is the maximum
+ * that PPC_LI64() can emit.
+ */
+ for (i = ctx->idx - ctx_idx; i < 5; i++)
+ PPC_NOP();
+
#ifdef PPC64_ELF_ABI_v1
- /* func points to the function descriptor */
- PPC_LI64(b2p[TMP_REG_2], func);
- /* Load actual entry point from function descriptor */
- PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
- /* ... and move it to LR */
- PPC_MTLR(b2p[TMP_REG_1]);
/*
* Load TOC from function descriptor at offset 8.
* We can clobber r2 since we get called through a
* function pointer (so caller will save/restore r2)
* and since we don't use a TOC ourself.
*/
- PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
-#else
- /* We can clobber r12 */
- PPC_FUNC_ADDR(12, func);
- PPC_MTLR(12);
+ PPC_BPF_LL(2, 12, 8);
+ /* Load actual entry point from function descriptor */
+ PPC_BPF_LL(12, 12, 0);
#endif
+
+ PPC_MTLR(12);
PPC_BLRL();
}
@@ -256,7 +268,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
/* Assemble the body code between the prologue & epilogue */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
struct codegen_context *ctx,
- u32 *addrs)
+ u32 *addrs, bool extra_pass)
{
const struct bpf_insn *insn = fp->insnsi;
int flen = fp->len;
@@ -712,11 +724,25 @@ emit_clear:
break;
/*
- * Call kernel helper
+ * Call kernel helper or bpf function
*/
case BPF_JMP | BPF_CALL:
ctx->seen |= SEEN_FUNC;
- func = (u8 *) __bpf_call_base + imm;
+
+ /* bpf function call */
+ if (insn[i].src_reg == BPF_PSEUDO_CALL)
+ if (!extra_pass)
+ func = NULL;
+ else if (fp->aux->func && off < fp->aux->func_cnt)
+ /* use the subprog id from the off
+ * field to lookup the callee address
+ */
+ func = (u8 *) fp->aux->func[off]->bpf_func;
+ else
+ return -EINVAL;
+ /* kernel helper call */
+ else
+ func = (u8 *) __bpf_call_base + imm;
bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -864,6 +890,14 @@ cond_branch:
return 0;
}
+struct powerpc64_jit_data {
+ struct bpf_binary_header *header;
+ u32 *addrs;
+ u8 *image;
+ u32 proglen;
+ struct codegen_context ctx;
+};
+
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
u32 proglen;
@@ -871,6 +905,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
u8 *image = NULL;
u32 *code_base;
u32 *addrs;
+ struct powerpc64_jit_data *jit_data;
struct codegen_context cgctx;
int pass;
int flen;
@@ -878,6 +913,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_prog *org_fp = fp;
struct bpf_prog *tmp_fp;
bool bpf_blinded = false;
+ bool extra_pass = false;
if (!fp->jit_requested)
return org_fp;
@@ -891,11 +927,32 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp = tmp_fp;
}
+ jit_data = fp->aux->jit_data;
+ if (!jit_data) {
+ jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+ if (!jit_data) {
+ fp = org_fp;
+ goto out;
+ }
+ fp->aux->jit_data = jit_data;
+ }
+
flen = fp->len;
+ addrs = jit_data->addrs;
+ if (addrs) {
+ cgctx = jit_data->ctx;
+ image = jit_data->image;
+ bpf_hdr = jit_data->header;
+ proglen = jit_data->proglen;
+ alloclen = proglen + FUNCTION_DESCR_SIZE;
+ extra_pass = true;
+ goto skip_init_ctx;
+ }
+
addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
if (addrs == NULL) {
fp = org_fp;
- goto out;
+ goto out_addrs;
}
memset(&cgctx, 0, sizeof(struct codegen_context));
@@ -904,10 +961,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
/* Scouting faux-generate pass 0 */
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
+ if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
/* We hit something illegal or unsupported. */
fp = org_fp;
- goto out;
+ goto out_addrs;
}
/*
@@ -925,9 +982,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
bpf_jit_fill_ill_insns);
if (!bpf_hdr) {
fp = org_fp;
- goto out;
+ goto out_addrs;
}
+skip_init_ctx:
code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
/* Code generation passes 1-2 */
@@ -935,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/* Now build the prologue, body code & epilogue for real. */
cgctx.idx = 0;
bpf_jit_build_prologue(code_base, &cgctx);
- bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+ bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
bpf_jit_build_epilogue(code_base, &cgctx);
if (bpf_jit_enable > 1)
@@ -961,10 +1019,20 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp->jited_len = alloclen;
bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
+ if (!fp->is_func || extra_pass) {
+out_addrs:
+ kfree(addrs);
+ kfree(jit_data);
+ fp->aux->jit_data = NULL;
+ } else {
+ jit_data->addrs = addrs;
+ jit_data->ctx = cgctx;
+ jit_data->proglen = proglen;
+ jit_data->image = image;
+ jit_data->header = bpf_hdr;
+ }
out:
- kfree(addrs);
-
if (bpf_blinded)
bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
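The padded PPC_LI64() sequence in the change above keeps every emitted call site the same length, so the extra pass that patches in the real callee address can never change the program size. A minimal, self-contained sketch of the same idea in plain C (the toy_* names and the fake encodings are invented for illustration; only the final nop encoding is the real PowerPC one):

#include <stdint.h>

/* Toy buffer standing in for the JIT's instruction stream. */
struct toy_ctx {
	uint32_t insns[64];
	unsigned int idx;
};

static void toy_emit(struct toy_ctx *ctx, uint32_t insn)
{
	ctx->insns[ctx->idx++] = insn;
}

/* Emit an address load that may take 1..4 instructions depending on the
 * value, then pad with nops so every call site occupies exactly 5 slots
 * and a later pass with a different address cannot shift the code.
 */
static void toy_emit_fixed_len_load(struct toy_ctx *ctx, uint64_t addr)
{
	unsigned int start = ctx->idx;

	toy_emit(ctx, (uint32_t)(addr >> 48) & 0xffff);
	if (addr & 0x0000ffffffffffffULL)
		toy_emit(ctx, (uint32_t)(addr >> 32) & 0xffff);
	if (addr & 0x00000000ffffffffULL)
		toy_emit(ctx, (uint32_t)(addr >> 16) & 0xffff);
	if (addr & 0x000000000000ffffULL)
		toy_emit(ctx, (uint32_t)addr & 0xffff);

	while (ctx->idx - start < 5)
		toy_emit(ctx, 0x60000000);	/* PowerPC "nop" (ori 0,0,0) */
}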
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index fea17b92b1ae..bd53a71f6b00 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1218,12 +1218,37 @@ static enum netdev_lag_tx_type bond_lag_tx_type(struct bonding *bond)
}
}
+static enum netdev_lag_hash bond_lag_hash_type(struct bonding *bond,
+ enum netdev_lag_tx_type type)
+{
+ if (type != NETDEV_LAG_TX_TYPE_HASH)
+ return NETDEV_LAG_HASH_NONE;
+
+ switch (bond->params.xmit_policy) {
+ case BOND_XMIT_POLICY_LAYER2:
+ return NETDEV_LAG_HASH_L2;
+ case BOND_XMIT_POLICY_LAYER34:
+ return NETDEV_LAG_HASH_L34;
+ case BOND_XMIT_POLICY_LAYER23:
+ return NETDEV_LAG_HASH_L23;
+ case BOND_XMIT_POLICY_ENCAP23:
+ return NETDEV_LAG_HASH_E23;
+ case BOND_XMIT_POLICY_ENCAP34:
+ return NETDEV_LAG_HASH_E34;
+ default:
+ return NETDEV_LAG_HASH_UNKNOWN;
+ }
+}
+
static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave,
struct netlink_ext_ack *extack)
{
struct netdev_lag_upper_info lag_upper_info;
+ enum netdev_lag_tx_type type;
- lag_upper_info.tx_type = bond_lag_tx_type(bond);
+ type = bond_lag_tx_type(bond);
+ lag_upper_info.tx_type = type;
+ lag_upper_info.hash_type = bond_lag_hash_type(bond, type);
return netdev_master_upper_dev_link(slave->dev, bond->dev, slave,
&lag_upper_info, extack);
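With the bond's xmit hash policy now exported next to the LAG tx_type, a lower device that offloads link aggregation can key its decision on both fields from its CHANGEUPPER notifier. A rough sketch under assumptions: the callback name and the L3/L4-only offload policy below are invented for illustration, while the fields come from struct netdev_lag_upper_info as extended here.

static int example_changeupper_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct netdev_notifier_changeupper_info *info = ptr;
	struct netdev_lag_upper_info *lag_info;

	if (event != NETDEV_CHANGEUPPER || !info->upper_info)
		return NOTIFY_DONE;

	lag_info = info->upper_info;
	/* Only offload hash-based transmit policies over L3+L4 fields;
	 * everything else stays in software forwarding.
	 */
	if (lag_info->tx_type != NETDEV_LAG_TX_TYPE_HASH ||
	    lag_info->hash_type != NETDEV_LAG_HASH_L34)
		return NOTIFY_DONE;

	return NOTIFY_OK;
}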
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 0f305d97f4ee..0dbe2d9e22d6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1738,6 +1738,9 @@ int t4_set_addr_hash(struct adapter *adap, unsigned int mbox, unsigned int viid,
bool ucast, u64 vec, bool sleep_ok);
int t4_enable_vi_params(struct adapter *adap, unsigned int mbox,
unsigned int viid, bool rx_en, bool tx_en, bool dcb_en);
+int t4_enable_pi_params(struct adapter *adap, unsigned int mbox,
+ struct port_info *pi,
+ bool rx_en, bool tx_en, bool dcb_en);
int t4_enable_vi(struct adapter *adap, unsigned int mbox, unsigned int viid,
bool rx_en, bool tx_en);
int t4_identify_port(struct adapter *adap, unsigned int mbox, unsigned int viid,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 59d04d73c672..f7eef93ffc87 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -800,24 +800,20 @@ static int set_link_ksettings(struct net_device *dev,
if (base->duplex != DUPLEX_FULL)
return -EINVAL;
- if (!(lc->pcaps & FW_PORT_CAP32_ANEG)) {
- /* PHY offers a single speed. See if that's what's
- * being requested.
- */
- if (base->autoneg == AUTONEG_DISABLE &&
- (lc->pcaps & speed_to_fw_caps(base->speed)))
- return 0;
- return -EINVAL;
- }
-
old_lc = *lc;
- if (base->autoneg == AUTONEG_DISABLE) {
+ if (!(lc->pcaps & FW_PORT_CAP32_ANEG) ||
+ base->autoneg == AUTONEG_DISABLE) {
fw_caps = speed_to_fw_caps(base->speed);
- if (!(lc->pcaps & fw_caps))
+ /* Must only specify a single speed which must be supported
+ * as part of the Physical Port Capabilities.
+ */
+ if ((fw_caps & (fw_caps - 1)) != 0 ||
+ !(lc->pcaps & fw_caps))
return -EINVAL;
+
lc->speed_caps = fw_caps;
- lc->acaps = 0;
+ lc->acaps = fw_caps;
} else {
fw_caps =
lmm_to_fw_caps(link_ksettings->link_modes.advertising);
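The new (fw_caps & (fw_caps - 1)) != 0 test above rejects a request that maps to more than one speed capability bit: clearing the lowest set bit leaves zero only when at most one bit was set. Two illustrative values (not real capability masks):

	fw_caps = 0x10:  0x10 & 0x0f == 0x00  ->  exactly one speed, accepted
	fw_caps = 0x14:  0x14 & 0x13 == 0x10  ->  more than one speed, -EINVAL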
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 513e1d356384..0efae2030e71 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -465,7 +465,7 @@ static int link_start(struct net_device *dev)
&pi->link_cfg);
if (ret == 0) {
local_bh_disable();
- ret = t4_enable_vi_params(pi->adapter, mb, pi->viid, true,
+ ret = t4_enable_pi_params(pi->adapter, mb, pi, true,
true, CXGB4_DCB_ENABLED);
local_bh_enable();
}
@@ -2344,7 +2344,8 @@ static int cxgb_close(struct net_device *dev)
netif_tx_stop_all_queues(dev);
netif_carrier_off(dev);
- ret = t4_enable_vi(adapter, adapter->pf, pi->viid, false, false);
+ ret = t4_enable_pi_params(adapter, adapter->pf, pi,
+ false, false, false);
#ifdef CONFIG_CHELSIO_T4_DCB
cxgb4_dcb_reset(dev);
dcb_tx_queue_prio_enable(dev, false);
@@ -4140,6 +4141,10 @@ static int adap_init0(struct adapter *adap)
* card
*/
card_fw = kvzalloc(sizeof(*card_fw), GFP_KERNEL);
+ if (!card_fw) {
+ ret = -ENOMEM;
+ goto bye;
+ }
/* Get FW from /lib/firmware/ */
ret = request_firmware(&fw, fw_info->fw_mod_name,
@@ -5236,14 +5241,11 @@ static void free_some_resources(struct adapter *adapter)
NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
#define SEGMENT_SIZE 128
-static int get_chip_type(struct pci_dev *pdev, u32 pl_rev)
+static int t4_get_chip_type(struct adapter *adap, int ver)
{
- u16 device_id;
+ u32 pl_rev = REV_G(t4_read_reg(adap, PL_REV_A));
- /* Retrieve adapter's device ID */
- pci_read_config_word(pdev, PCI_DEVICE_ID, &device_id);
-
- switch (device_id >> 12) {
+ switch (ver) {
case CHELSIO_T4:
return CHELSIO_CHIP_CODE(CHELSIO_T4, pl_rev);
case CHELSIO_T5:
@@ -5251,8 +5253,7 @@ static int get_chip_type(struct pci_dev *pdev, u32 pl_rev)
case CHELSIO_T6:
return CHELSIO_CHIP_CODE(CHELSIO_T6, pl_rev);
default:
- dev_err(&pdev->dev, "Device %d is not supported\n",
- device_id);
+ break;
}
return -EINVAL;
}
@@ -5422,15 +5423,18 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
- int func, i, err, s_qpp, qpp, num_seg;
+ struct net_device *netdev;
+ struct adapter *adapter;
+ static int adap_idx = 1;
+ int s_qpp, qpp, num_seg;
struct port_info *pi;
bool highdma = false;
- struct adapter *adapter = NULL;
- struct net_device *netdev;
- void __iomem *regs;
- u32 whoami, pl_rev;
enum chip_type chip;
- static int adap_idx = 1;
+ void __iomem *regs;
+ int func, chip_ver;
+ u16 device_id;
+ int i, err;
+ u32 whoami;
printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
@@ -5466,11 +5470,17 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
goto out_free_adapter;
/* We control everything through one PF */
- whoami = readl(regs + PL_WHOAMI_A);
- pl_rev = REV_G(readl(regs + PL_REV_A));
- chip = get_chip_type(pdev, pl_rev);
- func = CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5 ?
- SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
+ whoami = t4_read_reg(adapter, PL_WHOAMI_A);
+ pci_read_config_word(pdev, PCI_DEVICE_ID, &device_id);
+ chip = t4_get_chip_type(adapter, CHELSIO_PCI_ID_VER(device_id));
+ if (chip < 0) {
+ dev_err(&pdev->dev, "Device %d is not supported\n", device_id);
+ err = chip;
+ goto out_free_adapter;
+ }
+ chip_ver = CHELSIO_CHIP_VERSION(chip);
+ func = chip_ver <= CHELSIO_T5 ?
+ SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
adapter->pdev = pdev;
adapter->pdev_dev = &pdev->dev;
@@ -5636,7 +5646,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_TC;
- if (CHELSIO_CHIP_VERSION(chip) > CHELSIO_T5) {
+ if (chip_ver > CHELSIO_T5) {
netdev->hw_enc_features |= NETIF_F_IP_CSUM |
NETIF_F_IPV6_CSUM |
NETIF_F_RXCSUM |
@@ -5716,7 +5726,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dev_warn(&pdev->dev, "could not allocate MPS Encap entries, continuing\n");
#if IS_ENABLED(CONFIG_IPV6)
- if ((CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) &&
+ if (chip_ver <= CHELSIO_T5 &&
(!(t4_read_reg(adapter, LE_DB_CONFIG_A) & ASLIPCOMPEN_F))) {
/* CLIP functionality is not present in hardware,
* hence disable all offload features
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h b/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h
index 54b718111e3f..721c77577ec5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h
@@ -34,6 +34,8 @@
#ifndef __T4_CHIP_TYPE_H__
#define __T4_CHIP_TYPE_H__
+#define CHELSIO_PCI_ID_VER(__DeviceID) ((__DeviceID) >> 12)
+
#define CHELSIO_T4 0x4
#define CHELSIO_T5 0x5
#define CHELSIO_T6 0x6
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 704f6960a6ea..39da7e3c804b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -4066,6 +4066,7 @@ int t4_link_l1cfg_core(struct adapter *adapter, unsigned int mbox,
fw_port_cap32_t fw_fc, cc_fec, fw_fec, rcap;
struct fw_port_cmd cmd;
unsigned int fw_mdi;
+ int ret;
fw_mdi = (FW_PORT_CAP32_MDI_V(FW_PORT_CAP32_MDI_AUTO) & lc->pcaps);
/* Convert driver coding of Pause Frame Flow Control settings into the
@@ -4100,6 +4101,13 @@ int t4_link_l1cfg_core(struct adapter *adapter, unsigned int mbox,
rcap = lc->acaps | fw_fc | fw_fec | fw_mdi;
}
+ if (rcap & ~lc->pcaps) {
+ dev_err(adapter->pdev_dev,
+ "Requested Port Capabilities %#x exceed Physical Port Capabilities %#x\n",
+ rcap, lc->pcaps);
+ return -EINVAL;
+ }
+
/* And send that on to the Firmware ...
*/
memset(&cmd, 0, sizeof(cmd));
@@ -4110,13 +4118,21 @@ int t4_link_l1cfg_core(struct adapter *adapter, unsigned int mbox,
cpu_to_be32(FW_PORT_CMD_ACTION_V(fw_caps == FW_CAPS16
? FW_PORT_ACTION_L1_CFG
: FW_PORT_ACTION_L1_CFG32) |
- FW_LEN16(cmd));
+ FW_LEN16(cmd));
if (fw_caps == FW_CAPS16)
cmd.u.l1cfg.rcap = cpu_to_be32(fwcaps32_to_caps16(rcap));
else
cmd.u.l1cfg32.rcap32 = cpu_to_be32(rcap);
- return t4_wr_mbox_meat_timeout(adapter, mbox, &cmd, sizeof(cmd), NULL,
- sleep_ok, timeout);
+
+ ret = t4_wr_mbox_meat_timeout(adapter, mbox, &cmd, sizeof(cmd), NULL,
+ sleep_ok, timeout);
+ if (ret) {
+ dev_err(adapter->pdev_dev,
+ "Requested Port Capabilities %#x rejected, error %d\n",
+ rcap, -ret);
+ return ret;
+ }
+ return ret;
}
/**
@@ -7998,6 +8014,34 @@ int t4_enable_vi(struct adapter *adap, unsigned int mbox, unsigned int viid,
}
/**
+ * t4_enable_pi_params - enable/disable a Port's Virtual Interface
+ * @adap: the adapter
+ * @mbox: mailbox to use for the FW command
+ * @pi: the Port Information structure
+ * @rx_en: 1=enable Rx, 0=disable Rx
+ * @tx_en: 1=enable Tx, 0=disable Tx
+ * @dcb_en: 1=enable delivery of Data Center Bridging messages.
+ *
+ * Enables/disables a Port's Virtual Interface. Note that setting DCB
+ * Enable only makes sense when enabling a Virtual Interface ...
+ * If the Virtual Interface enable/disable operation is successful,
+ * we notify the OS-specific code of a potential Link Status change
+ * via the OS Contract API t4_os_link_changed().
+ */
+int t4_enable_pi_params(struct adapter *adap, unsigned int mbox,
+ struct port_info *pi,
+ bool rx_en, bool tx_en, bool dcb_en)
+{
+ int ret = t4_enable_vi_params(adap, mbox, pi->viid,
+ rx_en, tx_en, dcb_en);
+ if (ret)
+ return ret;
+ t4_os_link_changed(adap, pi->port_id,
+ rx_en && tx_en && pi->link_cfg.link_ok);
+ return 0;
+}
+
+/**
* t4_identify_port - identify a VI's port by blinking its LED
* @adap: the adapter
* @mbox: mailbox to use for the FW command
@@ -8395,7 +8439,9 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
lc->lpacaps = lpacaps;
lc->acaps = acaps & ADVERT_MASK;
- if (lc->acaps & FW_PORT_CAP32_ANEG) {
+ if (!(lc->acaps & FW_PORT_CAP32_ANEG)) {
+ lc->autoneg = AUTONEG_DISABLE;
+ } else if (lc->acaps & FW_PORT_CAP32_ANEG) {
lc->autoneg = AUTONEG_ENABLE;
} else {
/* When Autoneg is disabled, user needs to set
@@ -8600,6 +8646,13 @@ static void init_link_config(struct link_config *lc, fw_port_cap32_t pcaps,
lc->requested_fec = FEC_AUTO;
lc->fec = fwcap_to_cc_fec(lc->def_acaps);
+ /* If the Port is capable of Auto-Negotiation, initialize it as
+ * "enabled" and copy over all of the Physical Port Capabilities
+ * to the Advertised Port Capabilities. Otherwise mark it as
+ * Auto-Negotiate disabled and select the highest supported speed
+ * for the link. Note parallel structure in t4_link_l1cfg_core()
+ * and t4_handle_get_port_info().
+ */
if (lc->pcaps & FW_PORT_CAP32_ANEG) {
lc->acaps = lc->pcaps & ADVERT_MASK;
lc->autoneg = AUTONEG_ENABLE;
@@ -8607,6 +8660,7 @@ static void init_link_config(struct link_config *lc, fw_port_cap32_t pcaps,
} else {
lc->acaps = 0;
lc->autoneg = AUTONEG_DISABLE;
+ lc->speed_caps = fwcap_to_fwspeed(acaps);
}
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 71f13bd2b5e4..ff84791a0ff8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -274,7 +274,7 @@ static int link_start(struct net_device *dev)
* is enabled on a port.
*/
if (ret == 0)
- ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
+ ret = t4vf_enable_pi(pi->adapter, pi, true, true);
/* The Virtual Interfaces are connected to an internal switch on the
* chip which allows VIs attached to the same port to talk to each
@@ -822,8 +822,7 @@ static int cxgb4vf_stop(struct net_device *dev)
netif_tx_stop_all_queues(dev);
netif_carrier_off(dev);
- t4vf_enable_vi(adapter, pi->viid, false, false);
- pi->link_cfg.link_ok = 0;
+ t4vf_enable_pi(adapter, pi, false, false);
clear_bit(pi->port_id, &adapter->open_device_map);
if (adapter->open_device_map == 0)
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
index 712e8f0c71b4..ccca67cf4487 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
@@ -391,7 +391,10 @@ int t4vf_config_rss_range(struct adapter *, unsigned int, int, int,
int t4vf_alloc_vi(struct adapter *, int);
int t4vf_free_vi(struct adapter *, int);
-int t4vf_enable_vi(struct adapter *, unsigned int, bool, bool);
+int t4vf_enable_vi(struct adapter *adapter, unsigned int viid, bool rx_en,
+ bool tx_en);
+int t4vf_enable_pi(struct adapter *adapter, struct port_info *pi, bool rx_en,
+ bool tx_en);
int t4vf_identify_port(struct adapter *, unsigned int, unsigned int);
int t4vf_set_rxmode(struct adapter *, unsigned int, int, int, int, int, int,
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index 3017f7873ff9..5b8c08cf523f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -405,6 +405,36 @@ static unsigned int fwcap_to_speed(fw_port_cap32_t caps)
return 0;
}
+/**
+ * fwcap_to_fwspeed - return highest speed in Port Capabilities
+ * @acaps: advertised Port Capabilities
+ *
+ * Get the highest speed for the port from the advertised Port
+ * Capabilities. It will be either the highest speed from the list of
+ * speeds or whatever the user has set using ethtool.
+ */
+static fw_port_cap32_t fwcap_to_fwspeed(fw_port_cap32_t acaps)
+{
+ #define TEST_SPEED_RETURN(__caps_speed) \
+ do { \
+ if (acaps & FW_PORT_CAP32_SPEED_##__caps_speed) \
+ return FW_PORT_CAP32_SPEED_##__caps_speed; \
+ } while (0)
+
+ TEST_SPEED_RETURN(400G);
+ TEST_SPEED_RETURN(200G);
+ TEST_SPEED_RETURN(100G);
+ TEST_SPEED_RETURN(50G);
+ TEST_SPEED_RETURN(40G);
+ TEST_SPEED_RETURN(25G);
+ TEST_SPEED_RETURN(10G);
+ TEST_SPEED_RETURN(1G);
+ TEST_SPEED_RETURN(100M);
+
+ #undef TEST_SPEED_RETURN
+ return 0;
+}
+
/*
* init_link_config - initialize a link's SW state
* @lc: structure holding the link state
@@ -431,6 +461,13 @@ static void init_link_config(struct link_config *lc,
lc->requested_fec = FEC_AUTO;
lc->fec = lc->auto_fec;
+ /* If the Port is capable of Auto-Negotiation, initialize it as
+ * "enabled" and copy over all of the Physical Port Capabilities
+ * to the Advertised Port Capabilities. Otherwise mark it as
+ * Auto-Negotiate disabled and select the highest supported speed
+ * for the link. Note parallel structure in t4_link_l1cfg_core()
+ * and t4_handle_get_port_info().
+ */
if (lc->pcaps & FW_PORT_CAP32_ANEG) {
lc->acaps = acaps & ADVERT_MASK;
lc->autoneg = AUTONEG_ENABLE;
@@ -438,6 +475,7 @@ static void init_link_config(struct link_config *lc,
} else {
lc->acaps = 0;
lc->autoneg = AUTONEG_DISABLE;
+ lc->speed_caps = fwcap_to_fwspeed(acaps);
}
}
@@ -1362,6 +1400,30 @@ int t4vf_enable_vi(struct adapter *adapter, unsigned int viid,
}
/**
+ * t4vf_enable_pi - enable/disable a Port's virtual interface
+ * @adapter: the adapter
+ * @pi: the Port Information structure
+ * @rx_en: 1=enable Rx, 0=disable Rx
+ * @tx_en: 1=enable Tx, 0=disable Tx
+ *
+ * Enables/disables a Port's virtual interface. If the Virtual
+ * Interface enable/disable operation is successful, we notify the
+ * OS-specific code of a potential Link Status change via the OS Contract
+ * API t4vf_os_link_changed().
+ */
+int t4vf_enable_pi(struct adapter *adapter, struct port_info *pi,
+ bool rx_en, bool tx_en)
+{
+ int ret = t4vf_enable_vi(adapter, pi->viid, rx_en, tx_en);
+
+ if (ret)
+ return ret;
+ t4vf_os_link_changed(adapter, pi->pidx,
+ rx_en && tx_en && pi->link_cfg.link_ok);
+ return 0;
+}
+
+/**
* t4vf_identify_port - identify a VI's port by blinking its LED
* @adapter: the adapter
* @viid: the Virtual Interface ID
@@ -1955,7 +2017,14 @@ static void t4vf_handle_get_port_info(struct port_info *pi,
lc->lpacaps = lpacaps;
lc->acaps = acaps & ADVERT_MASK;
- if (lc->acaps & FW_PORT_CAP32_ANEG) {
+ /* If we're not physically capable of Auto-Negotiation, note
+ * this as Auto-Negotiation disabled. Otherwise, we track
+ * what Auto-Negotiation settings we have. Note parallel
+ * structure in init_link_config().
+ */
+ if (!(lc->pcaps & FW_PORT_CAP32_ANEG)) {
+ lc->autoneg = AUTONEG_DISABLE;
+ } else if (lc->acaps & FW_PORT_CAP32_ANEG) {
lc->autoneg = AUTONEG_ENABLE;
} else {
/* When Autoneg is disabled, user needs to set
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index f81439796ac7..d438ef8a371d 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -466,12 +466,6 @@ static int fec_ptp_enable(struct ptp_clock_info *ptp,
return -EOPNOTSUPP;
}
-/**
- * fec_ptp_hwtstamp_ioctl - control hardware time stamping
- * @ndev: pointer to net_device
- * @ifreq: ioctl data
- * @cmd: particular ioctl requested
- */
int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr)
{
struct fec_enet_private *fep = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 4bb4646a5f92..09f8e6baf049 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -109,13 +109,14 @@ static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *,
struct ibmvnic_sub_crq_queue *);
static int ibmvnic_poll(struct napi_struct *napi, int data);
static void send_map_query(struct ibmvnic_adapter *adapter);
-static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
-static void send_request_unmap(struct ibmvnic_adapter *, u8);
+static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
+static int send_request_unmap(struct ibmvnic_adapter *, u8);
static int send_login(struct ibmvnic_adapter *adapter);
static void send_cap_queries(struct ibmvnic_adapter *adapter);
static int init_sub_crqs(struct ibmvnic_adapter *);
static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
static int ibmvnic_init(struct ibmvnic_adapter *);
+static int ibmvnic_reset_init(struct ibmvnic_adapter *);
static void release_crq_queue(struct ibmvnic_adapter *);
static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p);
static int init_crq_queue(struct ibmvnic_adapter *adapter);
@@ -172,6 +173,7 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
struct ibmvnic_long_term_buff *ltb, int size)
{
struct device *dev = &adapter->vdev->dev;
+ int rc;
ltb->size = size;
ltb->buff = dma_alloc_coherent(dev, ltb->size, &ltb->addr,
@@ -185,8 +187,12 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
adapter->map_id++;
init_completion(&adapter->fw_done);
- send_request_map(adapter, ltb->addr,
- ltb->size, ltb->map_id);
+ rc = send_request_map(adapter, ltb->addr,
+ ltb->size, ltb->map_id);
+ if (rc) {
+ dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
+ return rc;
+ }
wait_for_completion(&adapter->fw_done);
if (adapter->fw_done_rc) {
@@ -215,10 +221,14 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter,
static int reset_long_term_buff(struct ibmvnic_adapter *adapter,
struct ibmvnic_long_term_buff *ltb)
{
+ int rc;
+
memset(ltb->buff, 0, ltb->size);
init_completion(&adapter->fw_done);
- send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
+ rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
+ if (rc)
+ return rc;
wait_for_completion(&adapter->fw_done);
if (adapter->fw_done_rc) {
@@ -789,6 +799,7 @@ static void release_napi(struct ibmvnic_adapter *adapter)
kfree(adapter->napi);
adapter->napi = NULL;
adapter->num_active_rx_napi = 0;
+ adapter->napi_enabled = false;
}
static int ibmvnic_login(struct net_device *netdev)
@@ -919,6 +930,10 @@ static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
/* Partial success, delay and re-send */
mdelay(1000);
resend = true;
+ } else if (adapter->init_done_rc) {
+ netdev_warn(netdev, "Unable to set link state, rc=%d\n",
+ adapter->init_done_rc);
+ return adapter->init_done_rc;
}
} while (resend);
@@ -951,6 +966,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
struct device *dev = &adapter->vdev->dev;
union ibmvnic_crq crq;
int len = 0;
+ int rc;
if (adapter->vpd->buff)
len = adapter->vpd->len;
@@ -958,7 +974,9 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
init_completion(&adapter->fw_done);
crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
crq.get_vpd_size.cmd = GET_VPD_SIZE;
- ibmvnic_send_crq(adapter, &crq);
+ rc = ibmvnic_send_crq(adapter, &crq);
+ if (rc)
+ return rc;
wait_for_completion(&adapter->fw_done);
if (!adapter->vpd->len)
@@ -991,7 +1009,12 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
crq.get_vpd.cmd = GET_VPD;
crq.get_vpd.ioba = cpu_to_be32(adapter->vpd->dma_addr);
crq.get_vpd.len = cpu_to_be32((u32)adapter->vpd->len);
- ibmvnic_send_crq(adapter, &crq);
+ rc = ibmvnic_send_crq(adapter, &crq);
+ if (rc) {
+ kfree(adapter->vpd->buff);
+ adapter->vpd->buff = NULL;
+ return rc;
+ }
wait_for_completion(&adapter->fw_done);
return 0;
@@ -1690,6 +1713,7 @@ static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p)
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
struct sockaddr *addr = p;
union ibmvnic_crq crq;
+ int rc;
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
@@ -1700,7 +1724,9 @@ static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p)
ether_addr_copy(&crq.change_mac_addr.mac_addr[0], addr->sa_data);
init_completion(&adapter->fw_done);
- ibmvnic_send_crq(adapter, &crq);
+ rc = ibmvnic_send_crq(adapter, &crq);
+ if (rc)
+ return rc;
wait_for_completion(&adapter->fw_done);
/* netdev->dev_addr is changed in handle_change_mac_rsp function */
return adapter->fw_done_rc ? -EIO : 0;
@@ -1782,7 +1808,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
return rc;
}
- rc = ibmvnic_init(adapter);
+ rc = ibmvnic_reset_init(adapter);
if (rc)
return IBMVNIC_INIT_FAILED;
@@ -1852,6 +1878,85 @@ static int do_reset(struct ibmvnic_adapter *adapter,
return 0;
}
+static int do_hard_reset(struct ibmvnic_adapter *adapter,
+ struct ibmvnic_rwi *rwi, u32 reset_state)
+{
+ struct net_device *netdev = adapter->netdev;
+ int rc;
+
+ netdev_dbg(adapter->netdev, "Hard resetting driver (%d)\n",
+ rwi->reset_reason);
+
+ netif_carrier_off(netdev);
+ adapter->reset_reason = rwi->reset_reason;
+
+ ibmvnic_cleanup(netdev);
+ release_resources(adapter);
+ release_sub_crqs(adapter, 0);
+ release_crq_queue(adapter);
+
+ /* remove the closed state so when we call open it appears
+ * we are coming from the probed state.
+ */
+ adapter->state = VNIC_PROBED;
+
+ rc = init_crq_queue(adapter);
+ if (rc) {
+ netdev_err(adapter->netdev,
+ "Couldn't initialize crq. rc=%d\n", rc);
+ return rc;
+ }
+
+ rc = ibmvnic_init(adapter);
+ if (rc)
+ return rc;
+
+ /* If the adapter was in PROBE state prior to the reset,
+ * exit here.
+ */
+ if (reset_state == VNIC_PROBED)
+ return 0;
+
+ rc = ibmvnic_login(netdev);
+ if (rc) {
+ adapter->state = VNIC_PROBED;
+ return 0;
+ }
+ /* netif_set_real_num_xx_queues needs to take rtnl lock here
+ * unless wait_for_reset is set, in which case the rtnl lock
+ * has already been taken before initializing the reset
+ */
+ if (!adapter->wait_for_reset) {
+ rtnl_lock();
+ rc = init_resources(adapter);
+ rtnl_unlock();
+ } else {
+ rc = init_resources(adapter);
+ }
+ if (rc)
+ return rc;
+
+ ibmvnic_disable_irqs(adapter);
+ adapter->state = VNIC_CLOSED;
+
+ if (reset_state == VNIC_CLOSED)
+ return 0;
+
+ rc = __ibmvnic_open(netdev);
+ if (rc) {
+ if (list_empty(&adapter->rwi_list))
+ adapter->state = VNIC_CLOSED;
+ else
+ adapter->state = reset_state;
+
+ return 0;
+ }
+
+ netif_carrier_on(netdev);
+
+ return 0;
+}
+
static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_rwi *rwi;
@@ -1893,14 +1998,19 @@ static void __ibmvnic_reset(struct work_struct *work)
netdev = adapter->netdev;
mutex_lock(&adapter->reset_lock);
- adapter->resetting = true;
reset_state = adapter->state;
rwi = get_next_rwi(adapter);
while (rwi) {
- rc = do_reset(adapter, rwi, reset_state);
+ if (adapter->force_reset_recovery) {
+ adapter->force_reset_recovery = false;
+ rc = do_hard_reset(adapter, rwi, reset_state);
+ } else {
+ rc = do_reset(adapter, rwi, reset_state);
+ }
kfree(rwi);
- if (rc && rc != IBMVNIC_INIT_FAILED)
+ if (rc && rc != IBMVNIC_INIT_FAILED &&
+ !adapter->force_reset_recovery)
break;
rwi = get_next_rwi(adapter);
@@ -1926,9 +2036,9 @@ static void __ibmvnic_reset(struct work_struct *work)
static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
enum ibmvnic_reset_reason reason)
{
+ struct list_head *entry, *tmp_entry;
struct ibmvnic_rwi *rwi, *tmp;
struct net_device *netdev = adapter->netdev;
- struct list_head *entry;
int ret;
if (adapter->state == VNIC_REMOVING ||
@@ -1964,11 +2074,17 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
ret = ENOMEM;
goto err;
}
-
+ /* if we just received a transport event,
+ * flush reset queue and process this reset
+ */
+ if (adapter->force_reset_recovery && !list_empty(&adapter->rwi_list)) {
+ list_for_each_safe(entry, tmp_entry, &adapter->rwi_list)
+ list_del(entry);
+ }
rwi->reset_reason = reason;
list_add_tail(&rwi->list, &adapter->rwi_list);
mutex_unlock(&adapter->rwi_lock);
-
+ adapter->resetting = true;
netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
schedule_work(&adapter->ibmvnic_reset);
@@ -2364,6 +2480,7 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
struct ibmvnic_adapter *adapter = netdev_priv(dev);
union ibmvnic_crq crq;
int i, j;
+ int rc;
memset(&crq, 0, sizeof(crq));
crq.request_statistics.first = IBMVNIC_CRQ_CMD;
@@ -2374,7 +2491,9 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
/* Wait for data to be written */
init_completion(&adapter->stats_done);
- ibmvnic_send_crq(adapter, &crq);
+ rc = ibmvnic_send_crq(adapter, &crq);
+ if (rc)
+ return;
wait_for_completion(&adapter->stats_done);
for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++)
@@ -3146,6 +3265,12 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter,
(unsigned long int)cpu_to_be64(u64_crq[0]),
(unsigned long int)cpu_to_be64(u64_crq[1]));
+ if (!adapter->crq.active &&
+ crq->generic.first != IBMVNIC_CRQ_INIT_CMD) {
+ dev_warn(dev, "Invalid request detected while CRQ is inactive, possible device state change during reset\n");
+ return -EINVAL;
+ }
+
/* Make sure the hypervisor sees the complete request */
mb();
@@ -3370,8 +3495,8 @@ buf_alloc_failed:
return -1;
}
-static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
- u32 len, u8 map_id)
+static int send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
+ u32 len, u8 map_id)
{
union ibmvnic_crq crq;
@@ -3381,10 +3506,10 @@ static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
crq.request_map.map_id = map_id;
crq.request_map.ioba = cpu_to_be32(addr);
crq.request_map.len = cpu_to_be32(len);
- ibmvnic_send_crq(adapter, &crq);
+ return ibmvnic_send_crq(adapter, &crq);
}
-static void send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id)
+static int send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id)
{
union ibmvnic_crq crq;
@@ -3392,7 +3517,7 @@ static void send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id)
crq.request_unmap.first = IBMVNIC_CRQ_CMD;
crq.request_unmap.cmd = REQUEST_UNMAP;
crq.request_unmap.map_id = map_id;
- ibmvnic_send_crq(adapter, &crq);
+ return ibmvnic_send_crq(adapter, &crq);
}
static void send_map_query(struct ibmvnic_adapter *adapter)
@@ -4219,11 +4344,15 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
dev_info(dev, "Partner initialized\n");
adapter->from_passive_init = true;
adapter->failover_pending = false;
- complete(&adapter->init_done);
+ if (!completion_done(&adapter->init_done)) {
+ complete(&adapter->init_done);
+ adapter->init_done_rc = -EIO;
+ }
ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
break;
case IBMVNIC_CRQ_INIT_COMPLETE:
dev_info(dev, "Partner initialization complete\n");
+ adapter->crq.active = true;
send_version_xchg(adapter);
break;
default:
@@ -4232,6 +4361,9 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
return;
case IBMVNIC_CRQ_XPORT_EVENT:
netif_carrier_off(netdev);
+ adapter->crq.active = false;
+ if (adapter->resetting)
+ adapter->force_reset_recovery = true;
if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
dev_info(dev, "Migrated, re-enabling adapter\n");
ibmvnic_reset(adapter, VNIC_RESET_MOBILITY);
@@ -4419,6 +4551,7 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter)
/* Clean out the queue */
memset(crq->msgs, 0, PAGE_SIZE);
crq->cur = 0;
+ crq->active = false;
/* And re-open it again */
rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address,
@@ -4453,6 +4586,7 @@ static void release_crq_queue(struct ibmvnic_adapter *adapter)
DMA_BIDIRECTIONAL);
free_page((unsigned long)crq->msgs);
crq->msgs = NULL;
+ crq->active = false;
}
static int init_crq_queue(struct ibmvnic_adapter *adapter)
@@ -4530,7 +4664,7 @@ map_failed:
return retrc;
}
-static int ibmvnic_init(struct ibmvnic_adapter *adapter)
+static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter)
{
struct device *dev = &adapter->vdev->dev;
unsigned long timeout = msecs_to_jiffies(30000);
@@ -4589,6 +4723,49 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
return rc;
}
+static int ibmvnic_init(struct ibmvnic_adapter *adapter)
+{
+ struct device *dev = &adapter->vdev->dev;
+ unsigned long timeout = msecs_to_jiffies(30000);
+ int rc;
+
+ adapter->from_passive_init = false;
+
+ init_completion(&adapter->init_done);
+ adapter->init_done_rc = 0;
+ ibmvnic_send_crq_init(adapter);
+ if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
+ dev_err(dev, "Initialization sequence timed out\n");
+ return -1;
+ }
+
+ if (adapter->init_done_rc) {
+ release_crq_queue(adapter);
+ return adapter->init_done_rc;
+ }
+
+ if (adapter->from_passive_init) {
+ adapter->state = VNIC_OPEN;
+ adapter->from_passive_init = false;
+ return -1;
+ }
+
+ rc = init_sub_crqs(adapter);
+ if (rc) {
+ dev_err(dev, "Initialization of sub crqs failed\n");
+ release_crq_queue(adapter);
+ return rc;
+ }
+
+ rc = init_sub_crq_irqs(adapter);
+ if (rc) {
+ dev_err(dev, "Failed to initialize sub crq irqs\n");
+ release_crq_queue(adapter);
+ }
+
+ return rc;
+}
+
static struct device_attribute dev_attr_failover;
static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 22391e8805f6..f9fb780102ac 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -865,6 +865,7 @@ struct ibmvnic_crq_queue {
int size, cur;
dma_addr_t msg_token;
spinlock_t lock;
+ bool active;
};
union sub_crq {
@@ -1108,6 +1109,7 @@ struct ibmvnic_adapter {
bool mac_change_pending;
bool failover_pending;
+ bool force_reset_recovery;
struct ibmvnic_tunables desired;
struct ibmvnic_tunables fallback;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 5efa68de935b..9b698c5acd05 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3664,14 +3664,19 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
* @dev: netdev
* @xdp: XDP buffer
*
- * Returns Zero if sent, else an error code
+ * Returns the number of frames successfully sent. Frames that fail are
+ * freed via the XDP return API.
+ *
+ * For error cases, a negative errno code is returned and no frames
+ * are transmitted (the caller must handle freeing the frames).
**/
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
unsigned int queue_index = smp_processor_id();
struct i40e_vsi *vsi = np->vsi;
- int err;
+ int drops = 0;
+ int i;
if (test_bit(__I40E_VSI_DOWN, vsi->state))
return -ENETDOWN;
@@ -3679,11 +3684,18 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
return -ENXIO;
- err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
- if (err != I40E_XDP_TX)
- return -ENOSPC;
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ int err;
- return 0;
+ err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
+ if (err != I40E_XDP_TX) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ }
+ }
+
+ return n - drops;
}
/**
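The i40e conversion above follows the new bulking contract for .ndo_xdp_xmit(): a negative return means nothing was sent and the caller still owns every frame, while a non-negative return is the number of frames the driver accepted, any rejected frames having already been freed by the driver itself. A minimal sketch of a caller honouring that contract (the function name is illustrative and not taken from this patch):

static int example_xdp_flush_bulk(struct net_device *dev,
				  struct xdp_frame **frames, int n)
{
	int sent;

	sent = dev->netdev_ops->ndo_xdp_xmit(dev, n, frames);
	if (sent < 0) {
		int i;

		/* Error path: no frames were transmitted, free them all. */
		for (i = 0; i < n; i++)
			xdp_return_frame_rx_napi(frames[i]);
		return sent;
	}

	/* Partial success: the driver already freed the (n - sent) drops. */
	return n - sent;
}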
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index fdd2c55f03a6..eb8804b3d7b6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -487,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf);
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
void i40e_xdp_flush(struct net_device *dev);
/**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index a52d92e182ee..031d65c4178d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10022,11 +10022,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
}
-static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
+static int ixgbe_xdp_xmit(struct net_device *dev, int n,
+ struct xdp_frame **frames)
{
struct ixgbe_adapter *adapter = netdev_priv(dev);
struct ixgbe_ring *ring;
- int err;
+ int drops = 0;
+ int i;
if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
return -ENETDOWN;
@@ -10038,11 +10040,18 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (unlikely(!ring))
return -ENXIO;
- err = ixgbe_xmit_xdp_ring(adapter, xdpf);
- if (err != IXGBE_XDP_TX)
- return -ENOSPC;
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ int err;
- return 0;
+ err = ixgbe_xmit_xdp_ring(adapter, xdpf);
+ if (err != IXGBE_XDP_TX) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ }
+ }
+
+ return n - drops;
}
static void ixgbe_xdp_flush(struct net_device *dev)
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 6373f56205fd..4afb10375397 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -37,6 +37,7 @@ ifeq ($(CONFIG_NFP_APP_FLOWER),y)
nfp-objs += \
flower/action.o \
flower/cmsg.o \
+ flower/lag_conf.o \
flower/main.o \
flower/match.o \
flower/metadata.o \
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index a4d3da215863..8a92088df0d7 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -212,6 +212,60 @@ emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
}
static void
+__emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer,
+ bool set, bool src_lmextn)
+{
+ u16 addr_lo, addr_hi;
+ u64 insn;
+
+ addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO));
+ addr_hi = addr != addr_lo;
+
+ insn = OP_BR_BIT_BASE |
+ FIELD_PREP(OP_BR_BIT_A_SRC, areg) |
+ FIELD_PREP(OP_BR_BIT_B_SRC, breg) |
+ FIELD_PREP(OP_BR_BIT_BV, set) |
+ FIELD_PREP(OP_BR_BIT_DEFBR, defer) |
+ FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) |
+ FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) |
+ FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn);
+
+ nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr,
+ u8 defer, bool set, enum nfp_relo_type relo)
+{
+ struct nfp_insn_re_regs reg;
+ int err;
+
+ /* NOTE: The bit to test is specified as a rotation amount, such that
+ * the bit to test will be placed on the MSB of the result when
+ * doing a rotate right. For bit X, we need right rotate X + 1.
+ */
+ bit += 1;
+
+ err = swreg_to_restricted(reg_none(), src, reg_imm(bit), &reg, false);
+ if (err) {
+ nfp_prog->error = err;
+ return;
+ }
+
+ __emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set,
+ reg.src_lmextn);
+
+ nfp_prog->prog[nfp_prog->prog_len - 1] |=
+ FIELD_PREP(OP_RELO_TYPE, relo);
+}
+
+static void
+emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer)
+{
+ emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL);
+}
+
+static void
__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
enum immed_width width, bool invert,
enum immed_shift shift, bool wr_both,
@@ -310,6 +364,19 @@ emit_shf(struct nfp_prog *nfp_prog, swreg dst,
}
static void
+emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst,
+ swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc)
+{
+ if (sc == SHF_SC_R_ROT) {
+ pr_err("indirect shift is not allowed on rotation\n");
+ nfp_prog->error = -EFAULT;
+ return;
+ }
+
+ emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0);
+}
+
+static void
__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
bool dst_lmextn, bool src_lmextn)
@@ -1629,26 +1696,142 @@ static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return 0;
}
+/* Pseudo code:
+ * if shift_amt >= 32
+ * dst_high = dst_low << shift_amt[4:0]
+ * dst_low = 0;
+ * else
+ * dst_high = (dst_high, dst_low) >> (32 - shift_amt)
+ * dst_low = dst_low << shift_amt
+ *
+ * The indirect shift will use the same logic at runtime.
+ */
+static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
+{
+ if (shift_amt < 32) {
+ emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1),
+ SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF,
+ 32 - shift_amt);
+ emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
+ reg_b(dst), SHF_SC_L_SHF, shift_amt);
+ } else if (shift_amt == 32) {
+ wrp_reg_mov(nfp_prog, dst + 1, dst);
+ wrp_immed(nfp_prog, reg_both(dst), 0);
+ } else if (shift_amt > 32) {
+ emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
+ reg_b(dst), SHF_SC_L_SHF, shift_amt - 32);
+ wrp_immed(nfp_prog, reg_both(dst), 0);
+ }
+
+ return 0;
+}
+
static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u8 dst = insn->dst_reg * 2;
- if (insn->imm < 32) {
- emit_shf(nfp_prog, reg_both(dst + 1),
- reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
- SHF_SC_R_DSHF, 32 - insn->imm);
- emit_shf(nfp_prog, reg_both(dst),
- reg_none(), SHF_OP_NONE, reg_b(dst),
- SHF_SC_L_SHF, insn->imm);
- } else if (insn->imm == 32) {
- wrp_reg_mov(nfp_prog, dst + 1, dst);
- wrp_immed(nfp_prog, reg_both(dst), 0);
- } else if (insn->imm > 32) {
- emit_shf(nfp_prog, reg_both(dst + 1),
- reg_none(), SHF_OP_NONE, reg_b(dst),
- SHF_SC_L_SHF, insn->imm - 32);
- wrp_immed(nfp_prog, reg_both(dst), 0);
+ return __shl_imm64(nfp_prog, dst, insn->imm);
+}
+
+static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+ emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB,
+ reg_b(src));
+ emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0));
+ emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE,
+ reg_b(dst), SHF_SC_R_DSHF);
+}
+
+/* NOTE: for indirect left shift, HIGH part should be calculated first. */
+static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+ emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+ emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
+ reg_b(dst), SHF_SC_L_SHF);
+}
+
+static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+ shl_reg64_lt32_high(nfp_prog, dst, src);
+ shl_reg64_lt32_low(nfp_prog, dst, src);
+}
+
+static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+ emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+ emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
+ reg_b(dst), SHF_SC_L_SHF);
+ wrp_immed(nfp_prog, reg_both(dst), 0);
+}
+
+static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+ u64 umin, umax;
+ u8 dst, src;
+
+ dst = insn->dst_reg * 2;
+ umin = meta->umin;
+ umax = meta->umax;
+ if (umin == umax)
+ return __shl_imm64(nfp_prog, dst, umin);
+
+ src = insn->src_reg * 2;
+ if (umax < 32) {
+ shl_reg64_lt32(nfp_prog, dst, src);
+ } else if (umin >= 32) {
+ shl_reg64_ge32(nfp_prog, dst, src);
+ } else {
+ /* Generate different instruction sequences depending on runtime
+ * value of shift amount.
+ */
+ u16 label_ge32, label_end;
+
+ label_ge32 = nfp_prog_current_offset(nfp_prog) + 7;
+ emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
+
+ shl_reg64_lt32_high(nfp_prog, dst, src);
+ label_end = nfp_prog_current_offset(nfp_prog) + 6;
+ emit_br(nfp_prog, BR_UNC, label_end, 2);
+ /* shl_reg64_lt32_low packed in delay slot. */
+ shl_reg64_lt32_low(nfp_prog, dst, src);
+
+ if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
+ return -EINVAL;
+ shl_reg64_ge32(nfp_prog, dst, src);
+
+ if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Pseudo code:
+ * if shift_amt >= 32
+ * dst_high = 0;
+ * dst_low = dst_high >> shift_amt[4:0]
+ * else
+ * dst_high = dst_high >> shift_amt
+ * dst_low = (dst_high, dst_low) >> shift_amt
+ *
+ * The indirect shift will use the same logic at runtime.
+ */
+static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
+{
+ if (shift_amt < 32) {
+ emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
+ reg_b(dst), SHF_SC_R_DSHF, shift_amt);
+ emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
+ reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
+ } else if (shift_amt == 32) {
+ wrp_reg_mov(nfp_prog, dst, dst + 1);
+ wrp_immed(nfp_prog, reg_both(dst + 1), 0);
+ } else if (shift_amt > 32) {
+ emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
+ reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
+ wrp_immed(nfp_prog, reg_both(dst + 1), 0);
}
return 0;
@@ -1659,21 +1842,186 @@ static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
const struct bpf_insn *insn = &meta->insn;
u8 dst = insn->dst_reg * 2;
- if (insn->imm < 32) {
- emit_shf(nfp_prog, reg_both(dst),
- reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
- SHF_SC_R_DSHF, insn->imm);
- emit_shf(nfp_prog, reg_both(dst + 1),
- reg_none(), SHF_OP_NONE, reg_b(dst + 1),
- SHF_SC_R_SHF, insn->imm);
- } else if (insn->imm == 32) {
+ return __shr_imm64(nfp_prog, dst, insn->imm);
+}
+
+/* NOTE: for indirect right shift, LOW part should be calculated first. */
+static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+ emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+ emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
+ reg_b(dst + 1), SHF_SC_R_SHF);
+}
+
+static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+ emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+ emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
+ reg_b(dst), SHF_SC_R_DSHF);
+}
+
+static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+ shr_reg64_lt32_low(nfp_prog, dst, src);
+ shr_reg64_lt32_high(nfp_prog, dst, src);
+}
+
+static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
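+ /* As the pseudo code above notes, the shifter only consumes
+ * shift_amt[4:0], so an indirect shift by the full amount (>= 32 here)
+ * performs the shift by (shift_amt - 32) required for the low word.
+ */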
+ emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+ emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
+ reg_b(dst + 1), SHF_SC_R_SHF);
+ wrp_immed(nfp_prog, reg_both(dst + 1), 0);
+}
+
+static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+ u64 umin, umax;
+ u8 dst, src;
+
+ dst = insn->dst_reg * 2;
+ umin = meta->umin;
+ umax = meta->umax;
+ if (umin == umax)
+ return __shr_imm64(nfp_prog, dst, umin);
+
+ src = insn->src_reg * 2;
+ if (umax < 32) {
+ shr_reg64_lt32(nfp_prog, dst, src);
+ } else if (umin >= 32) {
+ shr_reg64_ge32(nfp_prog, dst, src);
+ } else {
+ /* Generate different instruction sequences depending on runtime
+ * value of shift amount.
+ */
+ u16 label_ge32, label_end;
+
+ label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
+ emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
+ shr_reg64_lt32_low(nfp_prog, dst, src);
+ label_end = nfp_prog_current_offset(nfp_prog) + 6;
+ emit_br(nfp_prog, BR_UNC, label_end, 2);
+ /* shr_reg64_lt32_high packed in delay slot. */
+ shr_reg64_lt32_high(nfp_prog, dst, src);
+
+ if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
+ return -EINVAL;
+ shr_reg64_ge32(nfp_prog, dst, src);
+
+ if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Code logic is the same as __shr_imm64 except that ashr requires the
+ * signedness bit to be supplied through the PREV_ALU result.
+ */
+static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
+{
+ if (shift_amt < 32) {
+ emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
+ reg_b(dst), SHF_SC_R_DSHF, shift_amt);
+ /* Set signedness bit. */
+ emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
+ reg_imm(0));
+ emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
+ reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
+ } else if (shift_amt == 32) {
+ /* NOTE: this also helps set the signedness bit. */
wrp_reg_mov(nfp_prog, dst, dst + 1);
- wrp_immed(nfp_prog, reg_both(dst + 1), 0);
- } else if (insn->imm > 32) {
- emit_shf(nfp_prog, reg_both(dst),
- reg_none(), SHF_OP_NONE, reg_b(dst + 1),
- SHF_SC_R_SHF, insn->imm - 32);
- wrp_immed(nfp_prog, reg_both(dst + 1), 0);
+ emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
+ reg_b(dst + 1), SHF_SC_R_SHF, 31);
+ } else if (shift_amt > 32) {
+ emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
+ reg_imm(0));
+ emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
+ reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
+ emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
+ reg_b(dst + 1), SHF_SC_R_SHF, 31);
+ }
+
+ return 0;
+}
+
+static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+ u8 dst = insn->dst_reg * 2;
+
+ return __ashr_imm64(nfp_prog, dst, insn->imm);
+}
+
+static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+ /* NOTE: the first insn will set both indirect shift amount (source A)
+ * and signedness bit (MSB of result).
+ */
+ emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
+ emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
+ reg_b(dst + 1), SHF_SC_R_SHF);
+}
+
+static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+ /* NOTE: it is the same as the logical shift because we don't need to
+ * shift in the signedness bit when the shift amount is less than 32.
+ */
+ return shr_reg64_lt32_low(nfp_prog, dst, src);
+}
+
+static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+ ashr_reg64_lt32_low(nfp_prog, dst, src);
+ ashr_reg64_lt32_high(nfp_prog, dst, src);
+}
+
+static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+ emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
+ emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
+ reg_b(dst + 1), SHF_SC_R_SHF);
+ emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
+ reg_b(dst + 1), SHF_SC_R_SHF, 31);
+}
+
+/* Like ashr_imm64, but need to use indirect shift. */
+static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+ u64 umin, umax;
+ u8 dst, src;
+
+ dst = insn->dst_reg * 2;
+ umin = meta->umin;
+ umax = meta->umax;
+ if (umin == umax)
+ return __ashr_imm64(nfp_prog, dst, umin);
+
+ src = insn->src_reg * 2;
+ if (umax < 32) {
+ ashr_reg64_lt32(nfp_prog, dst, src);
+ } else if (umin >= 32) {
+ ashr_reg64_ge32(nfp_prog, dst, src);
+ } else {
+ u16 label_ge32, label_end;
+
+ label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
+ emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
+ ashr_reg64_lt32_low(nfp_prog, dst, src);
+ label_end = nfp_prog_current_offset(nfp_prog) + 6;
+ emit_br(nfp_prog, BR_UNC, label_end, 2);
+ /* ashr_reg64_lt32_high packed in delay slot. */
+ ashr_reg64_lt32_high(nfp_prog, dst, src);
+
+ if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
+ return -EINVAL;
+ ashr_reg64_ge32(nfp_prog, dst, src);
+
+ if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
+ return -EINVAL;
}
return 0;
@@ -2501,8 +2849,12 @@ static const instr_cb_t instr_cb[256] = {
[BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64,
[BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64,
[BPF_ALU64 | BPF_NEG] = neg_reg64,
+ [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64,
[BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64,
+ [BPF_ALU64 | BPF_RSH | BPF_X] = shr_reg64,
[BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64,
+ [BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64,
+ [BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64,
[BPF_ALU | BPF_MOV | BPF_X] = mov_reg,
[BPF_ALU | BPF_MOV | BPF_K] = mov_imm,
[BPF_ALU | BPF_XOR | BPF_X] = xor_reg,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 8b143546ae85..654fe7823e5e 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -263,6 +263,8 @@ struct nfp_bpf_reg_state {
* @func_id: function id for call instructions
* @arg1: arg1 for call instructions
* @arg2: arg2 for call instructions
+ * @umin: copy of core verifier umin_value.
+ * @umax: copy of core verifier umax_value.
* @off: index of first generated machine instruction (in nfp_prog.prog)
* @n: eBPF instruction number
* @flags: eBPF instruction extra optimization flags
@@ -298,6 +300,13 @@ struct nfp_insn_meta {
struct bpf_reg_state arg1;
struct nfp_bpf_reg_state arg2;
};
+ /* We are interested in range info for some operands,
+ * for example, the shift amount.
+ */
+ struct {
+ u64 umin;
+ u64 umax;
+ };
};
unsigned int off;
unsigned short n;
@@ -375,6 +384,25 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD);
}
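+/* An indirect shift is one whose shift amount comes from a register. */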
+static inline bool is_mbpf_indir_shift(const struct nfp_insn_meta *meta)
+{
+ u8 code = meta->insn.code;
+ bool is_alu, is_shift;
+ u8 opclass, opcode;
+
+ opclass = BPF_CLASS(code);
+ is_alu = opclass == BPF_ALU64 || opclass == BPF_ALU;
+ if (!is_alu)
+ return false;
+
+ opcode = BPF_OP(code);
+ is_shift = opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH;
+ if (!is_shift)
+ return false;
+
+ return BPF_SRC(code) == BPF_X;
+}
+
/**
* struct nfp_prog - nfp BPF program
* @bpf: backpointer to the bpf app priv structure
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 4db0ac1e42a8..7eae4c0266f8 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -190,6 +190,8 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
meta->insn = prog[i];
meta->n = i;
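+ /* Seed umin with U64_MAX so the min() accumulation in
+ * nfp_verify_insn() picks up the verifier's bound on first visit.
+ */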
+ if (is_mbpf_indir_shift(meta))
+ meta->umin = U64_MAX;
list_add_tail(&meta->l, &nfp_prog->insns);
}
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 844a9be6e55a..4bfeba7b21b2 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -551,6 +551,14 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
if (is_mbpf_xadd(meta))
return nfp_bpf_check_xadd(nfp_prog, meta, env);
+ if (is_mbpf_indir_shift(meta)) {
+ const struct bpf_reg_state *sreg =
+ cur_regs(env) + meta->insn.src_reg;
+
+ meta->umin = min(meta->umin, sreg->umin_value);
+ meta->umax = max(meta->umax, sreg->umax_value);
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 80df9a5d4217..4a6d2db75071 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -72,6 +72,42 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan,
push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci);
}
+static int
+nfp_fl_pre_lag(struct nfp_app *app, const struct tc_action *action,
+ struct nfp_fl_payload *nfp_flow, int act_len)
+{
+ size_t act_size = sizeof(struct nfp_fl_pre_lag);
+ struct nfp_fl_pre_lag *pre_lag;
+ struct net_device *out_dev;
+ int err;
+
+ out_dev = tcf_mirred_dev(action);
+ if (!out_dev || !netif_is_lag_master(out_dev))
+ return 0;
+
+ if (act_len + act_size > NFP_FL_MAX_A_SIZ)
+ return -EOPNOTSUPP;
+
+ /* Pre_lag action must be first on action list.
+ * If other actions already exist they need to be pushed forward.
+ */
+ if (act_len)
+ memmove(nfp_flow->action_data + act_size,
+ nfp_flow->action_data, act_len);
+
+ pre_lag = (struct nfp_fl_pre_lag *)nfp_flow->action_data;
+ err = nfp_flower_lag_populate_pre_action(app, out_dev, pre_lag);
+ if (err)
+ return err;
+
+ pre_lag->head.jump_id = NFP_FL_ACTION_OPCODE_PRE_LAG;
+ pre_lag->head.len_lw = act_size >> NFP_FL_LW_SIZ;
+
+ nfp_flow->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
+
+ return act_size;
+}
+
static bool nfp_fl_netdev_is_tunnel_type(struct net_device *out_dev,
enum nfp_flower_tun_type tun_type)
{
@@ -88,12 +124,13 @@ static bool nfp_fl_netdev_is_tunnel_type(struct net_device *out_dev,
}
static int
-nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
- struct nfp_fl_payload *nfp_flow, bool last,
- struct net_device *in_dev, enum nfp_flower_tun_type tun_type,
- int *tun_out_cnt)
+nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output,
+ const struct tc_action *action, struct nfp_fl_payload *nfp_flow,
+ bool last, struct net_device *in_dev,
+ enum nfp_flower_tun_type tun_type, int *tun_out_cnt)
{
size_t act_size = sizeof(struct nfp_fl_output);
+ struct nfp_flower_priv *priv = app->priv;
struct net_device *out_dev;
u16 tmp_flags;
@@ -118,6 +155,15 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
output->flags = cpu_to_be16(tmp_flags |
NFP_FL_OUT_FLAGS_USE_TUN);
output->port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type);
+ } else if (netif_is_lag_master(out_dev) &&
+ priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
+ int gid;
+
+ output->flags = cpu_to_be16(tmp_flags);
+ gid = nfp_flower_lag_get_output_id(app, out_dev);
+ if (gid < 0)
+ return gid;
+ output->port = cpu_to_be32(NFP_FL_LAG_OUT | gid);
} else {
/* Set action output parameters. */
output->flags = cpu_to_be16(tmp_flags);
@@ -164,7 +210,7 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len)
struct nfp_fl_pre_tunnel *pre_tun_act;
/* Pre_tunnel action must be first on action list.
- * If other actions already exist they need pushed forward.
+ * If other actions already exist they need to be pushed forward.
*/
if (act_len)
memmove(act_data + act_size, act_data, act_len);
@@ -443,42 +489,73 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len)
}
static int
-nfp_flower_loop_action(const struct tc_action *a,
+nfp_flower_output_action(struct nfp_app *app, const struct tc_action *a,
+ struct nfp_fl_payload *nfp_fl, int *a_len,
+ struct net_device *netdev, bool last,
+ enum nfp_flower_tun_type *tun_type, int *tun_out_cnt,
+ int *out_cnt)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_fl_output *output;
+ int err, prelag_size;
+
+ if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ)
+ return -EOPNOTSUPP;
+
+ output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
+ err = nfp_fl_output(app, output, a, nfp_fl, last, netdev, *tun_type,
+ tun_out_cnt);
+ if (err)
+ return err;
+
+ *a_len += sizeof(struct nfp_fl_output);
+
+ if (priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
+ /* nfp_fl_pre_lag() returns a negative error or the size of the
+ * pre_lag action added. This will be 0 if the flow is not egressing
+ * to a LAG device.
+ */
+ prelag_size = nfp_fl_pre_lag(app, a, nfp_fl, *a_len);
+ if (prelag_size < 0)
+ return prelag_size;
+ else if (prelag_size > 0 && (!last || *out_cnt))
+ return -EOPNOTSUPP;
+
+ *a_len += prelag_size;
+ }
+ (*out_cnt)++;
+
+ return 0;
+}
+
+static int
+nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a,
struct nfp_fl_payload *nfp_fl, int *a_len,
struct net_device *netdev,
- enum nfp_flower_tun_type *tun_type, int *tun_out_cnt)
+ enum nfp_flower_tun_type *tun_type, int *tun_out_cnt,
+ int *out_cnt)
{
struct nfp_fl_set_ipv4_udp_tun *set_tun;
struct nfp_fl_pre_tunnel *pre_tun;
struct nfp_fl_push_vlan *psh_v;
struct nfp_fl_pop_vlan *pop_v;
- struct nfp_fl_output *output;
int err;
if (is_tcf_gact_shot(a)) {
nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_DROP);
} else if (is_tcf_mirred_egress_redirect(a)) {
- if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ)
- return -EOPNOTSUPP;
-
- output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
- err = nfp_fl_output(output, a, nfp_fl, true, netdev, *tun_type,
- tun_out_cnt);
+ err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev,
+ true, tun_type, tun_out_cnt,
+ out_cnt);
if (err)
return err;
- *a_len += sizeof(struct nfp_fl_output);
} else if (is_tcf_mirred_egress_mirror(a)) {
- if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ)
- return -EOPNOTSUPP;
-
- output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
- err = nfp_fl_output(output, a, nfp_fl, false, netdev, *tun_type,
- tun_out_cnt);
+ err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev,
+ false, tun_type, tun_out_cnt,
+ out_cnt);
if (err)
return err;
- *a_len += sizeof(struct nfp_fl_output);
} else if (is_tcf_vlan(a) && tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
if (*a_len + sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ)
return -EOPNOTSUPP;
@@ -535,11 +612,12 @@ nfp_flower_loop_action(const struct tc_action *a,
return 0;
}
-int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
+int nfp_flower_compile_action(struct nfp_app *app,
+ struct tc_cls_flower_offload *flow,
struct net_device *netdev,
struct nfp_fl_payload *nfp_flow)
{
- int act_len, act_cnt, err, tun_out_cnt;
+ int act_len, act_cnt, err, tun_out_cnt, out_cnt;
enum nfp_flower_tun_type tun_type;
const struct tc_action *a;
LIST_HEAD(actions);
@@ -550,11 +628,12 @@ int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
act_len = 0;
act_cnt = 0;
tun_out_cnt = 0;
+ out_cnt = 0;
tcf_exts_to_list(flow->exts, &actions);
list_for_each_entry(a, &actions, list) {
- err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev,
- &tun_type, &tun_out_cnt);
+ err = nfp_flower_loop_action(app, a, nfp_flow, &act_len, netdev,
+ &tun_type, &tun_out_cnt, &out_cnt);
if (err)
return err;
act_cnt++;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index 577659f332e4..cb8565222621 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -239,8 +239,10 @@ nfp_flower_cmsg_portreify_rx(struct nfp_app *app, struct sk_buff *skb)
static void
nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
{
+ struct nfp_flower_priv *app_priv = app->priv;
struct nfp_flower_cmsg_hdr *cmsg_hdr;
enum nfp_flower_cmsg_type_port type;
+ bool skb_stored = false;
cmsg_hdr = nfp_flower_cmsg_get_hdr(skb);
@@ -258,13 +260,20 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS:
nfp_tunnel_keep_alive(app, skb);
break;
+ case NFP_FLOWER_CMSG_TYPE_LAG_CONFIG:
+ if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
+ skb_stored = nfp_flower_lag_unprocessed_msg(app, skb);
+ break;
+ }
+ /* fall through */
default:
nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n",
type);
goto out;
}
- dev_consume_skb_any(skb);
+ if (!skb_stored)
+ dev_consume_skb_any(skb);
return;
out:
dev_kfree_skb_any(skb);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index bee4367a2c38..4a7f3510a296 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -92,6 +92,7 @@
#define NFP_FL_ACTION_OPCODE_SET_IPV6_DST 12
#define NFP_FL_ACTION_OPCODE_SET_UDP 14
#define NFP_FL_ACTION_OPCODE_SET_TCP 15
+#define NFP_FL_ACTION_OPCODE_PRE_LAG 16
#define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17
#define NFP_FL_ACTION_OPCODE_NUM 32
@@ -103,6 +104,9 @@
#define NFP_FL_PUSH_VLAN_CFI BIT(12)
#define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0)
+/* LAG ports */
+#define NFP_FL_LAG_OUT 0xC0DE0000
+
/* Tunnel ports */
#define NFP_FL_PORT_TYPE_TUN 0x50000000
#define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4)
@@ -177,6 +181,15 @@ struct nfp_fl_pop_vlan {
__be16 reserved;
};
+struct nfp_fl_pre_lag {
+ struct nfp_fl_act_head head;
+ __be16 group_id;
+ u8 lag_version[3];
+ u8 instance;
+};
+
+#define NFP_FL_PRE_LAG_VER_OFF 8
+
struct nfp_fl_pre_tunnel {
struct nfp_fl_act_head head;
__be16 reserved;
@@ -366,6 +379,7 @@ struct nfp_flower_cmsg_hdr {
enum nfp_flower_cmsg_type_port {
NFP_FLOWER_CMSG_TYPE_FLOW_ADD = 0,
NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2,
+ NFP_FLOWER_CMSG_TYPE_LAG_CONFIG = 4,
NFP_FLOWER_CMSG_TYPE_PORT_REIFY = 6,
NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7,
NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c
new file mode 100644
index 000000000000..0c4c957717ea
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c
@@ -0,0 +1,726 @@
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "main.h"
+
+/* LAG group config flags. */
+#define NFP_FL_LAG_LAST BIT(1)
+#define NFP_FL_LAG_FIRST BIT(2)
+#define NFP_FL_LAG_DATA BIT(3)
+#define NFP_FL_LAG_XON BIT(4)
+#define NFP_FL_LAG_SYNC BIT(5)
+#define NFP_FL_LAG_SWITCH BIT(6)
+#define NFP_FL_LAG_RESET BIT(7)
+
+/* LAG port state flags. */
+#define NFP_PORT_LAG_LINK_UP BIT(0)
+#define NFP_PORT_LAG_TX_ENABLED BIT(1)
+#define NFP_PORT_LAG_CHANGED BIT(2)
+
+enum nfp_fl_lag_batch {
+ NFP_FL_LAG_BATCH_FIRST,
+ NFP_FL_LAG_BATCH_MEMBER,
+ NFP_FL_LAG_BATCH_FINISHED
+};
+
+/**
+ * struct nfp_flower_cmsg_lag_config - control message payload for LAG config
+ * @ctrl_flags: Configuration flags
+ * @reserved: Reserved for future use
+ * @ttl: Time to live of packet - host always sets to 0xff
+ * @pkt_number: Config message packet number - increment for each message
+ * @batch_ver: Batch version of messages - increment for each batch of messages
+ * @group_id: Group ID applicable
+ * @group_inst: Group instance number - increment when group is reused
+ * @members: Array of 32-bit words listing all active group members
+ */
+struct nfp_flower_cmsg_lag_config {
+ u8 ctrl_flags;
+ u8 reserved[2];
+ u8 ttl;
+ __be32 pkt_number;
+ __be32 batch_ver;
+ __be32 group_id;
+ __be32 group_inst;
+ __be32 members[];
+};
+
+/**
+ * struct nfp_fl_lag_group - list entry for each LAG group
+ * @group_id: Assigned group ID for host/kernel sync
+ * @group_inst: Group instance in case of ID reuse
+ * @list: List entry
+ * @master_ndev: Group master Netdev
+ * @dirty: Marked if the group needs to be synced to HW
+ * @offloaded: Marked if the group is currently offloaded to NIC
+ * @to_remove: Marked if the group should be removed from NIC
+ * @to_destroy: Marked if the group should be removed from driver
+ * @slave_cnt: Number of slaves in group
+ */
+struct nfp_fl_lag_group {
+ unsigned int group_id;
+ u8 group_inst;
+ struct list_head list;
+ struct net_device *master_ndev;
+ bool dirty;
+ bool offloaded;
+ bool to_remove;
+ bool to_destroy;
+ unsigned int slave_cnt;
+};
+
+#define NFP_FL_LAG_PKT_NUMBER_MASK GENMASK(30, 0)
+#define NFP_FL_LAG_VERSION_MASK GENMASK(22, 0)
+#define NFP_FL_LAG_HOST_TTL 0xff
+
+/* Use this ID with zero members to ack a batch config */
+#define NFP_FL_LAG_SYNC_ID 0
+#define NFP_FL_LAG_GROUP_MIN 1 /* ID 0 reserved */
+#define NFP_FL_LAG_GROUP_MAX 32 /* IDs 1 to 31 are valid */
+
+/* wait for more config */
+#define NFP_FL_LAG_DELAY (msecs_to_jiffies(2))
+
+#define NFP_FL_LAG_RETRANS_LIMIT 100 /* max retrans cmsgs to store */
+
+static unsigned int nfp_fl_get_next_pkt_number(struct nfp_fl_lag *lag)
+{
+ lag->pkt_num++;
+ lag->pkt_num &= NFP_FL_LAG_PKT_NUMBER_MASK;
+
+ return lag->pkt_num;
+}
+
+static void nfp_fl_increment_version(struct nfp_fl_lag *lag)
+{
+ /* LSB is not considered by firmware so add 2 for each increment. */
+ lag->batch_ver += 2;
+ lag->batch_ver &= NFP_FL_LAG_VERSION_MASK;
+
+ /* Zero is reserved by firmware. */
+ if (!lag->batch_ver)
+ lag->batch_ver += 2;
+}
+
+static struct nfp_fl_lag_group *
+nfp_fl_lag_group_create(struct nfp_fl_lag *lag, struct net_device *master)
+{
+ struct nfp_fl_lag_group *group;
+ struct nfp_flower_priv *priv;
+ int id;
+
+ priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+
+ id = ida_simple_get(&lag->ida_handle, NFP_FL_LAG_GROUP_MIN,
+ NFP_FL_LAG_GROUP_MAX, GFP_KERNEL);
+ if (id < 0) {
+ nfp_flower_cmsg_warn(priv->app,
+ "No more bonding groups available\n");
+ return ERR_PTR(id);
+ }
+
+ group = kmalloc(sizeof(*group), GFP_KERNEL);
+ if (!group) {
+ ida_simple_remove(&lag->ida_handle, id);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ group->group_id = id;
+ group->master_ndev = master;
+ group->dirty = true;
+ group->offloaded = false;
+ group->to_remove = false;
+ group->to_destroy = false;
+ group->slave_cnt = 0;
+ group->group_inst = ++lag->global_inst;
+ list_add_tail(&group->list, &lag->group_list);
+
+ return group;
+}
+
+static struct nfp_fl_lag_group *
+nfp_fl_lag_find_group_for_master_with_lag(struct nfp_fl_lag *lag,
+ struct net_device *master)
+{
+ struct nfp_fl_lag_group *entry;
+
+ if (!master)
+ return NULL;
+
+ list_for_each_entry(entry, &lag->group_list, list)
+ if (entry->master_ndev == master)
+ return entry;
+
+ return NULL;
+}
+
+int nfp_flower_lag_populate_pre_action(struct nfp_app *app,
+ struct net_device *master,
+ struct nfp_fl_pre_lag *pre_act)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_fl_lag_group *group = NULL;
+ __be32 temp_vers;
+
+ mutex_lock(&priv->nfp_lag.lock);
+ group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag,
+ master);
+ if (!group) {
+ mutex_unlock(&priv->nfp_lag.lock);
+ return -ENOENT;
+ }
+
+ pre_act->group_id = cpu_to_be16(group->group_id);
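+ /* The batch version is at most 23 bits and is shifted up by
+ * NFP_FL_PRE_LAG_VER_OFF, so the first three bytes of the big-endian
+ * word hold the complete version.
+ */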
+ temp_vers = cpu_to_be32(priv->nfp_lag.batch_ver <<
+ NFP_FL_PRE_LAG_VER_OFF);
+ memcpy(pre_act->lag_version, &temp_vers, 3);
+ pre_act->instance = group->group_inst;
+ mutex_unlock(&priv->nfp_lag.lock);
+
+ return 0;
+}
+
+int nfp_flower_lag_get_output_id(struct nfp_app *app, struct net_device *master)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_fl_lag_group *group = NULL;
+ int group_id = -ENOENT;
+
+ mutex_lock(&priv->nfp_lag.lock);
+ group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag,
+ master);
+ if (group)
+ group_id = group->group_id;
+ mutex_unlock(&priv->nfp_lag.lock);
+
+ return group_id;
+}
+
+static int
+nfp_fl_lag_config_group(struct nfp_fl_lag *lag, struct nfp_fl_lag_group *group,
+ struct net_device **active_members,
+ unsigned int member_cnt, enum nfp_fl_lag_batch *batch)
+{
+ struct nfp_flower_cmsg_lag_config *cmsg_payload;
+ struct nfp_flower_priv *priv;
+ unsigned long int flags;
+ unsigned int size, i;
+ struct sk_buff *skb;
+
+ priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+ size = sizeof(*cmsg_payload) + sizeof(__be32) * member_cnt;
+ skb = nfp_flower_cmsg_alloc(priv->app, size,
+ NFP_FLOWER_CMSG_TYPE_LAG_CONFIG,
+ GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ cmsg_payload = nfp_flower_cmsg_get_data(skb);
+ flags = 0;
+
+ /* Increment batch version for each new batch of config messages. */
+ if (*batch == NFP_FL_LAG_BATCH_FIRST) {
+ flags |= NFP_FL_LAG_FIRST;
+ nfp_fl_increment_version(lag);
+ *batch = NFP_FL_LAG_BATCH_MEMBER;
+ }
+
+ /* If it is a reset msg then it is also the end of the batch. */
+ if (lag->rst_cfg) {
+ flags |= NFP_FL_LAG_RESET;
+ *batch = NFP_FL_LAG_BATCH_FINISHED;
+ }
+
+ /* To signal the end of a batch, both the switch and last flags are set
+ * and the reserved SYNC group ID is used.
+ */
+ if (*batch == NFP_FL_LAG_BATCH_FINISHED) {
+ flags |= NFP_FL_LAG_SWITCH | NFP_FL_LAG_LAST;
+ lag->rst_cfg = false;
+ cmsg_payload->group_id = cpu_to_be32(NFP_FL_LAG_SYNC_ID);
+ cmsg_payload->group_inst = 0;
+ } else {
+ cmsg_payload->group_id = cpu_to_be32(group->group_id);
+ cmsg_payload->group_inst = cpu_to_be32(group->group_inst);
+ }
+
+ cmsg_payload->reserved[0] = 0;
+ cmsg_payload->reserved[1] = 0;
+ cmsg_payload->ttl = NFP_FL_LAG_HOST_TTL;
+ cmsg_payload->ctrl_flags = flags;
+ cmsg_payload->batch_ver = cpu_to_be32(lag->batch_ver);
+ cmsg_payload->pkt_number = cpu_to_be32(nfp_fl_get_next_pkt_number(lag));
+
+ for (i = 0; i < member_cnt; i++)
+ cmsg_payload->members[i] =
+ cpu_to_be32(nfp_repr_get_port_id(active_members[i]));
+
+ nfp_ctrl_tx(priv->app->ctrl, skb);
+ return 0;
+}
+
+static void nfp_fl_lag_do_work(struct work_struct *work)
+{
+ enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;
+ struct nfp_fl_lag_group *entry, *storage;
+ struct delayed_work *delayed_work;
+ struct nfp_flower_priv *priv;
+ struct nfp_fl_lag *lag;
+ int err;
+
+ delayed_work = to_delayed_work(work);
+ lag = container_of(delayed_work, struct nfp_fl_lag, work);
+ priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+
+ mutex_lock(&lag->lock);
+ list_for_each_entry_safe(entry, storage, &lag->group_list, list) {
+ struct net_device *iter_netdev, **acti_netdevs;
+ struct nfp_flower_repr_priv *repr_priv;
+ int active_count = 0, slaves = 0;
+ struct nfp_repr *repr;
+ unsigned long *flags;
+
+ if (entry->to_remove) {
+ /* Active count of 0 deletes group on hw. */
+ err = nfp_fl_lag_config_group(lag, entry, NULL, 0,
+ &batch);
+ if (!err) {
+ entry->to_remove = false;
+ entry->offloaded = false;
+ } else {
+ nfp_flower_cmsg_warn(priv->app,
+ "group delete failed\n");
+ schedule_delayed_work(&lag->work,
+ NFP_FL_LAG_DELAY);
+ continue;
+ }
+
+ if (entry->to_destroy) {
+ ida_simple_remove(&lag->ida_handle,
+ entry->group_id);
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ continue;
+ }
+
+ acti_netdevs = kmalloc_array(entry->slave_cnt,
+ sizeof(*acti_netdevs), GFP_KERNEL);
+ if (!acti_netdevs) {
+ schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+ continue;
+ }
+
+ /* Include sanity check in the loop. It may be that a bond has
+ * changed between processing the last notification and the
+ * work queue triggering. If the number of slaves has changed
+ * or it now contains netdevs that cannot be offloaded, ignore
+ * the group until pending notifications are processed.
+ */
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(entry->master_ndev, iter_netdev) {
+ if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
+ slaves = 0;
+ break;
+ }
+
+ repr = netdev_priv(iter_netdev);
+
+ if (repr->app != priv->app) {
+ slaves = 0;
+ break;
+ }
+
+ slaves++;
+ if (slaves > entry->slave_cnt)
+ break;
+
+ /* Check the ports for state changes. */
+ repr_priv = repr->app_priv;
+ flags = &repr_priv->lag_port_flags;
+
+ if (*flags & NFP_PORT_LAG_CHANGED) {
+ *flags &= ~NFP_PORT_LAG_CHANGED;
+ entry->dirty = true;
+ }
+
+ if ((*flags & NFP_PORT_LAG_TX_ENABLED) &&
+ (*flags & NFP_PORT_LAG_LINK_UP))
+ acti_netdevs[active_count++] = iter_netdev;
+ }
+ rcu_read_unlock();
+
+ if (slaves != entry->slave_cnt || !entry->dirty) {
+ kfree(acti_netdevs);
+ continue;
+ }
+
+ err = nfp_fl_lag_config_group(lag, entry, acti_netdevs,
+ active_count, &batch);
+ if (!err) {
+ entry->offloaded = true;
+ entry->dirty = false;
+ } else {
+ nfp_flower_cmsg_warn(priv->app,
+ "group offload failed\n");
+ schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+ }
+
+ kfree(acti_netdevs);
+ }
+
+ /* End the config batch if at least one packet has been batched. */
+ if (batch == NFP_FL_LAG_BATCH_MEMBER) {
+ batch = NFP_FL_LAG_BATCH_FINISHED;
+ err = nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
+ if (err)
+ nfp_flower_cmsg_warn(priv->app,
+ "group batch end cmsg failed\n");
+ }
+
+ mutex_unlock(&lag->lock);
+}
+
+static int
+nfp_fl_lag_put_unprocessed(struct nfp_fl_lag *lag, struct sk_buff *skb)
+{
+ struct nfp_flower_cmsg_lag_config *cmsg_payload;
+
+ cmsg_payload = nfp_flower_cmsg_get_data(skb);
+ if (be32_to_cpu(cmsg_payload->group_id) >= NFP_FL_LAG_GROUP_MAX)
+ return -EINVAL;
+
+ /* Drop cmsg retrans if storage limit is exceeded to prevent
+ * overloading. If the fw notices that expected messages have not been
+ * received in a given time block, it will request a full resync.
+ */
+ if (skb_queue_len(&lag->retrans_skbs) >= NFP_FL_LAG_RETRANS_LIMIT)
+ return -ENOSPC;
+
+ __skb_queue_tail(&lag->retrans_skbs, skb);
+
+ return 0;
+}
+
+static void nfp_fl_send_unprocessed(struct nfp_fl_lag *lag)
+{
+ struct nfp_flower_priv *priv;
+ struct sk_buff *skb;
+
+ priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+
+ while ((skb = __skb_dequeue(&lag->retrans_skbs)))
+ nfp_ctrl_tx(priv->app->ctrl, skb);
+}
+
+bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb)
+{
+ struct nfp_flower_cmsg_lag_config *cmsg_payload;
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_fl_lag_group *group_entry;
+ unsigned long int flags;
+ bool store_skb = false;
+ int err;
+
+ cmsg_payload = nfp_flower_cmsg_get_data(skb);
+ flags = cmsg_payload->ctrl_flags;
+
+ /* Note the intentional fall through below. If DATA and XON are both
+ * set, the message will be stored and sent again along with the rest
+ * of the unprocessed message list.
+ */
+
+ /* Store */
+ if (flags & NFP_FL_LAG_DATA)
+ if (!nfp_fl_lag_put_unprocessed(&priv->nfp_lag, skb))
+ store_skb = true;
+
+ /* Send stored */
+ if (flags & NFP_FL_LAG_XON)
+ nfp_fl_send_unprocessed(&priv->nfp_lag);
+
+ /* Resend all */
+ if (flags & NFP_FL_LAG_SYNC) {
+ /* To resend all config:
+ * 1) Clear all unprocessed messages
+ * 2) Mark all groups dirty
+ * 3) Reset NFP group config
+ * 4) Schedule a LAG config update
+ */
+
+ __skb_queue_purge(&priv->nfp_lag.retrans_skbs);
+
+ mutex_lock(&priv->nfp_lag.lock);
+ list_for_each_entry(group_entry, &priv->nfp_lag.group_list,
+ list)
+ group_entry->dirty = true;
+
+ err = nfp_flower_lag_reset(&priv->nfp_lag);
+ if (err)
+ nfp_flower_cmsg_warn(priv->app,
+ "mem err in group reset msg\n");
+ mutex_unlock(&priv->nfp_lag.lock);
+
+ schedule_delayed_work(&priv->nfp_lag.work, 0);
+ }
+
+ return store_skb;
+}
+
+static void
+nfp_fl_lag_schedule_group_remove(struct nfp_fl_lag *lag,
+ struct nfp_fl_lag_group *group)
+{
+ group->to_remove = true;
+
+ schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+}
+
+static int
+nfp_fl_lag_schedule_group_delete(struct nfp_fl_lag *lag,
+ struct net_device *master)
+{
+ struct nfp_fl_lag_group *group;
+
+ mutex_lock(&lag->lock);
+ group = nfp_fl_lag_find_group_for_master_with_lag(lag, master);
+ if (!group) {
+ mutex_unlock(&lag->lock);
+ return -ENOENT;
+ }
+
+ group->to_remove = true;
+ group->to_destroy = true;
+ mutex_unlock(&lag->lock);
+
+ schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+ return 0;
+}
+
+static int
+nfp_fl_lag_changeupper_event(struct nfp_fl_lag *lag,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *upper = info->upper_dev, *iter_netdev;
+ struct netdev_lag_upper_info *lag_upper_info;
+ struct nfp_fl_lag_group *group;
+ struct nfp_flower_priv *priv;
+ unsigned int slave_count = 0;
+ bool can_offload = true;
+ struct nfp_repr *repr;
+
+ if (!netif_is_lag_master(upper))
+ return 0;
+
+ priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(upper, iter_netdev) {
+ if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
+ can_offload = false;
+ break;
+ }
+ repr = netdev_priv(iter_netdev);
+
+ /* Ensure all ports are created by the same app, on the same card. */
+ if (repr->app != priv->app) {
+ can_offload = false;
+ break;
+ }
+
+ slave_count++;
+ }
+ rcu_read_unlock();
+
+ lag_upper_info = info->upper_info;
+
+ /* Firmware supports active/backup and L3/L4 hash bonds. */
+ if (lag_upper_info &&
+ lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
+ (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH ||
+ (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 &&
+ lag_upper_info->hash_type != NETDEV_LAG_HASH_E34))) {
+ can_offload = false;
+ nfp_flower_cmsg_warn(priv->app,
+ "Unable to offload tx_type %u hash %u\n",
+ lag_upper_info->tx_type,
+ lag_upper_info->hash_type);
+ }
+
+ mutex_lock(&lag->lock);
+ group = nfp_fl_lag_find_group_for_master_with_lag(lag, upper);
+
+ if (slave_count == 0 || !can_offload) {
+ /* Cannot offload the group - remove if previously offloaded. */
+ if (group && group->offloaded)
+ nfp_fl_lag_schedule_group_remove(lag, group);
+
+ mutex_unlock(&lag->lock);
+ return 0;
+ }
+
+ if (!group) {
+ group = nfp_fl_lag_group_create(lag, upper);
+ if (IS_ERR(group)) {
+ mutex_unlock(&lag->lock);
+ return PTR_ERR(group);
+ }
+ }
+
+ group->dirty = true;
+ group->slave_cnt = slave_count;
+
+ /* Group may have been queued for removal but is now offloadable. */
+ group->to_remove = false;
+ mutex_unlock(&lag->lock);
+
+ schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+ return 0;
+}
+
+static int
+nfp_fl_lag_changels_event(struct nfp_fl_lag *lag, struct net_device *netdev,
+ struct netdev_notifier_changelowerstate_info *info)
+{
+ struct netdev_lag_lower_state_info *lag_lower_info;
+ struct nfp_flower_repr_priv *repr_priv;
+ struct nfp_flower_priv *priv;
+ struct nfp_repr *repr;
+ unsigned long *flags;
+
+ if (!netif_is_lag_port(netdev) || !nfp_netdev_is_nfp_repr(netdev))
+ return 0;
+
+ lag_lower_info = info->lower_state_info;
+ if (!lag_lower_info)
+ return 0;
+
+ priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+ repr = netdev_priv(netdev);
+
+ /* Verify that the repr is associated with this app. */
+ if (repr->app != priv->app)
+ return 0;
+
+ repr_priv = repr->app_priv;
+ flags = &repr_priv->lag_port_flags;
+
+ mutex_lock(&lag->lock);
+ if (lag_lower_info->link_up)
+ *flags |= NFP_PORT_LAG_LINK_UP;
+ else
+ *flags &= ~NFP_PORT_LAG_LINK_UP;
+
+ if (lag_lower_info->tx_enabled)
+ *flags |= NFP_PORT_LAG_TX_ENABLED;
+ else
+ *flags &= ~NFP_PORT_LAG_TX_ENABLED;
+
+ *flags |= NFP_PORT_LAG_CHANGED;
+ mutex_unlock(&lag->lock);
+
+ schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+ return 0;
+}
+
+static int
+nfp_fl_lag_netdev_event(struct notifier_block *nb, unsigned long event,
+ void *ptr)
+{
+ struct net_device *netdev;
+ struct nfp_fl_lag *lag;
+ int err;
+
+ netdev = netdev_notifier_info_to_dev(ptr);
+ lag = container_of(nb, struct nfp_fl_lag, lag_nb);
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ err = nfp_fl_lag_changeupper_event(lag, ptr);
+ if (err)
+ return NOTIFY_BAD;
+ return NOTIFY_OK;
+ case NETDEV_CHANGELOWERSTATE:
+ err = nfp_fl_lag_changels_event(lag, netdev, ptr);
+ if (err)
+ return NOTIFY_BAD;
+ return NOTIFY_OK;
+ case NETDEV_UNREGISTER:
+ if (netif_is_bond_master(netdev)) {
+ err = nfp_fl_lag_schedule_group_delete(lag, netdev);
+ if (err)
+ return NOTIFY_BAD;
+ return NOTIFY_OK;
+ }
+ }
+
+ return NOTIFY_DONE;
+}
+
+int nfp_flower_lag_reset(struct nfp_fl_lag *lag)
+{
+ enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;
+
+ lag->rst_cfg = true;
+ return nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
+}
+
+void nfp_flower_lag_init(struct nfp_fl_lag *lag)
+{
+ INIT_DELAYED_WORK(&lag->work, nfp_fl_lag_do_work);
+ INIT_LIST_HEAD(&lag->group_list);
+ mutex_init(&lag->lock);
+ ida_init(&lag->ida_handle);
+
+ __skb_queue_head_init(&lag->retrans_skbs);
+
+ /* 0 is a reserved batch version, so increment to the first valid value. */
+ nfp_fl_increment_version(lag);
+
+ lag->lag_nb.notifier_call = nfp_fl_lag_netdev_event;
+}
+
+void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag)
+{
+ struct nfp_fl_lag_group *entry, *storage;
+
+ cancel_delayed_work_sync(&lag->work);
+
+ __skb_queue_purge(&lag->retrans_skbs);
+
+ /* Remove all groups. */
+ mutex_lock(&lag->lock);
+ list_for_each_entry_safe(entry, storage, &lag->group_list, list) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ mutex_unlock(&lag->lock);
+ mutex_destroy(&lag->lock);
+ ida_destroy(&lag->ida_handle);
+}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index 4e67c0cbf9f0..19cfa162ac65 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -185,6 +185,10 @@ nfp_flower_repr_netdev_init(struct nfp_app *app, struct net_device *netdev)
static void
nfp_flower_repr_netdev_clean(struct nfp_app *app, struct net_device *netdev)
{
+ struct nfp_repr *repr = netdev_priv(netdev);
+
+ kfree(repr->app_priv);
+
tc_setup_cb_egdev_unregister(netdev, nfp_flower_setup_tc_egress_cb,
netdev_priv(netdev));
}
@@ -225,7 +229,9 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
u8 nfp_pcie = nfp_cppcore_pcie_unit(app->pf->cpp);
struct nfp_flower_priv *priv = app->priv;
atomic_t *replies = &priv->reify_replies;
+ struct nfp_flower_repr_priv *repr_priv;
enum nfp_port_type port_type;
+ struct nfp_repr *nfp_repr;
struct nfp_reprs *reprs;
int i, err, reify_cnt;
const u8 queue = 0;
@@ -248,6 +254,15 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
goto err_reprs_clean;
}
+ repr_priv = kzalloc(sizeof(*repr_priv), GFP_KERNEL);
+ if (!repr_priv) {
+ err = -ENOMEM;
+ goto err_reprs_clean;
+ }
+
+ nfp_repr = netdev_priv(repr);
+ nfp_repr->app_priv = repr_priv;
+
/* For now we only support 1 PF */
WARN_ON(repr_type == NFP_REPR_TYPE_PF && i);
@@ -324,6 +339,8 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
{
struct nfp_eth_table *eth_tbl = app->pf->eth_tbl;
atomic_t *replies = &priv->reify_replies;
+ struct nfp_flower_repr_priv *repr_priv;
+ struct nfp_repr *nfp_repr;
struct sk_buff *ctrl_skb;
struct nfp_reprs *reprs;
int err, reify_cnt;
@@ -351,6 +368,15 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
goto err_reprs_clean;
}
+ repr_priv = kzalloc(sizeof(*repr_priv), GFP_KERNEL);
+ if (!repr_priv) {
+ err = -ENOMEM;
+ goto err_reprs_clean;
+ }
+
+ nfp_repr = netdev_priv(repr);
+ nfp_repr->app_priv = repr_priv;
+
port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT, repr);
if (IS_ERR(port)) {
err = PTR_ERR(port);
@@ -546,8 +572,22 @@ static int nfp_flower_init(struct nfp_app *app)
else
app_priv->flower_ext_feats = features;
+ /* Tell the firmware that the driver supports LAG. */
+ err = nfp_rtsym_write_le(app->pf->rtbl,
+ "_abi_flower_balance_sync_enable", 1);
+ if (!err) {
+ app_priv->flower_ext_feats |= NFP_FL_FEATS_LAG;
+ nfp_flower_lag_init(&app_priv->nfp_lag);
+ } else if (err == -ENOENT) {
+ nfp_warn(app->cpp, "LAG not supported by FW.\n");
+ } else {
+ goto err_cleanup_metadata;
+ }
+
return 0;
+err_cleanup_metadata:
+ nfp_flower_metadata_cleanup(app);
err_free_app_priv:
vfree(app->priv);
return err;
@@ -561,6 +601,9 @@ static void nfp_flower_clean(struct nfp_app *app)
skb_queue_purge(&app_priv->cmsg_skbs_low);
flush_work(&app_priv->cmsg_work);
+ if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG)
+ nfp_flower_lag_cleanup(&app_priv->nfp_lag);
+
nfp_flower_metadata_cleanup(app);
vfree(app->priv);
app->priv = NULL;
@@ -627,11 +670,29 @@ nfp_flower_repr_change_mtu(struct nfp_app *app, struct net_device *netdev,
static int nfp_flower_start(struct nfp_app *app)
{
+ struct nfp_flower_priv *app_priv = app->priv;
+ int err;
+
+ if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
+ err = nfp_flower_lag_reset(&app_priv->nfp_lag);
+ if (err)
+ return err;
+
+ err = register_netdevice_notifier(&app_priv->nfp_lag.lag_nb);
+ if (err)
+ return err;
+ }
+
return nfp_tunnel_config_start(app);
}
static void nfp_flower_stop(struct nfp_app *app)
{
+ struct nfp_flower_priv *app_priv = app->priv;
+
+ if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG)
+ unregister_netdevice_notifier(&app_priv->nfp_lag.lag_nb);
+
nfp_tunnel_config_stop(app);
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 733ff53cc601..bbe5764d26cb 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -43,7 +43,9 @@
#include <net/pkt_cls.h>
#include <net/tcp.h>
#include <linux/workqueue.h>
+#include <linux/idr.h>
+struct nfp_fl_pre_lag;
struct net_device;
struct nfp_app;
@@ -67,6 +69,7 @@ struct nfp_app;
/* Extra features bitmap. */
#define NFP_FL_FEATS_GENEVE BIT(0)
#define NFP_FL_NBI_MTU_SETTING BIT(1)
+#define NFP_FL_FEATS_LAG BIT(31)
struct nfp_fl_mask_id {
struct circ_buf mask_id_free_list;
@@ -97,6 +100,33 @@ struct nfp_mtu_conf {
};
/**
+ * struct nfp_fl_lag - Flower APP priv data for link aggregation
+ * @lag_nb: Notifier to track master/slave events
+ * @work: Work queue for writing configs to the HW
+ * @lock: Lock to protect lag_group_list
+ * @group_list: List of all master/slave groups offloaded
+ * @ida_handle: IDA to handle group ids
+ * @pkt_num: Incremented for each config packet sent
+ * @batch_ver: Incremented for each batch of config packets
+ * @global_inst: Instance allocator for groups
+ * @rst_cfg: Marker to reset HW LAG config
+ * @retrans_skbs: Cmsgs that could not be processed by HW and require
+ * retransmission
+ */
+struct nfp_fl_lag {
+ struct notifier_block lag_nb;
+ struct delayed_work work;
+ struct mutex lock;
+ struct list_head group_list;
+ struct ida ida_handle;
+ unsigned int pkt_num;
+ unsigned int batch_ver;
+ u8 global_inst;
+ bool rst_cfg;
+ struct sk_buff_head retrans_skbs;
+};
+
+/**
* struct nfp_flower_priv - Flower APP per-vNIC priv data
* @app: Back pointer to app
* @nn: Pointer to vNIC
@@ -128,6 +158,7 @@ struct nfp_mtu_conf {
* from firmware for repr reify
* @reify_wait_queue: wait queue for repr reify response counting
* @mtu_conf: Configuration of repr MTU value
+ * @nfp_lag: Link aggregation data block
*/
struct nfp_flower_priv {
struct nfp_app *app;
@@ -157,6 +188,15 @@ struct nfp_flower_priv {
atomic_t reify_replies;
wait_queue_head_t reify_wait_queue;
struct nfp_mtu_conf mtu_conf;
+ struct nfp_fl_lag nfp_lag;
+};
+
+/**
+ * struct nfp_flower_repr_priv - Flower APP per-repr priv data
+ * @lag_port_flags: Extended port flags to record lag state of repr
+ */
+struct nfp_flower_repr_priv {
+ unsigned long lag_port_flags;
};
struct nfp_fl_key_ls {
@@ -214,7 +254,8 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
struct net_device *netdev,
struct nfp_fl_payload *nfp_flow,
enum nfp_flower_tun_type tun_type);
-int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
+int nfp_flower_compile_action(struct nfp_app *app,
+ struct tc_cls_flower_offload *flow,
struct net_device *netdev,
struct nfp_fl_payload *nfp_flow);
int nfp_compile_flow_metadata(struct nfp_app *app,
@@ -241,5 +282,14 @@ void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb);
void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb);
int nfp_flower_setup_tc_egress_cb(enum tc_setup_type type, void *type_data,
void *cb_priv);
+void nfp_flower_lag_init(struct nfp_fl_lag *lag);
+void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag);
+int nfp_flower_lag_reset(struct nfp_fl_lag *lag);
+bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb);
+int nfp_flower_lag_populate_pre_action(struct nfp_app *app,
+ struct net_device *master,
+ struct nfp_fl_pre_lag *pre_act);
+int nfp_flower_lag_get_output_id(struct nfp_app *app,
+ struct net_device *master);
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 70ec9d821b91..c42e64f32333 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -440,7 +440,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
if (err)
goto err_destroy_flow;
- err = nfp_flower_compile_action(flow, netdev, flow_pay);
+ err = nfp_flower_compile_action(app, flow, netdev, flow_pay);
if (err)
goto err_destroy_flow;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index faa4e131c136..f6677bc9875a 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -72,8 +72,21 @@
#define OP_BR_ADDR_LO 0x007ffc00000ULL
#define OP_BR_ADDR_HI 0x10000000000ULL
-#define nfp_is_br(_insn) \
- (((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE)
+#define OP_BR_BIT_BASE 0x0d000000000ULL
+#define OP_BR_BIT_BASE_MASK 0x0f800080300ULL
+#define OP_BR_BIT_A_SRC 0x000000000ffULL
+#define OP_BR_BIT_B_SRC 0x0000003fc00ULL
+#define OP_BR_BIT_BV 0x00000040000ULL
+#define OP_BR_BIT_SRC_LMEXTN 0x40000000000ULL
+#define OP_BR_BIT_DEFBR OP_BR_DEFBR
+#define OP_BR_BIT_ADDR_LO OP_BR_ADDR_LO
+#define OP_BR_BIT_ADDR_HI OP_BR_ADDR_HI
+
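+/* Match both the plain branch and the branch-on-bit base encodings. */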
+static inline bool nfp_is_br(u64 insn)
+{
+ return (insn & OP_BR_BASE_MASK) == OP_BR_BASE ||
+ (insn & OP_BR_BIT_BASE_MASK) == OP_BR_BIT_BASE;
+}
enum br_mask {
BR_BEQ = 0x00,
@@ -161,6 +174,7 @@ enum shf_op {
SHF_OP_NONE = 0,
SHF_OP_AND = 2,
SHF_OP_OR = 5,
+ SHF_OP_ASHR = 6,
};
enum shf_sc {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index 09e87d5f4f72..117eca6819de 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -277,6 +277,7 @@ const struct net_device_ops nfp_repr_netdev_ops = {
.ndo_get_vf_config = nfp_app_get_vf_config,
.ndo_set_vf_link_state = nfp_app_set_vf_link_state,
.ndo_set_features = nfp_port_set_features,
+ .ndo_set_mac_address = eth_mac_addr,
};
static void nfp_repr_clean(struct nfp_repr *repr)
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h
index c9724fb7ea4b..df599d5b6bb3 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h
@@ -100,6 +100,8 @@ nfp_rtsym_lookup(struct nfp_rtsym_table *rtbl, const char *name);
u64 nfp_rtsym_read_le(struct nfp_rtsym_table *rtbl, const char *name,
int *error);
+int nfp_rtsym_write_le(struct nfp_rtsym_table *rtbl, const char *name,
+ u64 value);
u8 __iomem *
nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name, const char *id,
unsigned int min_size, struct nfp_cpp_area **area);
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
index 46107aefad1c..9e34216578da 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
@@ -286,6 +286,49 @@ exit:
return val;
}
+/**
+ * nfp_rtsym_write_le() - Write an unsigned scalar value to a symbol
+ * @rtbl: NFP RTsym table
+ * @name: Symbol name
+ * @value: Value to write
+ *
+ * Look up a symbol and write a value to it. The symbol can be 4 or 8 bytes
+ * in size. If 4 bytes then the lower 32 bits of 'value' are used. The value
+ * is written as a simple little-endian unsigned value.
+ *
+ * Return: 0 on success or error code.
+ */
+int nfp_rtsym_write_le(struct nfp_rtsym_table *rtbl, const char *name,
+ u64 value)
+{
+ const struct nfp_rtsym *sym;
+ int err;
+ u32 id;
+
+ sym = nfp_rtsym_lookup(rtbl, name);
+ if (!sym)
+ return -ENOENT;
+
+ id = NFP_CPP_ISLAND_ID(sym->target, NFP_CPP_ACTION_RW, 0, sym->domain);
+
+ switch (sym->size) {
+ case 4:
+ err = nfp_cpp_writel(rtbl->cpp, id, sym->addr, value);
+ break;
+ case 8:
+ err = nfp_cpp_writeq(rtbl->cpp, id, sym->addr, value);
+ break;
+ default:
+ nfp_err(rtbl->cpp,
+ "rtsym '%s' unsupported or non-scalar size: %lld\n",
+ name, sym->size);
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
u8 __iomem *
nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name, const char *id,
unsigned int min_size, struct nfp_cpp_area **area)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 5e655c3601cf..1c0d0c217936 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -1677,6 +1677,8 @@ static void __qed_get_vport_tstats(struct qed_hwfn *p_hwfn,
HILO_64_REGPAIR(tstats.mftag_filter_discard);
p_stats->common.mac_filter_discards +=
HILO_64_REGPAIR(tstats.eth_mac_filter_discard);
+ p_stats->common.gft_filter_drop +=
+ HILO_64_REGPAIR(tstats.eth_gft_drop_pkt);
}
static void __qed_get_vport_ustats_addrlen(struct qed_hwfn *p_hwfn,
@@ -1973,6 +1975,8 @@ qed_arfs_mode_to_hsi(enum qed_filter_config_mode mode)
return GFT_PROFILE_TYPE_4_TUPLE;
if (mode == QED_FILTER_CONFIG_MODE_IP_DEST)
return GFT_PROFILE_TYPE_IP_DST_ADDR;
+ if (mode == QED_FILTER_CONFIG_MODE_IP_SRC)
+ return GFT_PROFILE_TYPE_IP_SRC_ADDR;
return GFT_PROFILE_TYPE_L4_DST_PORT;
}
@@ -2013,16 +2017,6 @@ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn,
u8 abs_vport_id = 0;
int rc = -EINVAL;
- rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
- if (rc)
- return rc;
-
- if (p_params->qid != QED_RFS_NTUPLE_QID_RSS) {
- rc = qed_fw_l2_queue(p_hwfn, p_params->qid, &abs_rx_q_id);
- if (rc)
- return rc;
- }
-
/* Get SPQ entry */
memset(&init_data, 0, sizeof(init_data));
init_data.cid = qed_spq_get_cid(p_hwfn);
@@ -2047,15 +2041,28 @@ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn,
DMA_REGPAIR_LE(p_ramrod->pkt_hdr_addr, p_params->addr);
p_ramrod->pkt_hdr_length = cpu_to_le16(p_params->length);
- if (p_params->qid != QED_RFS_NTUPLE_QID_RSS) {
- p_ramrod->rx_qid_valid = 1;
- p_ramrod->rx_qid = cpu_to_le16(abs_rx_q_id);
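+ /* Drop filters are steered to the trashcan vport; otherwise resolve
+ * the real vport and, for non-RSS filters, the absolute Rx queue.
+ */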
+ if (p_params->b_is_drop) {
+ p_ramrod->vport_id = cpu_to_le16(ETH_GFT_TRASHCAN_VPORT);
+ } else {
+ rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
+ if (rc)
+ return rc;
+
+ if (p_params->qid != QED_RFS_NTUPLE_QID_RSS) {
+ rc = qed_fw_l2_queue(p_hwfn, p_params->qid,
+ &abs_rx_q_id);
+ if (rc)
+ return rc;
+
+ p_ramrod->rx_qid_valid = 1;
+ p_ramrod->rx_qid = cpu_to_le16(abs_rx_q_id);
+ }
+
+ p_ramrod->vport_id = cpu_to_le16((u16)abs_vport_id);
}
p_ramrod->flow_id_valid = 0;
p_ramrod->flow_id = 0;
-
- p_ramrod->vport_id = cpu_to_le16((u16)abs_vport_id);
p_ramrod->filter_action = p_params->b_is_add ? GFT_ADD_FILTER
: GFT_DELETE_FILTER;
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 2d3f09ed413b..81c5c8dfa2ef 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -75,6 +75,7 @@ struct qede_stats_common {
u64 rx_bcast_pkts;
u64 mftag_filter_discards;
u64 mac_filter_discards;
+ u64 gft_filter_drop;
u64 tx_ucast_bytes;
u64 tx_mcast_bytes;
u64 tx_bcast_bytes;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 8c6fdad91986..6906e04b609e 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -161,6 +161,7 @@ static const struct {
QEDE_STAT(no_buff_discards),
QEDE_PF_STAT(mftag_filter_discards),
QEDE_PF_STAT(mac_filter_discards),
+ QEDE_PF_STAT(gft_filter_drop),
QEDE_STAT(tx_err_drop_pkts),
QEDE_STAT(ttl0_discard),
QEDE_STAT(packet_too_big_discard),
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index 43569b1839be..e9e088d9c815 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -38,6 +38,7 @@
#include <linux/qed/qed_if.h>
#include "qede.h"
+#define QEDE_FILTER_PRINT_MAX_LEN (64)
struct qede_arfs_tuple {
union {
__be32 src_ipv4;
@@ -51,6 +52,18 @@ struct qede_arfs_tuple {
__be16 dst_port;
__be16 eth_proto;
u8 ip_proto;
+
+ /* Describe filtering mode needed for this kind of filter */
+ enum qed_filter_config_mode mode;
+
+ /* Used to compare new/old filters. Return true if IPs match */
+ bool (*ip_comp)(struct qede_arfs_tuple *a, struct qede_arfs_tuple *b);
+
+ /* Given an address into an ethhdr, build a header from the tuple info */
+ void (*build_hdr)(struct qede_arfs_tuple *t, void *header);
+
+ /* Stringify the tuple for a print into the provided buffer */
+ void (*stringify)(struct qede_arfs_tuple *t, void *buffer);
};
struct qede_arfs_fltr_node {
@@ -73,9 +86,11 @@ struct qede_arfs_fltr_node {
u16 sw_id;
u16 rxq_id;
u16 next_rxq_id;
+ u8 vfid;
bool filter_op;
bool used;
u8 fw_rc;
+ bool b_is_drop;
struct hlist_node node;
};
@@ -90,7 +105,9 @@ struct qede_arfs {
spinlock_t arfs_list_lock;
unsigned long *arfs_fltr_bmap;
int filter_count;
- bool enable;
+
+ /* Currently configured filtering mode */
+ enum qed_filter_config_mode mode;
};
static void qede_configure_arfs_fltr(struct qede_dev *edev,
@@ -109,12 +126,22 @@ static void qede_configure_arfs_fltr(struct qede_dev *edev,
params.length = n->buf_len;
params.qid = rxq_id;
params.b_is_add = add_fltr;
+ params.b_is_drop = n->b_is_drop;
+
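+ /* vfid appears to be stored biased by one so that zero means the PF;
+ * undo the bias when handing the VF index down to qed.
+ */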
+ if (n->vfid) {
+ params.b_is_vf = true;
+ params.vf_id = n->vfid - 1;
+ }
- DP_VERBOSE(edev, NETIF_MSG_RX_STATUS,
- "%s arfs filter flow_id=%d, sw_id=%d, src_port=%d, dst_port=%d, rxq=%d\n",
- add_fltr ? "Adding" : "Deleting",
- n->flow_id, n->sw_id, ntohs(n->tuple.src_port),
- ntohs(n->tuple.dst_port), rxq_id);
+ if (n->tuple.stringify) {
+ char tuple_buffer[QEDE_FILTER_PRINT_MAX_LEN];
+
+ n->tuple.stringify(&n->tuple, tuple_buffer);
+ DP_VERBOSE(edev, NETIF_MSG_RX_STATUS,
+ "%s sw_id[0x%x]: %s [vf %u queue %d]\n",
+ add_fltr ? "Adding" : "Deleting",
+ n->sw_id, tuple_buffer, n->vfid, rxq_id);
+ }
n->used = true;
n->filter_op = add_fltr;
@@ -145,14 +172,13 @@ qede_enqueue_fltr_and_config_searcher(struct qede_dev *edev,
INIT_HLIST_NODE(&fltr->node);
hlist_add_head(&fltr->node,
QEDE_ARFS_BUCKET_HEAD(edev, bucket_idx));
- edev->arfs->filter_count++;
-
- if (edev->arfs->filter_count == 1 && !edev->arfs->enable) {
- enum qed_filter_config_mode mode;
- mode = QED_FILTER_CONFIG_MODE_5_TUPLE;
- edev->ops->configure_arfs_searcher(edev->cdev, mode);
- edev->arfs->enable = true;
+ edev->arfs->filter_count++;
+ if (edev->arfs->filter_count == 1 &&
+ edev->arfs->mode == QED_FILTER_CONFIG_MODE_DISABLE) {
+ edev->ops->configure_arfs_searcher(edev->cdev,
+ fltr->tuple.mode);
+ edev->arfs->mode = fltr->tuple.mode;
}
return 0;
@@ -167,14 +193,15 @@ qede_dequeue_fltr_and_config_searcher(struct qede_dev *edev,
fltr->buf_len, DMA_TO_DEVICE);
qede_free_arfs_filter(edev, fltr);
- edev->arfs->filter_count--;
- if (!edev->arfs->filter_count && edev->arfs->enable) {
+ edev->arfs->filter_count--;
+ if (!edev->arfs->filter_count &&
+ edev->arfs->mode != QED_FILTER_CONFIG_MODE_DISABLE) {
enum qed_filter_config_mode mode;
mode = QED_FILTER_CONFIG_MODE_DISABLE;
- edev->arfs->enable = false;
edev->ops->configure_arfs_searcher(edev->cdev, mode);
+ edev->arfs->mode = QED_FILTER_CONFIG_MODE_DISABLE;
}
}
@@ -264,25 +291,17 @@ void qede_process_arfs_filters(struct qede_dev *edev, bool free_fltr)
}
}
+#ifdef CONFIG_RFS_ACCEL
spin_lock_bh(&edev->arfs->arfs_list_lock);
- if (!edev->arfs->filter_count) {
- if (edev->arfs->enable) {
- enum qed_filter_config_mode mode;
-
- mode = QED_FILTER_CONFIG_MODE_DISABLE;
- edev->arfs->enable = false;
- edev->ops->configure_arfs_searcher(edev->cdev, mode);
- }
-#ifdef CONFIG_RFS_ACCEL
- } else {
+ if (edev->arfs->filter_count) {
set_bit(QEDE_SP_ARFS_CONFIG, &edev->sp_flags);
schedule_delayed_work(&edev->sp_task,
QEDE_SP_TASK_POLL_DELAY);
-#endif
}
spin_unlock_bh(&edev->arfs->arfs_list_lock);
+#endif
}
/* This function waits until all aRFS filters get deleted and freed.
@@ -512,6 +531,7 @@ int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
eth->h_proto = skb->protocol;
n->tuple.eth_proto = skb->protocol;
n->tuple.ip_proto = ip_proto;
+ n->tuple.mode = QED_FILTER_CONFIG_MODE_5_TUPLE;
memcpy(n->data + ETH_HLEN, skb->data, skb_headlen(skb));
rc = qede_enqueue_fltr_and_config_searcher(edev, n, tbl_idx);
@@ -1339,38 +1359,6 @@ qede_get_arfs_fltr_by_loc(struct hlist_head *head, u32 location)
return NULL;
}
-static bool
-qede_compare_user_flow_ips(struct qede_arfs_fltr_node *tpos,
- struct ethtool_rx_flow_spec *fsp,
- __be16 proto)
-{
- if (proto == htons(ETH_P_IP)) {
- struct ethtool_tcpip4_spec *ip;
-
- ip = &fsp->h_u.tcp_ip4_spec;
-
- if (tpos->tuple.src_ipv4 == ip->ip4src &&
- tpos->tuple.dst_ipv4 == ip->ip4dst)
- return true;
- else
- return false;
- } else {
- struct ethtool_tcpip6_spec *ip6;
- struct in6_addr *src;
-
- ip6 = &fsp->h_u.tcp_ip6_spec;
- src = &tpos->tuple.src_ipv6;
-
- if (!memcmp(src, &ip6->ip6src, sizeof(struct in6_addr)) &&
- !memcmp(&tpos->tuple.dst_ipv6, &ip6->ip6dst,
- sizeof(struct in6_addr)))
- return true;
- else
- return false;
- }
- return false;
-}
-
int qede_get_cls_rule_all(struct qede_dev *edev, struct ethtool_rxnfc *info,
u32 *rule_locs)
{
@@ -1455,102 +1443,444 @@ int qede_get_cls_rule_entry(struct qede_dev *edev, struct ethtool_rxnfc *cmd)
fsp->ring_cookie = fltr->rxq_id;
+ if (fltr->vfid) {
+ fsp->ring_cookie |= ((u64)fltr->vfid) <<
+ ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
+ }
+
+ if (fltr->b_is_drop)
+ fsp->ring_cookie = RX_CLS_FLOW_DISC;
unlock:
__qede_unlock(edev);
return rc;
}
static int
-qede_validate_and_check_flow_exist(struct qede_dev *edev,
- struct ethtool_rx_flow_spec *fsp,
- int *min_hlen)
+qede_poll_arfs_filter_config(struct qede_dev *edev,
+ struct qede_arfs_fltr_node *fltr)
{
- __be16 src_port = 0x0, dst_port = 0x0;
- struct qede_arfs_fltr_node *fltr;
- struct hlist_node *temp;
- struct hlist_head *head;
- __be16 eth_proto;
- u8 ip_proto;
+ int count = QEDE_ARFS_POLL_COUNT;
- if (fsp->location >= QEDE_RFS_MAX_FLTR ||
- fsp->ring_cookie >= QEDE_RSS_COUNT(edev))
- return -EINVAL;
+ while (fltr->used && count) {
+ msleep(20);
+ count--;
+ }
+
+ if (count == 0 || fltr->fw_rc) {
+ DP_NOTICE(edev, "Timeout in polling filter config\n");
+ qede_dequeue_fltr_and_config_searcher(edev, fltr);
+ return -EIO;
+ }
+
+ return fltr->fw_rc;
+}
+
+static int qede_flow_get_min_header_size(struct qede_arfs_tuple *t)
+{
+ int size = ETH_HLEN;
+
+ if (t->eth_proto == htons(ETH_P_IP))
+ size += sizeof(struct iphdr);
+ else
+ size += sizeof(struct ipv6hdr);
+
+ if (t->ip_proto == IPPROTO_TCP)
+ size += sizeof(struct tcphdr);
+ else
+ size += sizeof(struct udphdr);
+
+ return size;
+}
+
+static bool qede_flow_spec_ipv4_cmp(struct qede_arfs_tuple *a,
+ struct qede_arfs_tuple *b)
+{
+ if (a->eth_proto != htons(ETH_P_IP) ||
+ b->eth_proto != htons(ETH_P_IP))
+ return false;
+
+ return (a->src_ipv4 == b->src_ipv4) &&
+ (a->dst_ipv4 == b->dst_ipv4);
+}
+
+static void qede_flow_build_ipv4_hdr(struct qede_arfs_tuple *t,
+ void *header)
+{
+ __be16 *ports = (__be16 *)(header + ETH_HLEN + sizeof(struct iphdr));
+ struct iphdr *ip = (struct iphdr *)(header + ETH_HLEN);
+ struct ethhdr *eth = (struct ethhdr *)header;
+
+ eth->h_proto = t->eth_proto;
+ ip->saddr = t->src_ipv4;
+ ip->daddr = t->dst_ipv4;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+ ip->protocol = t->ip_proto;
+ ip->tot_len = cpu_to_be16(qede_flow_get_min_header_size(t) - ETH_HLEN);
+
+ /* ports is weakly typed to suit both TCP and UDP ports */
+ ports[0] = t->src_port;
+ ports[1] = t->dst_port;
+}
+
+static void qede_flow_stringify_ipv4_hdr(struct qede_arfs_tuple *t,
+ void *buffer)
+{
+ const char *prefix = t->ip_proto == IPPROTO_TCP ? "TCP" : "UDP";
+
+ snprintf(buffer, QEDE_FILTER_PRINT_MAX_LEN,
+ "%s %pI4 (%04x) -> %pI4 (%04x)",
+ prefix, &t->src_ipv4, t->src_port,
+ &t->dst_ipv4, t->dst_port);
+}
+
+static bool qede_flow_spec_ipv6_cmp(struct qede_arfs_tuple *a,
+ struct qede_arfs_tuple *b)
+{
+ if (a->eth_proto != htons(ETH_P_IPV6) ||
+ b->eth_proto != htons(ETH_P_IPV6))
+ return false;
+
+ if (memcmp(&a->src_ipv6, &b->src_ipv6, sizeof(struct in6_addr)))
+ return false;
- if (fsp->flow_type == TCP_V4_FLOW) {
- *min_hlen += sizeof(struct iphdr) +
- sizeof(struct tcphdr);
- eth_proto = htons(ETH_P_IP);
- ip_proto = IPPROTO_TCP;
- } else if (fsp->flow_type == UDP_V4_FLOW) {
- *min_hlen += sizeof(struct iphdr) +
- sizeof(struct udphdr);
- eth_proto = htons(ETH_P_IP);
- ip_proto = IPPROTO_UDP;
- } else if (fsp->flow_type == TCP_V6_FLOW) {
- *min_hlen += sizeof(struct ipv6hdr) +
- sizeof(struct tcphdr);
- eth_proto = htons(ETH_P_IPV6);
- ip_proto = IPPROTO_TCP;
- } else if (fsp->flow_type == UDP_V6_FLOW) {
- *min_hlen += sizeof(struct ipv6hdr) +
- sizeof(struct udphdr);
- eth_proto = htons(ETH_P_IPV6);
- ip_proto = IPPROTO_UDP;
+ if (memcmp(&a->dst_ipv6, &b->dst_ipv6, sizeof(struct in6_addr)))
+ return false;
+
+ return true;
+}
+
+static void qede_flow_build_ipv6_hdr(struct qede_arfs_tuple *t,
+ void *header)
+{
+ __be16 *ports = (__be16 *)(header + ETH_HLEN + sizeof(struct ipv6hdr));
+ struct ipv6hdr *ip6 = (struct ipv6hdr *)(header + ETH_HLEN);
+ struct ethhdr *eth = (struct ethhdr *)header;
+
+ eth->h_proto = t->eth_proto;
+ memcpy(&ip6->saddr, &t->src_ipv6, sizeof(struct in6_addr));
+ memcpy(&ip6->daddr, &t->dst_ipv6, sizeof(struct in6_addr));
+ ip6->version = 0x6;
+
+ if (t->ip_proto == IPPROTO_TCP) {
+ ip6->nexthdr = NEXTHDR_TCP;
+ ip6->payload_len = cpu_to_be16(sizeof(struct tcphdr));
} else {
- DP_NOTICE(edev, "Unsupported flow type = 0x%x\n",
- fsp->flow_type);
- return -EPROTONOSUPPORT;
+ ip6->nexthdr = NEXTHDR_UDP;
+ ip6->payload_len = cpu_to_be16(sizeof(struct udphdr));
}
- if (eth_proto == htons(ETH_P_IP)) {
- src_port = fsp->h_u.tcp_ip4_spec.psrc;
- dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+ /* ports is weakly typed to suit both TCP and UDP ports */
+ ports[0] = t->src_port;
+ ports[1] = t->dst_port;
+}
+
+/* Validate fields which are set and not accepted by the driver */
+static int qede_flow_spec_validate_unused(struct qede_dev *edev,
+ struct ethtool_rx_flow_spec *fs)
+{
+ if (fs->flow_type & FLOW_MAC_EXT) {
+ DP_INFO(edev, "Don't support MAC extensions\n");
+ return -EOPNOTSUPP;
+ }
+
+ if ((fs->flow_type & FLOW_EXT) &&
+ (fs->h_ext.vlan_etype || fs->h_ext.vlan_tci)) {
+ DP_INFO(edev, "Don't support vlan-based classification\n");
+ return -EOPNOTSUPP;
+ }
+
+ if ((fs->flow_type & FLOW_EXT) &&
+ (fs->h_ext.data[0] || fs->h_ext.data[1])) {
+ DP_INFO(edev, "Don't support user defined data\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int qede_flow_spec_to_tuple_ipv4_common(struct qede_dev *edev,
+ struct qede_arfs_tuple *t,
+ struct ethtool_rx_flow_spec *fs)
+{
+ if ((fs->h_u.tcp_ip4_spec.ip4src &
+ fs->m_u.tcp_ip4_spec.ip4src) != fs->h_u.tcp_ip4_spec.ip4src) {
+ DP_INFO(edev, "Don't support IP-masks\n");
+ return -EOPNOTSUPP;
+ }
+
+ if ((fs->h_u.tcp_ip4_spec.ip4dst &
+ fs->m_u.tcp_ip4_spec.ip4dst) != fs->h_u.tcp_ip4_spec.ip4dst) {
+ DP_INFO(edev, "Don't support IP-masks\n");
+ return -EOPNOTSUPP;
+ }
+
+ if ((fs->h_u.tcp_ip4_spec.psrc &
+ fs->m_u.tcp_ip4_spec.psrc) != fs->h_u.tcp_ip4_spec.psrc) {
+ DP_INFO(edev, "Don't support port-masks\n");
+ return -EOPNOTSUPP;
+ }
+
+ if ((fs->h_u.tcp_ip4_spec.pdst &
+ fs->m_u.tcp_ip4_spec.pdst) != fs->h_u.tcp_ip4_spec.pdst) {
+ DP_INFO(edev, "Don't support port-masks\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (fs->h_u.tcp_ip4_spec.tos) {
+ DP_INFO(edev, "Don't support tos\n");
+ return -EOPNOTSUPP;
+ }
+
+ t->eth_proto = htons(ETH_P_IP);
+ t->src_ipv4 = fs->h_u.tcp_ip4_spec.ip4src;
+ t->dst_ipv4 = fs->h_u.tcp_ip4_spec.ip4dst;
+ t->src_port = fs->h_u.tcp_ip4_spec.psrc;
+ t->dst_port = fs->h_u.tcp_ip4_spec.pdst;
+
+ /* We must have either a valid 4-tuple, only a dst port,
+ * or only a src ip as an input
+ */
+ if (t->src_port && t->dst_port && t->src_ipv4 && t->dst_ipv4) {
+ t->mode = QED_FILTER_CONFIG_MODE_5_TUPLE;
+ } else if (!t->src_port && t->dst_port &&
+ !t->src_ipv4 && !t->dst_ipv4) {
+ t->mode = QED_FILTER_CONFIG_MODE_L4_PORT;
+ } else if (!t->src_port && !t->dst_port &&
+ !t->dst_ipv4 && t->src_ipv4) {
+ t->mode = QED_FILTER_CONFIG_MODE_IP_SRC;
} else {
- src_port = fsp->h_u.tcp_ip6_spec.psrc;
- dst_port = fsp->h_u.tcp_ip6_spec.pdst;
+ DP_INFO(edev, "Invalid N-tuple\n");
+ return -EOPNOTSUPP;
}
- head = QEDE_ARFS_BUCKET_HEAD(edev, 0);
- hlist_for_each_entry_safe(fltr, temp, head, node) {
- if ((fltr->tuple.ip_proto == ip_proto &&
- fltr->tuple.eth_proto == eth_proto &&
- qede_compare_user_flow_ips(fltr, fsp, eth_proto) &&
- fltr->tuple.src_port == src_port &&
- fltr->tuple.dst_port == dst_port) ||
- fltr->sw_id == fsp->location)
- return -EEXIST;
+ t->ip_comp = qede_flow_spec_ipv4_cmp;
+ t->build_hdr = qede_flow_build_ipv4_hdr;
+ t->stringify = qede_flow_stringify_ipv4_hdr;
+
+ return 0;
+}
+
+static int qede_flow_spec_to_tuple_tcpv4(struct qede_dev *edev,
+ struct qede_arfs_tuple *t,
+ struct ethtool_rx_flow_spec *fs)
+{
+ t->ip_proto = IPPROTO_TCP;
+
+ if (qede_flow_spec_to_tuple_ipv4_common(edev, t, fs))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int qede_flow_spec_to_tuple_udpv4(struct qede_dev *edev,
+ struct qede_arfs_tuple *t,
+ struct ethtool_rx_flow_spec *fs)
+{
+ t->ip_proto = IPPROTO_UDP;
+
+ if (qede_flow_spec_to_tuple_ipv4_common(edev, t, fs))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int qede_flow_spec_to_tuple_ipv6_common(struct qede_dev *edev,
+ struct qede_arfs_tuple *t,
+ struct ethtool_rx_flow_spec *fs)
+{
+ struct in6_addr zero_addr;
+ void *p;
+
+ p = &zero_addr;
+ memset(p, 0, sizeof(zero_addr));
+
+ if ((fs->h_u.tcp_ip6_spec.psrc &
+ fs->m_u.tcp_ip6_spec.psrc) != fs->h_u.tcp_ip6_spec.psrc) {
+ DP_INFO(edev, "Don't support port-masks\n");
+ return -EOPNOTSUPP;
+ }
+
+ if ((fs->h_u.tcp_ip6_spec.pdst &
+ fs->m_u.tcp_ip6_spec.pdst) != fs->h_u.tcp_ip6_spec.pdst) {
+ DP_INFO(edev, "Don't support port-masks\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (fs->h_u.tcp_ip6_spec.tclass) {
+ DP_INFO(edev, "Don't support tclass\n");
+ return -EOPNOTSUPP;
}
+ t->eth_proto = htons(ETH_P_IPV6);
+ memcpy(&t->src_ipv6, &fs->h_u.tcp_ip6_spec.ip6src,
+ sizeof(struct in6_addr));
+ memcpy(&t->dst_ipv6, &fs->h_u.tcp_ip6_spec.ip6dst,
+ sizeof(struct in6_addr));
+ t->src_port = fs->h_u.tcp_ip6_spec.psrc;
+ t->dst_port = fs->h_u.tcp_ip6_spec.pdst;
+
+ /* We must have either a valid 4-tuple, only a dst port,
+ * or only a src ip as an input
+ */
+ if (t->src_port && t->dst_port &&
+ memcmp(&t->src_ipv6, p, sizeof(struct in6_addr)) &&
+ memcmp(&t->dst_ipv6, p, sizeof(struct in6_addr))) {
+ t->mode = QED_FILTER_CONFIG_MODE_5_TUPLE;
+ } else if (!t->src_port && t->dst_port &&
+ !memcmp(&t->src_ipv6, p, sizeof(struct in6_addr)) &&
+ !memcmp(&t->dst_ipv6, p, sizeof(struct in6_addr))) {
+ t->mode = QED_FILTER_CONFIG_MODE_L4_PORT;
+ } else if (!t->src_port && !t->dst_port &&
+ !memcmp(&t->dst_ipv6, p, sizeof(struct in6_addr)) &&
+ memcmp(&t->src_ipv6, p, sizeof(struct in6_addr))) {
+ t->mode = QED_FILTER_CONFIG_MODE_IP_SRC;
+ } else {
+ DP_INFO(edev, "Invalid N-tuple\n");
+ return -EOPNOTSUPP;
+ }
+
+ t->ip_comp = qede_flow_spec_ipv6_cmp;
+ t->build_hdr = qede_flow_build_ipv6_hdr;
+
return 0;
}
-static int
-qede_poll_arfs_filter_config(struct qede_dev *edev,
- struct qede_arfs_fltr_node *fltr)
+static int qede_flow_spec_to_tuple_tcpv6(struct qede_dev *edev,
+ struct qede_arfs_tuple *t,
+ struct ethtool_rx_flow_spec *fs)
{
- int count = QEDE_ARFS_POLL_COUNT;
+ t->ip_proto = IPPROTO_TCP;
- while (fltr->used && count) {
- msleep(20);
- count--;
+ if (qede_flow_spec_to_tuple_ipv6_common(edev, t, fs))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int qede_flow_spec_to_tuple_udpv6(struct qede_dev *edev,
+ struct qede_arfs_tuple *t,
+ struct ethtool_rx_flow_spec *fs)
+{
+ t->ip_proto = IPPROTO_UDP;
+
+ if (qede_flow_spec_to_tuple_ipv6_common(edev, t, fs))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int qede_flow_spec_to_tuple(struct qede_dev *edev,
+ struct qede_arfs_tuple *t,
+ struct ethtool_rx_flow_spec *fs)
+{
+ memset(t, 0, sizeof(*t));
+
+ if (qede_flow_spec_validate_unused(edev, fs))
+ return -EOPNOTSUPP;
+
+ switch ((fs->flow_type & ~FLOW_EXT)) {
+ case TCP_V4_FLOW:
+ return qede_flow_spec_to_tuple_tcpv4(edev, t, fs);
+ case UDP_V4_FLOW:
+ return qede_flow_spec_to_tuple_udpv4(edev, t, fs);
+ case TCP_V6_FLOW:
+ return qede_flow_spec_to_tuple_tcpv6(edev, t, fs);
+ case UDP_V6_FLOW:
+ return qede_flow_spec_to_tuple_udpv6(edev, t, fs);
+ default:
+ DP_VERBOSE(edev, NETIF_MSG_IFUP,
+ "Can't support flow of type %08x\n", fs->flow_type);
+ return -EOPNOTSUPP;
}
- if (count == 0 || fltr->fw_rc) {
- qede_dequeue_fltr_and_config_searcher(edev, fltr);
- return -EIO;
+ return 0;
+}
+
+static int qede_flow_spec_validate(struct qede_dev *edev,
+ struct ethtool_rx_flow_spec *fs,
+ struct qede_arfs_tuple *t)
+{
+ if (fs->location >= QEDE_RFS_MAX_FLTR) {
+ DP_INFO(edev, "Location out-of-bounds\n");
+ return -EINVAL;
}
- return fltr->fw_rc;
+ /* Check location isn't already in use */
+ if (test_bit(fs->location, edev->arfs->arfs_fltr_bmap)) {
+ DP_INFO(edev, "Location already in use\n");
+ return -EINVAL;
+ }
+
+ /* Check if the filtering-mode could support the filter */
+ if (edev->arfs->filter_count &&
+ edev->arfs->mode != t->mode) {
+ DP_INFO(edev,
+ "flow_spec would require filtering mode %08x, but %08x is configured\n",
+ t->mode, edev->arfs->mode);
+ return -EINVAL;
+ }
+
+ /* If drop requested then no need to validate other data */
+ if (fs->ring_cookie == RX_CLS_FLOW_DISC)
+ return 0;
+
+ if (ethtool_get_flow_spec_ring_vf(fs->ring_cookie))
+ return 0;
+
+ if (fs->ring_cookie >= QEDE_RSS_COUNT(edev)) {
+ DP_INFO(edev, "Queue out-of-bounds\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Must be called while qede lock is held */
+static struct qede_arfs_fltr_node *
+qede_flow_find_fltr(struct qede_dev *edev, struct qede_arfs_tuple *t)
+{
+ struct qede_arfs_fltr_node *fltr;
+ struct hlist_node *temp;
+ struct hlist_head *head;
+
+ head = QEDE_ARFS_BUCKET_HEAD(edev, 0);
+
+ hlist_for_each_entry_safe(fltr, temp, head, node) {
+ if (fltr->tuple.ip_proto == t->ip_proto &&
+ fltr->tuple.src_port == t->src_port &&
+ fltr->tuple.dst_port == t->dst_port &&
+ t->ip_comp(&fltr->tuple, t))
+ return fltr;
+ }
+
+ return NULL;
+}
+
+static void qede_flow_set_destination(struct qede_dev *edev,
+ struct qede_arfs_fltr_node *n,
+ struct ethtool_rx_flow_spec *fs)
+{
+ if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
+ n->b_is_drop = true;
+ return;
+ }
+
+ n->vfid = ethtool_get_flow_spec_ring_vf(fs->ring_cookie);
+ n->rxq_id = ethtool_get_flow_spec_ring(fs->ring_cookie);
+ n->next_rxq_id = n->rxq_id;
+
+ if (n->vfid)
+ DP_VERBOSE(edev, QED_MSG_SP,
+ "Configuring N-tuple for VF 0x%02x\n", n->vfid - 1);
}
int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info)
{
struct ethtool_rx_flow_spec *fsp = &info->fs;
struct qede_arfs_fltr_node *n;
- int min_hlen = ETH_HLEN, rc;
- struct ethhdr *eth;
- struct iphdr *ip;
- __be16 *ports;
+ struct qede_arfs_tuple t;
+ int min_hlen, rc;
__qede_lock(edev);
@@ -1559,16 +1889,28 @@ int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info)
goto unlock;
}
- rc = qede_validate_and_check_flow_exist(edev, fsp, &min_hlen);
+ /* Translate the flow specification into something fitting our DB */
+ rc = qede_flow_spec_to_tuple(edev, &t, fsp);
if (rc)
goto unlock;
+ /* Make sure location is valid and filter isn't already set */
+ rc = qede_flow_spec_validate(edev, fsp, &t);
+ if (rc)
+ goto unlock;
+
+ if (qede_flow_find_fltr(edev, &t)) {
+ rc = -EINVAL;
+ goto unlock;
+ }
+
n = kzalloc(sizeof(*n), GFP_KERNEL);
if (!n) {
rc = -ENOMEM;
goto unlock;
}
+ min_hlen = qede_flow_get_min_header_size(&t);
n->data = kzalloc(min_hlen, GFP_KERNEL);
if (!n->data) {
kfree(n);
@@ -1579,68 +1921,13 @@ int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info)
n->sw_id = fsp->location;
set_bit(n->sw_id, edev->arfs->arfs_fltr_bmap);
n->buf_len = min_hlen;
- n->rxq_id = fsp->ring_cookie;
- n->next_rxq_id = n->rxq_id;
- eth = (struct ethhdr *)n->data;
- if (info->fs.flow_type == TCP_V4_FLOW ||
- info->fs.flow_type == UDP_V4_FLOW) {
- ports = (__be16 *)(n->data + ETH_HLEN +
- sizeof(struct iphdr));
- eth->h_proto = htons(ETH_P_IP);
- n->tuple.eth_proto = htons(ETH_P_IP);
- n->tuple.src_ipv4 = info->fs.h_u.tcp_ip4_spec.ip4src;
- n->tuple.dst_ipv4 = info->fs.h_u.tcp_ip4_spec.ip4dst;
- n->tuple.src_port = info->fs.h_u.tcp_ip4_spec.psrc;
- n->tuple.dst_port = info->fs.h_u.tcp_ip4_spec.pdst;
- ports[0] = n->tuple.src_port;
- ports[1] = n->tuple.dst_port;
- ip = (struct iphdr *)(n->data + ETH_HLEN);
- ip->saddr = info->fs.h_u.tcp_ip4_spec.ip4src;
- ip->daddr = info->fs.h_u.tcp_ip4_spec.ip4dst;
- ip->version = 0x4;
- ip->ihl = 0x5;
-
- if (info->fs.flow_type == TCP_V4_FLOW) {
- n->tuple.ip_proto = IPPROTO_TCP;
- ip->protocol = IPPROTO_TCP;
- } else {
- n->tuple.ip_proto = IPPROTO_UDP;
- ip->protocol = IPPROTO_UDP;
- }
- ip->tot_len = cpu_to_be16(min_hlen - ETH_HLEN);
- } else {
- struct ipv6hdr *ip6;
-
- ip6 = (struct ipv6hdr *)(n->data + ETH_HLEN);
- ports = (__be16 *)(n->data + ETH_HLEN +
- sizeof(struct ipv6hdr));
- eth->h_proto = htons(ETH_P_IPV6);
- n->tuple.eth_proto = htons(ETH_P_IPV6);
- memcpy(&n->tuple.src_ipv6, &info->fs.h_u.tcp_ip6_spec.ip6src,
- sizeof(struct in6_addr));
- memcpy(&n->tuple.dst_ipv6, &info->fs.h_u.tcp_ip6_spec.ip6dst,
- sizeof(struct in6_addr));
- n->tuple.src_port = info->fs.h_u.tcp_ip6_spec.psrc;
- n->tuple.dst_port = info->fs.h_u.tcp_ip6_spec.pdst;
- ports[0] = n->tuple.src_port;
- ports[1] = n->tuple.dst_port;
- memcpy(&ip6->saddr, &n->tuple.src_ipv6,
- sizeof(struct in6_addr));
- memcpy(&ip6->daddr, &n->tuple.dst_ipv6,
- sizeof(struct in6_addr));
- ip6->version = 0x6;
+ memcpy(&n->tuple, &t, sizeof(n->tuple));
- if (info->fs.flow_type == TCP_V6_FLOW) {
- n->tuple.ip_proto = IPPROTO_TCP;
- ip6->nexthdr = NEXTHDR_TCP;
- ip6->payload_len = cpu_to_be16(sizeof(struct tcphdr));
- } else {
- n->tuple.ip_proto = IPPROTO_UDP;
- ip6->nexthdr = NEXTHDR_UDP;
- ip6->payload_len = cpu_to_be16(sizeof(struct udphdr));
- }
- }
+ qede_flow_set_destination(edev, n, fsp);
+
+ /* Build a minimal header according to the flow */
+ n->tuple.build_hdr(&n->tuple, n->data);
rc = qede_enqueue_fltr_and_config_searcher(edev, n, 0);
if (rc)
@@ -1650,6 +1937,7 @@ int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info)
rc = qede_poll_arfs_filter_config(edev, n);
unlock:
__qede_unlock(edev);
+
return rc;
}
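For reference, the rewritten classification path above (qede_flow_spec_to_tuple() plus qede_flow_spec_validate()) accepts exactly three shapes of ethtool flow spec: a full 4-tuple, a lone destination port, or a lone source IP, and an RX_CLS_FLOW_DISC ring cookie turns the rule into a drop filter. Below is a minimal userspace sketch of a spec the new code would classify as QED_FILTER_CONFIG_MODE_L4_PORT; the port number, the location value and the ETHTOOL_SRXCLSRLINS ioctl that would deliver it are assumptions of the example, not part of the patch.

#include <string.h>
#include <arpa/inet.h>
#include <linux/ethtool.h>

/* Only the destination UDP port is set, so qede_flow_spec_to_tuple_ipv4_common()
 * selects QED_FILTER_CONFIG_MODE_L4_PORT; the RX_CLS_FLOW_DISC cookie makes
 * qede_flow_set_destination() mark the filter as a drop rule.
 */
static void fill_udp_dport_drop_rule(struct ethtool_rx_flow_spec *fs)
{
	memset(fs, 0, sizeof(*fs));
	fs->flow_type = UDP_V4_FLOW;
	fs->h_u.udp_ip4_spec.pdst = htons(4789);   /* only dst port set */
	fs->m_u.udp_ip4_spec.pdst = htons(0xffff); /* exact-match mask  */
	fs->ring_cookie = RX_CLS_FLOW_DISC;        /* drop action       */
	fs->location = 0;                          /* becomes sw_id     */
}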
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 9e70f713c3c5..d118771e1a7b 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -347,6 +347,7 @@ void qede_fill_by_demand_stats(struct qede_dev *edev)
p_common->rx_bcast_pkts = stats.common.rx_bcast_pkts;
p_common->mftag_filter_discards = stats.common.mftag_filter_discards;
p_common->mac_filter_discards = stats.common.mac_filter_discards;
+ p_common->gft_filter_drop = stats.common.gft_filter_drop;
p_common->tx_ucast_bytes = stats.common.tx_ucast_bytes;
p_common->tx_mcast_bytes = stats.common.tx_mcast_bytes;
diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index d118da5a10a2..ffd68a7bc9e1 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -1104,7 +1104,6 @@ static int rtl8139_init_one(struct pci_dev *pdev,
return 0;
err_out:
- netif_napi_del(&tp->napi);
__rtl8139_cleanup_dev (dev);
pci_disable_device (pdev);
return i;
@@ -1119,7 +1118,6 @@ static void rtl8139_remove_one(struct pci_dev *pdev)
assert (dev != NULL);
cancel_delayed_work_sync(&tp->thread);
- netif_napi_del(&tp->napi);
unregister_netdev (dev);
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index cece961f2e82..c3ad564ac4c0 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -435,17 +435,18 @@ static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
} while (1);
}
-/* Remove buffers put into a tx_queue. None of the buffers must have
- * an skb attached.
+/* Remove buffers put into a tx_queue for the current packet.
+ * None of the buffers must have an skb attached.
*/
-static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
+static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
+ unsigned int insert_count)
{
struct efx_tx_buffer *buffer;
unsigned int bytes_compl = 0;
unsigned int pkts_compl = 0;
/* Work backwards until we hit the original insert pointer value */
- while (tx_queue->insert_count != tx_queue->write_count) {
+ while (tx_queue->insert_count != insert_count) {
--tx_queue->insert_count;
buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
@@ -504,6 +505,8 @@ static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue,
*/
netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
+ unsigned int old_insert_count = tx_queue->insert_count;
+ bool xmit_more = skb->xmit_more;
bool data_mapped = false;
unsigned int segments;
unsigned int skb_len;
@@ -553,8 +556,10 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
/* Update BQL */
netdev_tx_sent_queue(tx_queue->core_txq, skb_len);
+ efx_tx_maybe_stop_queue(tx_queue);
+
/* Pass off to hardware */
- if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) {
+ if (!xmit_more || netif_xmit_stopped(tx_queue->core_txq)) {
struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue);
/* There could be packets left on the partner queue if those
@@ -577,14 +582,26 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
tx_queue->tx_packets++;
}
- efx_tx_maybe_stop_queue(tx_queue);
-
return NETDEV_TX_OK;
err:
- efx_enqueue_unwind(tx_queue);
+ efx_enqueue_unwind(tx_queue, old_insert_count);
dev_kfree_skb_any(skb);
+
+ /* If we're not expecting another transmit and we had something to push
+ * on this queue or a partner queue, then we need to push here to get the
+ * previous packets out.
+ */
+ if (!xmit_more) {
+ struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue);
+
+ if (txq2->xmit_more_available)
+ efx_nic_push_buffers(txq2);
+
+ efx_nic_push_buffers(tx_queue);
+ }
+
return NETDEV_TX_OK;
}
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 7f3dab4b4cbc..5428bb261102 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1237,7 +1237,10 @@ static void rndis_get_friendly_name(struct net_device *net,
if (rndis_filter_query_device(rndis_device, net_device,
RNDIS_OID_GEN_FRIENDLY_NAME,
wname, &size) != 0)
- return;
+ return; /* ignore if host does not support */
+
+ if (size == 0)
+ return; /* name not set */
/* Convert Windows Unicode string to UTF-8 */
len = ucs2_as_utf8(ifalias, wname, sizeof(ifalias));
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index d6ff881165d0..e6730a01d130 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1129,6 +1129,7 @@ static int team_upper_dev_link(struct team *team, struct team_port *port,
int err;
lag_upper_info.tx_type = team->mode->lag_tx_type;
+ lag_upper_info.hash_type = NETDEV_LAG_HASH_UNKNOWN;
err = netdev_master_upper_dev_link(port->dev, team->dev, NULL,
&lag_upper_info, extack);
if (err)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 99bf3cee1345..33a9c5661038 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -70,6 +70,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/sock.h>
+#include <net/xdp.h>
#include <linux/seq_file.h>
#include <linux/uio.h>
#include <linux/skb_array.h>
@@ -1284,34 +1285,44 @@ static const struct net_device_ops tun_netdev_ops = {
.ndo_get_stats64 = tun_net_get_stats64,
};
-static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame)
+static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
{
struct tun_struct *tun = netdev_priv(dev);
struct tun_file *tfile;
u32 numqueues;
- int ret = 0;
+ int drops = 0;
+ int cnt = n;
+ int i;
rcu_read_lock();
numqueues = READ_ONCE(tun->numqueues);
if (!numqueues) {
- ret = -ENOSPC;
- goto out;
+ rcu_read_unlock();
+ return -ENXIO; /* Caller will free/return all frames */
}
tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
numqueues]);
- /* Encode the XDP flag into lowest bit for consumer to differ
- * XDP buffer from sk_buff.
- */
- if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) {
- this_cpu_inc(tun->pcpu_stats->tx_dropped);
- ret = -ENOSPC;
+
+ spin_lock(&tfile->tx_ring.producer_lock);
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdp = frames[i];
+ /* Encode the XDP flag into the lowest bit for the consumer to
+ * differentiate an XDP buffer from an sk_buff.
+ */
+ void *frame = tun_xdp_to_ptr(xdp);
+
+ if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
+ this_cpu_inc(tun->pcpu_stats->tx_dropped);
+ xdp_return_frame_rx_napi(xdp);
+ drops++;
+ }
}
+ spin_unlock(&tfile->tx_ring.producer_lock);
-out:
rcu_read_unlock();
- return ret;
+ return cnt - drops;
}
static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
@@ -1321,7 +1332,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
if (unlikely(!frame))
return -EOVERFLOW;
- return tun_xdp_xmit(dev, frame);
+ return tun_xdp_xmit(dev, 1, &frame);
}
static void tun_xdp_flush(struct net_device *dev)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f34794a76c4d..39a0783d1cde 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev)
virtqueue_kick(sq->vq);
}
-static int __virtnet_xdp_xmit(struct virtnet_info *vi,
- struct xdp_frame *xdpf)
+static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
+ struct send_queue *sq,
+ struct xdp_frame *xdpf)
{
struct virtio_net_hdr_mrg_rxbuf *hdr;
- struct xdp_frame *xdpf_sent;
- struct send_queue *sq;
- unsigned int len;
- unsigned int qp;
int err;
- qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
- sq = &vi->sq[qp];
-
- /* Free up any pending old buffers before queueing new ones. */
- while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
- xdp_return_frame(xdpf_sent);
-
/* virtqueue want to use data area in-front of packet */
if (unlikely(xdpf->metasize > 0))
return -EOPNOTSUPP;
@@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi,
return 0;
}
-static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
+static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
+ struct xdp_frame *xdpf)
+{
+ struct xdp_frame *xdpf_sent;
+ struct send_queue *sq;
+ unsigned int len;
+ unsigned int qp;
+
+ qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+ sq = &vi->sq[qp];
+
+ /* Free up any pending old buffers before queueing new ones. */
+ while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
+ xdp_return_frame(xdpf_sent);
+
+ return __virtnet_xdp_xmit_one(vi, sq, xdpf);
+}
+
+static int virtnet_xdp_xmit(struct net_device *dev,
+ int n, struct xdp_frame **frames)
{
struct virtnet_info *vi = netdev_priv(dev);
struct receive_queue *rq = vi->rq;
+ struct xdp_frame *xdpf_sent;
struct bpf_prog *xdp_prog;
+ struct send_queue *sq;
+ unsigned int len;
+ unsigned int qp;
+ int drops = 0;
+ int err;
+ int i;
+
+ qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+ sq = &vi->sq[qp];
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
* indicate XDP resources have been successfully allocated.
@@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (!xdp_prog)
return -ENXIO;
- return __virtnet_xdp_xmit(vi, xdpf);
+ /* Free up any pending old buffers before queueing new ones. */
+ while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
+ xdp_return_frame(xdpf_sent);
+
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+
+ err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
+ if (err) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ }
+ }
+ return n - drops;
}
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
@@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
xdpf = convert_to_xdp_frame(&xdp);
if (unlikely(!xdpf))
goto err_xdp;
- err = __virtnet_xdp_xmit(vi, xdpf);
+ err = __virtnet_xdp_tx_xmit(vi, xdpf);
if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
goto err_xdp;
@@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
xdpf = convert_to_xdp_frame(&xdp);
if (unlikely(!xdpf))
goto err_xdp;
- err = __virtnet_xdp_xmit(vi, xdpf);
+ err = __virtnet_xdp_tx_xmit(vi, xdpf);
if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
if (unlikely(xdp_page != page))
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ed0122b45b63..bbe297436e5d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -69,8 +69,8 @@ struct bpf_map {
u32 pages;
u32 id;
int numa_node;
- u32 btf_key_id;
- u32 btf_value_id;
+ u32 btf_key_type_id;
+ u32 btf_value_type_id;
struct btf *btf;
bool unpriv_array;
/* 55 bytes hole */
@@ -463,6 +463,8 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
int bpf_get_file_flag(int flags);
+int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size,
+ size_t actual_size);
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
* forced to use 'long' read/writes to try to atomically copy long counters.
@@ -485,14 +487,17 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
/* Map specifics */
-struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
+struct xdp_buff;
+
+struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map);
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+ struct net_device *dev_rx);
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
void __cpu_map_flush(struct bpf_map *map);
-struct xdp_buff;
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);
@@ -571,6 +576,16 @@ static inline void __dev_map_flush(struct bpf_map *map)
{
}
+struct xdp_buff;
+struct bpf_dtab_netdev;
+
+static inline
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+ struct net_device *dev_rx)
+{
+ return 0;
+}
+
static inline
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
{
@@ -585,7 +600,6 @@ static inline void __cpu_map_flush(struct bpf_map *map)
{
}
-struct xdp_buff;
static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
struct xdp_buff *xdp,
struct net_device *dev_rx)
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index b67f8793de0d..b161e506dcfc 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -9,9 +9,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local)
BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9dbcb9d55921..d358d1815c16 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -517,6 +517,7 @@ struct sk_msg_buff {
bool sg_copy[MAX_SKB_FRAGS];
__u32 flags;
struct sock *sk_redir;
+ struct sock *sk;
struct sk_buff *skb;
struct list_head list;
};
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 585d27182425..7843b98e1c6e 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -50,6 +50,7 @@ struct br_ip_list {
#define BR_VLAN_TUNNEL BIT(13)
#define BR_BCAST_FLOOD BIT(14)
#define BR_NEIGH_SUPPRESS BIT(15)
+#define BR_ISOLATED BIT(16)
#define BR_DEFAULT_AGEING_TIME (300 * HZ)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 03ed492c4e14..8452f72087ef 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1185,9 +1185,13 @@ struct dev_ifalias {
* This function is used to set or query state related to XDP on the
* netdevice and manage BPF offload. See definition of
* enum bpf_netdev_command for details.
- * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp);
- * This function is used to submit a XDP packet for transmit on a
- * netdevice.
+ * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
+ * This function is used to submit @n XDP packets for transmit on a
+ * netdevice. Returns the number of frames successfully transmitted;
+ * frames that got dropped are freed/returned via xdp_return_frame().
+ * A negative return value indicates a general error invoking the ndo,
+ * meaning no frames were transmitted and the caller will free all frames.
+ * TODO: Consider adding a flag to allow requesting a flush operation.
* void (*ndo_xdp_flush)(struct net_device *dev);
* This function is used to inform the driver to flush a particular
* xdp tx queue. Must be called on same CPU as xdp_xmit.
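The batched ndo_xdp_xmit() contract documented above shifts frame ownership: the driver frees whatever it drops, while a negative return leaves every frame with the caller. A minimal sketch of how a caller might honour that contract follows; the function name and the use of xdp_return_frame_rx_napi() in the error path are illustrative and not taken from this patch.

#include <linux/netdevice.h>
#include <net/xdp.h>

/* Illustrative only: push a batch through the new ndo and release frames
 * according to the ownership rules documented above.
 */
static void example_xdp_xmit_bulk(struct net_device *dev,
				  struct xdp_frame **frames, int count)
{
	int sent, i;

	sent = dev->netdev_ops->ndo_xdp_xmit(dev, count, frames);
	if (sent < 0) {
		/* General error: nothing was transmitted, the caller still
		 * owns every frame and must return them all.
		 */
		for (i = 0; i < count; i++)
			xdp_return_frame_rx_napi(frames[i]);
		return;
	}

	/* 'sent' frames were accepted; any frame the driver dropped was
	 * already freed via xdp_return_frame(), so nothing else to do.
	 */
}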
@@ -1375,8 +1379,8 @@ struct net_device_ops {
int needed_headroom);
int (*ndo_bpf)(struct net_device *dev,
struct netdev_bpf *bpf);
- int (*ndo_xdp_xmit)(struct net_device *dev,
- struct xdp_frame *xdp);
+ int (*ndo_xdp_xmit)(struct net_device *dev, int n,
+ struct xdp_frame **xdp);
void (*ndo_xdp_flush)(struct net_device *dev);
};
@@ -2328,8 +2332,19 @@ enum netdev_lag_tx_type {
NETDEV_LAG_TX_TYPE_HASH,
};
+enum netdev_lag_hash {
+ NETDEV_LAG_HASH_NONE,
+ NETDEV_LAG_HASH_L2,
+ NETDEV_LAG_HASH_L34,
+ NETDEV_LAG_HASH_L23,
+ NETDEV_LAG_HASH_E23,
+ NETDEV_LAG_HASH_E34,
+ NETDEV_LAG_HASH_UNKNOWN,
+};
+
struct netdev_lag_upper_info {
enum netdev_lag_tx_type tx_type;
+ enum netdev_lag_hash hash_type;
};
struct netdev_lag_lower_state_info {
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e71e99eb9a4e..eec302b61cfe 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -868,6 +868,7 @@ extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
extern void perf_event_delayed_put(struct task_struct *task);
extern struct file *perf_event_get(unsigned int fd);
+extern const struct perf_event *perf_get_event(struct file *file);
extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
extern void perf_event_print_debug(void);
extern void perf_pmu_disable(struct pmu *pmu);
@@ -1289,6 +1290,10 @@ static inline void perf_event_exit_task(struct task_struct *child) { }
static inline void perf_event_free_task(struct task_struct *task) { }
static inline void perf_event_delayed_put(struct task_struct *task) { }
static inline struct file *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); }
+static inline const struct perf_event *perf_get_event(struct file *file)
+{
+ return ERR_PTR(-EINVAL);
+}
static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
{
return ERR_PTR(-EINVAL);
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 7f9756fe9180..2978fa4add42 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -66,6 +66,7 @@ enum qed_filter_config_mode {
QED_FILTER_CONFIG_MODE_5_TUPLE,
QED_FILTER_CONFIG_MODE_L4_PORT,
QED_FILTER_CONFIG_MODE_IP_DEST,
+ QED_FILTER_CONFIG_MODE_IP_SRC,
};
struct qed_ntuple_filter_params {
@@ -88,6 +89,9 @@ struct qed_ntuple_filter_params {
/* true iff this filter is to be added. Else to be removed */
bool b_is_add;
+
+ /* If flow needs to be dropped */
+ bool b_is_drop;
};
struct qed_dev_eth_info {
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 44af652e7407..ac991a3b8f03 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -1129,6 +1129,7 @@ struct qed_eth_stats_common {
u64 rx_bcast_pkts;
u64 mftag_filter_discards;
u64 mac_filter_discards;
+ u64 gft_filter_drop;
u64 tx_ucast_bytes;
u64 tx_mcast_bytes;
u64 tx_bcast_bytes;
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 2bde3eff564c..d34144a3c5b5 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -473,6 +473,9 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info);
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
+int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
+ u32 *fd_type, const char **buf,
+ u64 *probe_offset, u64 *probe_addr);
#else
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
@@ -504,6 +507,13 @@ static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name
{
return NULL;
}
+static inline int bpf_get_perf_event_info(const struct perf_event *event,
+ u32 *prog_id, u32 *fd_type,
+ const char **buf, u64 *probe_offset,
+ u64 *probe_addr)
+{
+ return -EOPNOTSUPP;
+}
#endif
enum {
@@ -560,10 +570,17 @@ extern void perf_trace_del(struct perf_event *event, int flags);
#ifdef CONFIG_KPROBE_EVENTS
extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe);
extern void perf_kprobe_destroy(struct perf_event *event);
+extern int bpf_get_kprobe_info(const struct perf_event *event,
+ u32 *fd_type, const char **symbol,
+ u64 *probe_offset, u64 *probe_addr,
+ bool perf_type_tracepoint);
#endif
#ifdef CONFIG_UPROBE_EVENTS
extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe);
extern void perf_uprobe_destroy(struct perf_event *event);
+extern int bpf_get_uprobe_info(const struct perf_event *event,
+ u32 *fd_type, const char **filename,
+ u64 *probe_offset, bool perf_type_tracepoint);
#endif
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index ff766ab207e0..c07d4dd09361 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -236,6 +236,8 @@ struct ipv6_stub {
struct flowi6 *fl6, int oif,
const struct sk_buff *skb,
int strict);
+ u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr,
+ struct in6_addr *saddr);
void (*udpv6_encap_enable)(void);
void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index cc70f6da8462..7897efe80727 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -412,6 +412,12 @@ static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
return f6i->fib6_nh.nh_dev;
}
+static inline
+struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i)
+{
+ return f6i->fib6_nh.nh_lwtstate;
+}
+
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int flags);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 9e4d0f0aeb6d..59656fc580df 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -294,6 +294,9 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
return mtu;
}
+u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
+ struct in6_addr *saddr);
+
struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
struct net_device *dev, struct sk_buff *skb,
const void *daddr);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 81d0f2107ff1..69c91d1934c1 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -449,4 +449,6 @@ static inline void fib_proc_exit(struct net *net)
}
#endif
+u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);
+
#endif /* _NET_FIB_H */
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index c79087153148..694d055e01ef 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -115,13 +115,14 @@ void page_pool_destroy(struct page_pool *pool);
void __page_pool_put_page(struct page_pool *pool,
struct page *page, bool allow_direct);
-static inline void page_pool_put_page(struct page_pool *pool, struct page *page)
+static inline void page_pool_put_page(struct page_pool *pool,
+ struct page *page, bool allow_direct)
{
/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
- __page_pool_put_page(pool, page, false);
+ __page_pool_put_page(pool, page, allow_direct);
#endif
}
/* Very limited use-cases allow recycle direct */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 0005f0b40fe9..f3ec43725724 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -33,7 +33,7 @@ struct tcf_block_ext_info {
};
struct tcf_block_cb;
-bool tcf_queue_work(struct work_struct *work);
+bool tcf_queue_work(struct rcu_work *rwork, work_func_t func);
#ifdef CONFIG_NET_CLS
struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
diff --git a/include/net/seg6.h b/include/net/seg6.h
index 099bad59dc90..e029e301faa5 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -49,7 +49,11 @@ struct seg6_pernet_data {
static inline struct seg6_pernet_data *seg6_pernet(struct net *net)
{
+#if IS_ENABLED(CONFIG_IPV6)
return net->ipv6.seg6_data;
+#else
+ return NULL;
+#endif
}
extern int seg6_init(void);
@@ -63,5 +67,6 @@ extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
int proto);
extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
-
+extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+ u32 tbl_id);
#endif
diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h
new file mode 100644
index 000000000000..661fd5b4d3e0
--- /dev/null
+++ b/include/net/seg6_local.h
@@ -0,0 +1,32 @@
+/*
+ * SR-IPv6 implementation
+ *
+ * Authors:
+ * David Lebrun <[email protected]>
+ * eBPF support: Mathieu Xhonneux <[email protected]>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_SEG6_LOCAL_H
+#define _NET_SEG6_LOCAL_H
+
+#include <linux/percpu.h>
+#include <linux/net.h>
+#include <linux/ipv6.h>
+
+extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+ u32 tbl_id);
+
+struct seg6_bpf_srh_state {
+ bool valid;
+ u16 hdrlen;
+};
+
+DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
+
+#endif
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 0b689cf561c7..7ad779237ae8 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
}
void xdp_return_frame(struct xdp_frame *xdpf);
+void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 185f4928fbda..7a647c56ec15 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -1,15 +1,6 @@
-/* SPDX-License-Identifier: GPL-2.0
- * AF_XDP internal functions
+/* SPDX-License-Identifier: GPL-2.0 */
+/* AF_XDP internal functions
* Copyright(c) 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#ifndef _LINUX_XDP_SOCK_H
diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h
index 81b7e985bb45..9763cddd0594 100644
--- a/include/trace/events/fib.h
+++ b/include/trace/events/fib.h
@@ -12,12 +12,14 @@
TRACE_EVENT(fib_table_lookup,
- TP_PROTO(u32 tb_id, const struct flowi4 *flp),
+ TP_PROTO(u32 tb_id, const struct flowi4 *flp,
+ const struct fib_nh *nh, int err),
- TP_ARGS(tb_id, flp),
+ TP_ARGS(tb_id, flp, nh, err),
TP_STRUCT__entry(
__field( u32, tb_id )
+ __field( int, err )
__field( int, oif )
__field( int, iif )
__field( __u8, tos )
@@ -25,12 +27,19 @@ TRACE_EVENT(fib_table_lookup,
__field( __u8, flags )
__array( __u8, src, 4 )
__array( __u8, dst, 4 )
+ __array( __u8, gw, 4 )
+ __array( __u8, saddr, 4 )
+ __field( u16, sport )
+ __field( u16, dport )
+ __field( u8, proto )
+ __dynamic_array(char, name, IFNAMSIZ )
),
TP_fast_assign(
__be32 *p32;
__entry->tb_id = tb_id;
+ __entry->err = err;
__entry->oif = flp->flowi4_oif;
__entry->iif = flp->flowi4_iif;
__entry->tos = flp->flowi4_tos;
@@ -42,71 +51,41 @@ TRACE_EVENT(fib_table_lookup,
p32 = (__be32 *) __entry->dst;
*p32 = flp->daddr;
- ),
-
- TP_printk("table %u oif %d iif %d src %pI4 dst %pI4 tos %d scope %d flags %x",
- __entry->tb_id, __entry->oif, __entry->iif,
- __entry->src, __entry->dst, __entry->tos, __entry->scope,
- __entry->flags)
-);
-
-TRACE_EVENT(fib_table_lookup_nh,
-
- TP_PROTO(const struct fib_nh *nh),
-
- TP_ARGS(nh),
-
- TP_STRUCT__entry(
- __string( name, nh->nh_dev->name)
- __field( int, oif )
- __array( __u8, src, 4 )
- ),
-
- TP_fast_assign(
- __be32 *p32 = (__be32 *) __entry->src;
-
- __assign_str(name, nh->nh_dev ? nh->nh_dev->name : "not set");
- __entry->oif = nh->nh_oif;
- *p32 = nh->nh_saddr;
- ),
-
- TP_printk("nexthop dev %s oif %d src %pI4",
- __get_str(name), __entry->oif, __entry->src)
-);
-
-TRACE_EVENT(fib_validate_source,
-
- TP_PROTO(const struct net_device *dev, const struct flowi4 *flp),
-
- TP_ARGS(dev, flp),
- TP_STRUCT__entry(
- __string( name, dev->name )
- __field( int, oif )
- __field( int, iif )
- __field( __u8, tos )
- __array( __u8, src, 4 )
- __array( __u8, dst, 4 )
- ),
-
- TP_fast_assign(
- __be32 *p32;
-
- __assign_str(name, dev ? dev->name : "not set");
- __entry->oif = flp->flowi4_oif;
- __entry->iif = flp->flowi4_iif;
- __entry->tos = flp->flowi4_tos;
-
- p32 = (__be32 *) __entry->src;
- *p32 = flp->saddr;
-
- p32 = (__be32 *) __entry->dst;
- *p32 = flp->daddr;
+ __entry->proto = flp->flowi4_proto;
+ if (__entry->proto == IPPROTO_TCP ||
+ __entry->proto == IPPROTO_UDP) {
+ __entry->sport = ntohs(flp->fl4_sport);
+ __entry->dport = ntohs(flp->fl4_dport);
+ } else {
+ __entry->sport = 0;
+ __entry->dport = 0;
+ }
+
+ if (nh) {
+ p32 = (__be32 *) __entry->saddr;
+ *p32 = nh->nh_saddr;
+
+ p32 = (__be32 *) __entry->gw;
+ *p32 = nh->nh_gw;
+
+ __assign_str(name, nh->nh_dev ? nh->nh_dev->name : "-");
+ } else {
+ p32 = (__be32 *) __entry->saddr;
+ *p32 = 0;
+
+ p32 = (__be32 *) __entry->gw;
+ *p32 = 0;
+
+ __assign_str(name, "-");
+ }
),
- TP_printk("dev %s oif %d iif %d tos %d src %pI4 dst %pI4",
- __get_str(name), __entry->oif, __entry->iif, __entry->tos,
- __entry->src, __entry->dst)
+ TP_printk("table %u oif %d iif %d proto %u %pI4/%u -> %pI4/%u tos %d scope %d flags %x ==> dev %s gw %pI4 src %pI4 err %d",
+ __entry->tb_id, __entry->oif, __entry->iif, __entry->proto,
+ __entry->src, __entry->sport, __entry->dst, __entry->dport,
+ __entry->tos, __entry->scope, __entry->flags,
+ __get_str(name), __entry->gw, __entry->saddr, __entry->err)
);
#endif /* _TRACE_FIB_H */
diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h
index 1b8d951e3c12..b088b54d699c 100644
--- a/include/trace/events/fib6.h
+++ b/include/trace/events/fib6.h
@@ -19,7 +19,7 @@ TRACE_EVENT(fib6_table_lookup,
TP_STRUCT__entry(
__field( u32, tb_id )
-
+ __field( int, err )
__field( int, oif )
__field( int, iif )
__field( __u8, tos )
@@ -27,7 +27,10 @@ TRACE_EVENT(fib6_table_lookup,
__field( __u8, flags )
__array( __u8, src, 16 )
__array( __u8, dst, 16 )
-
+ __field( u16, sport )
+ __field( u16, dport )
+ __field( u8, proto )
+ __field( u8, rt_type )
__dynamic_array( char, name, IFNAMSIZ )
__array( __u8, gw, 16 )
),
@@ -36,6 +39,7 @@ TRACE_EVENT(fib6_table_lookup,
struct in6_addr *in6;
__entry->tb_id = table->tb6_id;
+ __entry->err = ip6_rt_type_to_error(f6i->fib6_type);
__entry->oif = flp->flowi6_oif;
__entry->iif = flp->flowi6_iif;
__entry->tos = ip6_tclass(flp->flowlabel);
@@ -48,10 +52,20 @@ TRACE_EVENT(fib6_table_lookup,
in6 = (struct in6_addr *)__entry->dst;
*in6 = flp->daddr;
+ __entry->proto = flp->flowi6_proto;
+ if (__entry->proto == IPPROTO_TCP ||
+ __entry->proto == IPPROTO_UDP) {
+ __entry->sport = ntohs(flp->fl6_sport);
+ __entry->dport = ntohs(flp->fl6_dport);
+ } else {
+ __entry->sport = 0;
+ __entry->dport = 0;
+ }
+
if (f6i->fib6_nh.nh_dev) {
__assign_str(name, f6i->fib6_nh.nh_dev);
} else {
- __assign_str(name, "");
+ __assign_str(name, "-");
}
if (f6i == net->ipv6.fib6_null_entry) {
struct in6_addr in6_zero = {};
@@ -65,10 +79,11 @@ TRACE_EVENT(fib6_table_lookup,
}
),
- TP_printk("table %3u oif %d iif %d src %pI6c dst %pI6c tos %d scope %d flags %x ==> dev %s gw %pI6c",
- __entry->tb_id, __entry->oif, __entry->iif,
- __entry->src, __entry->dst, __entry->tos, __entry->scope,
- __entry->flags, __get_str(name), __entry->gw)
+ TP_printk("table %3u oif %d iif %d proto %u %pI6c/%u -> %pI6c/%u tos %d scope %d flags %x ==> dev %s gw %pI6c err %d",
+ __entry->tb_id, __entry->oif, __entry->iif, __entry->proto,
+ __entry->src, __entry->sport, __entry->dst, __entry->dport,
+ __entry->tos, __entry->scope, __entry->flags,
+ __get_str(name), __entry->gw, __entry->err)
);
#endif /* _TRACE_FIB6_H */
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 8989a92c571a..1ecf4c67fcf7 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -138,11 +138,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
__entry->map_id, __entry->map_index)
);
+#ifndef __DEVMAP_OBJ_TYPE
+#define __DEVMAP_OBJ_TYPE
+struct _bpf_dtab_netdev {
+ struct net_device *dev;
+};
+#endif /* __DEVMAP_OBJ_TYPE */
+
#define devmap_ifindex(fwd, map) \
(!fwd ? 0 : \
(!map ? 0 : \
((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \
- ((struct net_device *)fwd)->ifindex : 0)))
+ ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)))
#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \
@@ -222,6 +229,47 @@ TRACE_EVENT(xdp_cpumap_enqueue,
__entry->to_cpu)
);
+TRACE_EVENT(xdp_devmap_xmit,
+
+ TP_PROTO(const struct bpf_map *map, u32 map_index,
+ int sent, int drops,
+ const struct net_device *from_dev,
+ const struct net_device *to_dev, int err),
+
+ TP_ARGS(map, map_index, sent, drops, from_dev, to_dev, err),
+
+ TP_STRUCT__entry(
+ __field(int, map_id)
+ __field(u32, act)
+ __field(u32, map_index)
+ __field(int, drops)
+ __field(int, sent)
+ __field(int, from_ifindex)
+ __field(int, to_ifindex)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __entry->map_id = map->id;
+ __entry->act = XDP_REDIRECT;
+ __entry->map_index = map_index;
+ __entry->drops = drops;
+ __entry->sent = sent;
+ __entry->from_ifindex = from_dev->ifindex;
+ __entry->to_ifindex = to_dev->ifindex;
+ __entry->err = err;
+ ),
+
+ TP_printk("ndo_xdp_xmit"
+ " map_id=%d map_index=%d action=%s"
+ " sent=%d drops=%d"
+ " from_ifindex=%d to_ifindex=%d err=%d",
+ __entry->map_id, __entry->map_index,
+ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+ __entry->sent, __entry->drops,
+ __entry->from_ifindex, __entry->to_ifindex, __entry->err)
+);
+
#endif /* _TRACE_XDP_H */
#include <trace/define_trace.h>
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d94d333a8225..9b8c6e310e9a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -97,6 +97,7 @@ enum bpf_cmd {
BPF_RAW_TRACEPOINT_OPEN,
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
+ BPF_TASK_FD_QUERY,
};
enum bpf_map_type {
@@ -141,6 +142,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_RAW_TRACEPOINT,
BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_PROG_TYPE_LWT_SEG6LOCAL,
};
enum bpf_attach_type {
@@ -284,8 +286,8 @@ union bpf_attr {
char map_name[BPF_OBJ_NAME_LEN];
__u32 map_ifindex; /* ifindex of netdev to create on */
__u32 btf_fd; /* fd pointing to a BTF type data */
- __u32 btf_key_id; /* BTF type_id of the key */
- __u32 btf_value_id; /* BTF type_id of the value */
+ __u32 btf_key_type_id; /* BTF type_id of the key */
+ __u32 btf_value_type_id; /* BTF type_id of the value */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -379,6 +381,22 @@ union bpf_attr {
__u32 btf_log_size;
__u32 btf_log_level;
};
+
+ struct {
+ __u32 pid; /* input: pid */
+ __u32 fd; /* input: fd */
+ __u32 flags; /* input: flags */
+ __u32 buf_len; /* input/output: buf len */
+ __aligned_u64 buf; /* input/output:
+ * tp_name for tracepoint
+ * symbol for kprobe
+ * filename for uprobe
+ */
+ __u32 prog_id; /* output: prog_id */
+ __u32 fd_type; /* output: BPF_FD_TYPE_* */
+ __u64 probe_offset; /* output: probe_offset */
+ __u64 probe_addr; /* output: probe_addr */
+ } task_fd_query;
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
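The task_fd_query attribute block above backs the new BPF_TASK_FD_QUERY command: given a pid and an fd, the kernel reports which BPF program (if any) sits behind that fd and where it is attached. A hedged userspace sketch of the call, mirroring the field layout above (the wrapper name and calling convention are assumptions of the example):

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Query the BPF program attached behind a perf-event/tracing fd held by
 * task 'pid'. On success the buffer receives the tracepoint name, kprobe
 * symbol or uprobe filename, and the output fields are filled in by the
 * kernel.
 */
static int query_task_fd(int pid, int fd, char *buf, __u32 buf_len,
			 __u32 *prog_id, __u32 *fd_type,
			 __u64 *probe_offset, __u64 *probe_addr)
{
	union bpf_attr attr;
	int err;

	memset(&attr, 0, sizeof(attr));
	attr.task_fd_query.pid = pid;
	attr.task_fd_query.fd = fd;
	attr.task_fd_query.buf = (__u64)(unsigned long)buf;
	attr.task_fd_query.buf_len = buf_len;

	err = syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr));
	if (!err) {
		*prog_id = attr.task_fd_query.prog_id;
		*fd_type = attr.task_fd_query.fd_type;
		*probe_offset = attr.task_fd_query.probe_offset;
		*probe_addr = attr.task_fd_query.probe_addr;
	}
	return err;
}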
@@ -1902,6 +1920,90 @@ union bpf_attr {
* egress otherwise). This is the only flag supported for now.
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ * Description
+ * Encapsulate the packet associated to *skb* within a Layer 3
+ * protocol header. This header is provided in the buffer at
+ * address *hdr*, with *len* its size in bytes. *type* indicates
+ * the protocol of the header and can be one of:
+ *
+ * **BPF_LWT_ENCAP_SEG6**
+ * IPv6 encapsulation with Segment Routing Header
+ * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
+ * the IPv6 header is computed by the kernel.
+ * **BPF_LWT_ENCAP_SEG6_INLINE**
+ * Only works if *skb* contains an IPv6 packet. Insert a
+ * Segment Routing Header (**struct ipv6_sr_hdr**) inside
+ * the IPv6 header.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. Only the flags, tag and TLVs
+ * inside the outermost IPv6 Segment Routing Header can be
+ * modified through this helper.
+ *
+ * A call to this helper may change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ * Description
+ * Adjust the size allocated to TLVs in the outermost IPv6
+ * Segment Routing Header contained in the packet associated to
+ * *skb*, at position *offset* by *delta* bytes. Only offsets
+ * after the segments are accepted. *delta* can be positive
+ * (growing) as well as negative (shrinking).
+ *
+ * A call to this helper may change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ * Description
+ * Apply an IPv6 Segment Routing action of type *action* to the
+ * packet associated to *skb*. Each action takes a parameter
+ * contained at address *param*, and of length *param_len* bytes.
+ * *action* can be one of:
+ *
+ * **SEG6_LOCAL_ACTION_END_X**
+ * End.X action: Endpoint with Layer-3 cross-connect.
+ * Type of *param*: **struct in6_addr**.
+ * **SEG6_LOCAL_ACTION_END_T**
+ * End.T action: Endpoint with specific IPv6 table lookup.
+ * Type of *param*: **int**.
+ * **SEG6_LOCAL_ACTION_END_B6**
+ * End.B6 action: Endpoint bound to an SRv6 policy.
+ * Type of *param*: **struct ipv6_sr_hdr**.
+ * **SEG6_LOCAL_ACTION_END_B6_ENCAP**
+ * End.B6.Encap action: Endpoint bound to an SRv6
+ * encapsulation policy.
+ * Type of *param*: **struct ipv6_sr_hdr**.
+ *
+ * A call to this helper may change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -1976,7 +2078,11 @@ union bpf_attr {
FN(fib_lookup), \
FN(sock_hash_update), \
FN(msg_redirect_hash), \
- FN(sk_redirect_hash),
+ FN(sk_redirect_hash), \
+ FN(lwt_push_encap), \
+ FN(lwt_seg6_store_bytes), \
+ FN(lwt_seg6_adjust_srh), \
+ FN(lwt_seg6_action),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2043,6 +2149,12 @@ enum bpf_hdr_start_off {
BPF_HDR_START_NET,
};
+/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
+enum bpf_lwt_encap_mode {
+ BPF_LWT_ENCAP_SEG6,
+ BPF_LWT_ENCAP_SEG6_INLINE
+};
+
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
*/
@@ -2176,6 +2288,14 @@ enum sk_action {
struct sk_msg_md {
void *data;
void *data_end;
+
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* Stored in host byte order */
};
#define BPF_TAG_SIZE 8
@@ -2197,6 +2317,10 @@ struct bpf_prog_info {
__u32 gpl_compatible:1;
__u64 netns_dev;
__u64 netns_ino;
+ __u32 nr_jited_ksyms;
+ __u32 nr_jited_func_lens;
+ __aligned_u64 jited_ksyms;
+ __aligned_u64 jited_func_lens;
} __attribute__((aligned(8)));
struct bpf_map_info {
@@ -2211,8 +2335,8 @@ struct bpf_map_info {
__u64 netns_dev;
__u64 netns_ino;
__u32 btf_id;
- __u32 btf_key_id;
- __u32 btf_value_id;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
} __attribute__((aligned(8)));
struct bpf_btf_info {
@@ -2450,4 +2574,13 @@ struct bpf_fib_lookup {
__u8 dmac[6]; /* ETH_ALEN */
};
+enum bpf_task_fd_type {
+ BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_UPROBE, /* filename + offset */
+ BPF_FD_TYPE_URETPROBE, /* filename + offset */
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
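
A minimal sketch, not part of the patch above, of how the new BPF_PROG_TYPE_LWT_SEG6LOCAL type and the bpf_lwt_push_encap() helper could be used from a BPF-C program. Only the helper signature and the BPF_LWT_ENCAP_SEG6 mode come from the hunk above; the ELF section name, the "bpf_helpers.h" declarations and the SRH contents are assumptions for illustration.

#include <linux/bpf.h>
#include <linux/seg6.h>
#include "bpf_helpers.h"		/* assumed helper/SEC() declarations, as in samples/bpf */

SEC("lwt_seg6local")			/* assumed ELF section for this program type */
int encap_srh(struct __sk_buff *skb)
{
	char srh_buf[24] = {};		/* room for an SRH with one segment */
	struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)srh_buf;

	/* ... fill in srh->hdrlen, srh->type, srh->segments[0] ... */

	/* only the SRH is passed; the outer IPv6 header is built by the kernel */
	if (bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_SEG6, srh, sizeof(srh_buf)) < 0)
		return BPF_DROP;

	return BPF_OK;
}

char _license[] SEC("license") = "GPL";
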
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index bcb56ee47014..0b5ddbe135a4 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -12,42 +12,29 @@ struct btf_header {
__u16 magic;
__u8 version;
__u8 flags;
-
- __u32 parent_label;
- __u32 parent_name;
+ __u32 hdr_len;
/* All offsets are in bytes relative to the end of this header */
- __u32 label_off; /* offset of label section */
- __u32 object_off; /* offset of data object section*/
- __u32 func_off; /* offset of function section */
__u32 type_off; /* offset of type section */
+ __u32 type_len; /* length of type section */
__u32 str_off; /* offset of string section */
__u32 str_len; /* length of string section */
};
/* Max # of type identifier */
-#define BTF_MAX_TYPE 0x7fffffff
+#define BTF_MAX_TYPE 0x0000ffff
/* Max offset into the string section */
-#define BTF_MAX_NAME_OFFSET 0x7fffffff
+#define BTF_MAX_NAME_OFFSET 0x0000ffff
/* Max # of struct/union/enum members or func args */
#define BTF_MAX_VLEN 0xffff
-/* The type id is referring to a parent BTF */
-#define BTF_TYPE_PARENT(id) (((id) >> 31) & 0x1)
-#define BTF_TYPE_ID(id) ((id) & BTF_MAX_TYPE)
-
-/* String is in the ELF string section */
-#define BTF_STR_TBL_ELF_ID(ref) (((ref) >> 31) & 0x1)
-#define BTF_STR_OFFSET(ref) ((ref) & BTF_MAX_NAME_OFFSET)
-
struct btf_type {
__u32 name_off;
/* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members)
* bits 16-23: unused
- * bits 24-28: kind (e.g. int, ptr, array...etc)
- * bits 29-30: unused
- * bits 31: root
+ * bits 24-27: kind (e.g. int, ptr, array...etc)
+ * bits 28-31: unused
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT and UNION.
@@ -62,8 +49,7 @@ struct btf_type {
};
};
-#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
-#define BTF_INFO_ISROOT(info) (!!(((info) >> 24) & 0x80))
+#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_KIND_UNKN 0 /* Unknown */
@@ -88,15 +74,14 @@ struct btf_type {
/* BTF_KIND_INT is followed by a u32 and the following
* is the 32 bits arrangement:
*/
-#define BTF_INT_ENCODING(VAL) (((VAL) & 0xff000000) >> 24)
+#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff)
/* Attributes stored in the BTF_INT_ENCODING */
-#define BTF_INT_SIGNED 0x1
-#define BTF_INT_CHAR 0x2
-#define BTF_INT_BOOL 0x4
-#define BTF_INT_VARARGS 0x8
+#define BTF_INT_SIGNED (1 << 0)
+#define BTF_INT_CHAR (1 << 1)
+#define BTF_INT_BOOL (1 << 2)
/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
* The exact number of btf_enum is stored in the vlen (of the
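
Illustrative only, not part of the patch: with the reduced layout above, the kind now lives in bits 24-27 and vlen stays in bits 0-15 of btf_type.info, so encoding and decoding a struct type with three members works out like this.

#include <linux/btf.h>

static void decode_struct_info(void)
{
	/* encode: BTF_KIND_STRUCT in bits 24-27, vlen = 3 in bits 0-15 */
	__u32 info = (BTF_KIND_STRUCT << 24) | 3;

	/* decode with the reworked macros from the hunk above */
	unsigned int kind = BTF_INFO_KIND(info);	/* (info >> 24) & 0x0f == BTF_KIND_STRUCT */
	unsigned int vlen = BTF_INFO_VLEN(info);	/* info & 0xffff == 3 */

	(void)kind; (void)vlen;
}
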
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index b85266420bfb..cf01b6824244 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -333,6 +333,7 @@ enum {
IFLA_BRPORT_BCAST_FLOOD,
IFLA_BRPORT_GROUP_FWD_MASK,
IFLA_BRPORT_NEIGH_SUPPRESS,
+ IFLA_BRPORT_ISOLATED,
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index 77b88c4efe98..4737cfe222f5 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -1,17 +1,8 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
- *
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
* if_xdp: XDP socket user-space interface
* Copyright(c) 2018 Intel Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
* Author(s): Björn Töpel <[email protected]>
* Magnus Karlsson <[email protected]>
*/
@@ -26,19 +17,33 @@
struct sockaddr_xdp {
__u16 sxdp_family;
+ __u16 sxdp_flags;
__u32 sxdp_ifindex;
__u32 sxdp_queue_id;
__u32 sxdp_shared_umem_fd;
- __u16 sxdp_flags;
+};
+
+struct xdp_ring_offset {
+ __u64 producer;
+ __u64 consumer;
+ __u64 desc;
+};
+
+struct xdp_mmap_offsets {
+ struct xdp_ring_offset rx;
+ struct xdp_ring_offset tx;
+ struct xdp_ring_offset fr; /* Fill */
+ struct xdp_ring_offset cr; /* Completion */
};
/* XDP socket options */
-#define XDP_RX_RING 1
-#define XDP_TX_RING 2
-#define XDP_UMEM_REG 3
-#define XDP_UMEM_FILL_RING 4
-#define XDP_UMEM_COMPLETION_RING 5
-#define XDP_STATISTICS 6
+#define XDP_MMAP_OFFSETS 1
+#define XDP_RX_RING 2
+#define XDP_TX_RING 3
+#define XDP_UMEM_REG 4
+#define XDP_UMEM_FILL_RING 5
+#define XDP_UMEM_COMPLETION_RING 6
+#define XDP_STATISTICS 7
struct xdp_umem_reg {
__u64 addr; /* Start of packet data area */
@@ -59,6 +64,7 @@ struct xdp_statistics {
#define XDP_UMEM_PGOFF_FILL_RING 0x100000000
#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000
+/* Rx/Tx descriptor */
struct xdp_desc {
__u32 idx;
__u32 len;
@@ -67,21 +73,6 @@ struct xdp_desc {
__u8 padding[5];
};
-struct xdp_ring {
- __u32 producer __attribute__((aligned(64)));
- __u32 consumer __attribute__((aligned(64)));
-};
-
-/* Used for the RX and TX queues for packets */
-struct xdp_rxtx_ring {
- struct xdp_ring ptrs;
- struct xdp_desc desc[0] __attribute__((aligned(64)));
-};
-
-/* Used for the fill and completion queues for buffers */
-struct xdp_umem_ring {
- struct xdp_ring ptrs;
- __u32 desc[0] __attribute__((aligned(64)));
-};
+/* UMEM descriptor is __u32 */
#endif /* _LINUX_IF_XDP_H */
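
A hedged user-space sketch, not part of the patch: instead of relying on the removed xdp_rxtx_ring/xdp_umem_ring layouts, an application now queries the ring offsets with the new XDP_MMAP_OFFSETS option before mmap()ing a ring. The descriptor count and the SOL_XDP fallback value are illustrative; error handling is elided.

#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/if_xdp.h>

#ifndef SOL_XDP
#define SOL_XDP 283			/* as used by the xdpsock sample */
#endif

#define FQ_NUM_DESCS 1024		/* illustrative fill-queue size */

static void *map_fill_ring(int xsk_fd)
{
	struct xdp_mmap_offsets off;
	socklen_t optlen = sizeof(off);

	/* new in this series: producer/consumer/desc offsets come from the kernel */
	if (getsockopt(xsk_fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen))
		return NULL;

	/* fill ring entries are __u32 frame indices (see the comment above) */
	return mmap(NULL, off.fr.desc + FQ_NUM_DESCS * sizeof(__u32),
		    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
		    xsk_fd, XDP_UMEM_PGOFF_FILL_RING);
}
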
diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h
index ef2d8c3e76c1..edc138bdc56d 100644
--- a/include/uapi/linux/seg6_local.h
+++ b/include/uapi/linux/seg6_local.h
@@ -25,6 +25,7 @@ enum {
SEG6_LOCAL_NH6,
SEG6_LOCAL_IIF,
SEG6_LOCAL_OIF,
+ SEG6_LOCAL_BPF,
__SEG6_LOCAL_MAX,
};
#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
@@ -59,10 +60,21 @@ enum {
SEG6_LOCAL_ACTION_END_AS = 13,
/* forward to SR-unaware VNF with masquerading */
SEG6_LOCAL_ACTION_END_AM = 14,
+ /* custom BPF action */
+ SEG6_LOCAL_ACTION_END_BPF = 15,
__SEG6_LOCAL_ACTION_MAX,
};
#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1)
+enum {
+ SEG6_LOCAL_BPF_PROG_UNSPEC,
+ SEG6_LOCAL_BPF_PROG,
+ SEG6_LOCAL_BPF_PROG_NAME,
+ __SEG6_LOCAL_BPF_PROG_MAX,
+};
+
+#define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1)
+
#endif
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 0fd8d8f1a398..544e58f5f642 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -352,7 +352,7 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
}
seq_printf(m, "%u: ", *(u32 *)key);
- btf_type_seq_show(map->btf, map->btf_value_id, value, m);
+ btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
seq_puts(m, "\n");
rcu_read_unlock();
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index ded10ab47b8a..7e90fd13b5b5 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -12,6 +12,7 @@
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/idr.h>
+#include <linux/sort.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
@@ -162,13 +163,16 @@
#define BITS_ROUNDUP_BYTES(bits) \
(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
+#define BTF_INFO_MASK 0x0f00ffff
+#define BTF_INT_MASK 0x0fffffff
+#define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE)
+#define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET)
+
/* 16MB for 64k structs and each has 16 members and
* a few MB spaces for the string section.
* The hard limit is S32_MAX.
*/
#define BTF_MAX_SIZE (16 * 1024 * 1024)
-/* 64k. We can raise it later. The hard limit is S32_MAX. */
-#define BTF_MAX_NR_TYPES 65535
#define for_each_member(i, struct_type, member) \
for (i = 0, member = btf_type_member(struct_type); \
@@ -184,15 +188,13 @@ static DEFINE_IDR(btf_idr);
static DEFINE_SPINLOCK(btf_idr_lock);
struct btf {
- union {
- struct btf_header *hdr;
- void *data;
- };
+ void *data;
struct btf_type **types;
u32 *resolved_ids;
u32 *resolved_sizes;
const char *strings;
void *nohdr_data;
+ struct btf_header hdr;
u32 nr_types;
u32 types_size;
u32 data_size;
@@ -228,6 +230,11 @@ enum resolve_mode {
#define MAX_RESOLVE_DEPTH 32
+struct btf_sec_info {
+ u32 off;
+ u32 len;
+};
+
struct btf_verifier_env {
struct btf *btf;
u8 *visit_states;
@@ -379,8 +386,6 @@ static const char *btf_int_encoding_str(u8 encoding)
return "CHAR";
else if (encoding == BTF_INT_BOOL)
return "BOOL";
- else if (encoding == BTF_INT_VARARGS)
- return "VARARGS";
else
return "UNKN";
}
@@ -417,16 +422,16 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
{
- return !BTF_STR_TBL_ELF_ID(offset) &&
- BTF_STR_OFFSET(offset) < btf->hdr->str_len;
+ return BTF_STR_OFFSET_VALID(offset) &&
+ offset < btf->hdr.str_len;
}
static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
{
- if (!BTF_STR_OFFSET(offset))
+ if (!offset)
return "(anon)";
- else if (BTF_STR_OFFSET(offset) < btf->hdr->str_len)
- return &btf->strings[BTF_STR_OFFSET(offset)];
+ else if (offset < btf->hdr.str_len)
+ return &btf->strings[offset];
else
return "(invalid-name-offset)";
}
@@ -439,6 +444,28 @@ static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
return btf->types[type_id];
}
+/*
+ * Regular int is not a bit field and it must be either
+ * u8/u16/u32/u64.
+ */
+static bool btf_type_int_is_regular(const struct btf_type *t)
+{
+ u16 nr_bits, nr_bytes;
+ u32 int_data;
+
+ int_data = btf_type_int(t);
+ nr_bits = BTF_INT_BITS(int_data);
+ nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
+ if (BITS_PER_BYTE_MASKED(nr_bits) ||
+ BTF_INT_OFFSET(int_data) ||
+ (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
+ nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
+ return false;
+ }
+
+ return true;
+}
+
__printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log,
const char *fmt, ...)
{
@@ -536,7 +563,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
__btf_verifier_log(log, "\n");
}
-static void btf_verifier_log_hdr(struct btf_verifier_env *env)
+static void btf_verifier_log_hdr(struct btf_verifier_env *env,
+ u32 btf_data_size)
{
struct bpf_verifier_log *log = &env->log;
const struct btf *btf = env->btf;
@@ -545,19 +573,16 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env)
if (!bpf_verifier_log_needed(log))
return;
- hdr = btf->hdr;
+ hdr = &btf->hdr;
__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
__btf_verifier_log(log, "version: %u\n", hdr->version);
__btf_verifier_log(log, "flags: 0x%x\n", hdr->flags);
- __btf_verifier_log(log, "parent_label: %u\n", hdr->parent_label);
- __btf_verifier_log(log, "parent_name: %u\n", hdr->parent_name);
- __btf_verifier_log(log, "label_off: %u\n", hdr->label_off);
- __btf_verifier_log(log, "object_off: %u\n", hdr->object_off);
- __btf_verifier_log(log, "func_off: %u\n", hdr->func_off);
+ __btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len);
__btf_verifier_log(log, "type_off: %u\n", hdr->type_off);
+ __btf_verifier_log(log, "type_len: %u\n", hdr->type_len);
__btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
__btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
- __btf_verifier_log(log, "btf_total_size: %u\n", btf->data_size);
+ __btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size);
}
static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
@@ -574,13 +599,13 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
struct btf_type **new_types;
u32 expand_by, new_size;
- if (btf->types_size == BTF_MAX_NR_TYPES) {
+ if (btf->types_size == BTF_MAX_TYPE) {
btf_verifier_log(env, "Exceeded max num of types");
return -E2BIG;
}
expand_by = max_t(u32, btf->types_size >> 2, 16);
- new_size = min_t(u32, BTF_MAX_NR_TYPES,
+ new_size = min_t(u32, BTF_MAX_TYPE,
btf->types_size + expand_by);
new_types = kvzalloc(new_size * sizeof(*new_types),
@@ -910,6 +935,12 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env,
}
int_data = btf_type_int(t);
+ if (int_data & ~BTF_INT_MASK) {
+ btf_verifier_log_basic(env, t, "Invalid int_data:%x",
+ int_data);
+ return -EINVAL;
+ }
+
nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);
if (nr_bits > BITS_PER_U64) {
@@ -923,12 +954,17 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
+ /*
+ * Only one of the encoding bits is allowed and it
+ * should be sufficient for the pretty print purpose (i.e. decoding).
+ * Multiple bits can be allowed later if it is found
+ * to be insufficient.
+ */
encoding = BTF_INT_ENCODING(int_data);
if (encoding &&
encoding != BTF_INT_SIGNED &&
encoding != BTF_INT_CHAR &&
- encoding != BTF_INT_BOOL &&
- encoding != BTF_INT_VARARGS) {
+ encoding != BTF_INT_BOOL) {
btf_verifier_log_type(env, t, "Unsupported encoding");
return -ENOTSUPP;
}
@@ -1102,7 +1138,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- if (BTF_TYPE_PARENT(t->type)) {
+ if (!BTF_TYPE_ID_VALID(t->type)) {
btf_verifier_log_type(env, t, "Invalid type_id");
return -EINVAL;
}
@@ -1306,14 +1342,16 @@ static s32 btf_array_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- /* We are a little forgiving on array->index_type since
- * the kernel is not using it.
- */
- /* Array elem cannot be in type void,
- * so !array->type is not allowed.
+ /* Array elem type and index type cannot be in type void,
+ * so !array->type and !array->index_type are not allowed.
*/
- if (!array->type || BTF_TYPE_PARENT(array->type)) {
- btf_verifier_log_type(env, t, "Invalid type_id");
+ if (!array->type || !BTF_TYPE_ID_VALID(array->type)) {
+ btf_verifier_log_type(env, t, "Invalid elem");
+ return -EINVAL;
+ }
+
+ if (!array->index_type || !BTF_TYPE_ID_VALID(array->index_type)) {
+ btf_verifier_log_type(env, t, "Invalid index");
return -EINVAL;
}
@@ -1326,11 +1364,32 @@ static int btf_array_resolve(struct btf_verifier_env *env,
const struct resolve_vertex *v)
{
const struct btf_array *array = btf_type_array(v->t);
- const struct btf_type *elem_type;
- u32 elem_type_id = array->type;
+ const struct btf_type *elem_type, *index_type;
+ u32 elem_type_id, index_type_id;
struct btf *btf = env->btf;
u32 elem_size;
+ /* Check array->index_type */
+ index_type_id = array->index_type;
+ index_type = btf_type_by_id(btf, index_type_id);
+ if (btf_type_is_void_or_null(index_type)) {
+ btf_verifier_log_type(env, v->t, "Invalid index");
+ return -EINVAL;
+ }
+
+ if (!env_type_is_resolve_sink(env, index_type) &&
+ !env_type_is_resolved(env, index_type_id))
+ return env_stack_push(env, index_type, index_type_id);
+
+ index_type = btf_type_id_size(btf, &index_type_id, NULL);
+ if (!index_type || !btf_type_is_int(index_type) ||
+ !btf_type_int_is_regular(index_type)) {
+ btf_verifier_log_type(env, v->t, "Invalid index");
+ return -EINVAL;
+ }
+
+ /* Check array->type */
+ elem_type_id = array->type;
elem_type = btf_type_by_id(btf, elem_type_id);
if (btf_type_is_void_or_null(elem_type)) {
btf_verifier_log_type(env, v->t,
@@ -1348,22 +1407,9 @@ static int btf_array_resolve(struct btf_verifier_env *env,
return -EINVAL;
}
- if (btf_type_is_int(elem_type)) {
- int int_type_data = btf_type_int(elem_type);
- u16 nr_bits = BTF_INT_BITS(int_type_data);
- u16 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
-
- /* Put more restriction on array of int. The int cannot
- * be a bit field and it must be either u8/u16/u32/u64.
- */
- if (BITS_PER_BYTE_MASKED(nr_bits) ||
- BTF_INT_OFFSET(int_type_data) ||
- (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
- nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
- btf_verifier_log_type(env, v->t,
- "Invalid array of int");
- return -EINVAL;
- }
+ if (btf_type_is_int(elem_type) && !btf_type_int_is_regular(elem_type)) {
+ btf_verifier_log_type(env, v->t, "Invalid array of int");
+ return -EINVAL;
}
if (array->nelems && elem_size > U32_MAX / array->nelems) {
@@ -1473,7 +1519,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
}
/* A member cannot be in type void */
- if (!member->type || BTF_TYPE_PARENT(member->type)) {
+ if (!member->type || !BTF_TYPE_ID_VALID(member->type)) {
btf_verifier_log_member(env, t, member,
"Invalid type_id");
return -EINVAL;
@@ -1726,6 +1772,12 @@ static s32 btf_check_meta(struct btf_verifier_env *env,
}
meta_left -= sizeof(*t);
+ if (t->info & ~BTF_INFO_MASK) {
+ btf_verifier_log(env, "[%u] Invalid btf_info:%x",
+ env->log_type_id, t->info);
+ return -EINVAL;
+ }
+
if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX ||
BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) {
btf_verifier_log(env, "[%u] Invalid kind:%u",
@@ -1754,9 +1806,9 @@ static int btf_check_all_metas(struct btf_verifier_env *env)
struct btf_header *hdr;
void *cur, *end;
- hdr = btf->hdr;
+ hdr = &btf->hdr;
cur = btf->nohdr_data + hdr->type_off;
- end = btf->nohdr_data + hdr->str_off;
+ end = btf->nohdr_data + hdr->type_len;
env->log_type_id = 1;
while (cur < end) {
@@ -1866,8 +1918,20 @@ static int btf_check_all_types(struct btf_verifier_env *env)
static int btf_parse_type_sec(struct btf_verifier_env *env)
{
+ const struct btf_header *hdr = &env->btf->hdr;
int err;
+ /* Type section must align to 4 bytes */
+ if (hdr->type_off & (sizeof(u32) - 1)) {
+ btf_verifier_log(env, "Unaligned type_off");
+ return -EINVAL;
+ }
+
+ if (!hdr->type_len) {
+ btf_verifier_log(env, "No type found");
+ return -EINVAL;
+ }
+
err = btf_check_all_metas(env);
if (err)
return err;
@@ -1881,10 +1945,15 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
struct btf *btf = env->btf;
const char *start, *end;
- hdr = btf->hdr;
+ hdr = &btf->hdr;
start = btf->nohdr_data + hdr->str_off;
end = start + hdr->str_len;
+ if (end != btf->data + btf->data_size) {
+ btf_verifier_log(env, "String section is not at the end");
+ return -EINVAL;
+ }
+
if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
start[0] || end[-1]) {
btf_verifier_log(env, "Invalid string section");
@@ -1896,20 +1965,121 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
return 0;
}
-static int btf_parse_hdr(struct btf_verifier_env *env)
+static const size_t btf_sec_info_offset[] = {
+ offsetof(struct btf_header, type_off),
+ offsetof(struct btf_header, str_off),
+};
+
+static int btf_sec_info_cmp(const void *a, const void *b)
+{
+ const struct btf_sec_info *x = a;
+ const struct btf_sec_info *y = b;
+
+ return (int)(x->off - y->off) ? : (int)(x->len - y->len);
+}
+
+static int btf_check_sec_info(struct btf_verifier_env *env,
+ u32 btf_data_size)
{
+ struct btf_sec_info secs[ARRAY_SIZE(btf_sec_info_offset)];
+ u32 total, expected_total, i;
const struct btf_header *hdr;
- struct btf *btf = env->btf;
- u32 meta_left;
+ const struct btf *btf;
+
+ btf = env->btf;
+ hdr = &btf->hdr;
- if (btf->data_size < sizeof(*hdr)) {
+ /* Populate the secs from hdr */
+ for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++)
+ secs[i] = *(struct btf_sec_info *)((void *)hdr +
+ btf_sec_info_offset[i]);
+
+ sort(secs, ARRAY_SIZE(btf_sec_info_offset),
+ sizeof(struct btf_sec_info), btf_sec_info_cmp, NULL);
+
+ /* Check for gaps and overlap among sections */
+ total = 0;
+ expected_total = btf_data_size - hdr->hdr_len;
+ for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) {
+ if (expected_total < secs[i].off) {
+ btf_verifier_log(env, "Invalid section offset");
+ return -EINVAL;
+ }
+ if (total < secs[i].off) {
+ /* gap */
+ btf_verifier_log(env, "Unsupported section found");
+ return -EINVAL;
+ }
+ if (total > secs[i].off) {
+ btf_verifier_log(env, "Section overlap found");
+ return -EINVAL;
+ }
+ if (expected_total - total < secs[i].len) {
+ btf_verifier_log(env,
+ "Total section length too long");
+ return -EINVAL;
+ }
+ total += secs[i].len;
+ }
+
+ /* There is data other than hdr and known sections */
+ if (expected_total != total) {
+ btf_verifier_log(env, "Unsupported section found");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int btf_parse_hdr(struct btf_verifier_env *env, void __user *btf_data,
+ u32 btf_data_size)
+{
+ const struct btf_header *hdr;
+ u32 hdr_len, hdr_copy;
+ /*
+ * Minimal part of the "struct btf_header" that
+ * contains the hdr_len.
+ */
+ struct btf_min_header {
+ u16 magic;
+ u8 version;
+ u8 flags;
+ u32 hdr_len;
+ } __user *min_hdr;
+ struct btf *btf;
+ int err;
+
+ btf = env->btf;
+ min_hdr = btf_data;
+
+ if (btf_data_size < sizeof(*min_hdr)) {
+ btf_verifier_log(env, "hdr_len not found");
+ return -EINVAL;
+ }
+
+ if (get_user(hdr_len, &min_hdr->hdr_len))
+ return -EFAULT;
+
+ if (btf_data_size < hdr_len) {
btf_verifier_log(env, "btf_header not found");
return -EINVAL;
}
- btf_verifier_log_hdr(env);
+ err = bpf_check_uarg_tail_zero(btf_data, sizeof(btf->hdr), hdr_len);
+ if (err) {
+ if (err == -E2BIG)
+ btf_verifier_log(env, "Unsupported btf_header");
+ return err;
+ }
+
+ hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr));
+ if (copy_from_user(&btf->hdr, btf_data, hdr_copy))
+ return -EFAULT;
+
+ hdr = &btf->hdr;
+
+ btf_verifier_log_hdr(env, btf_data_size);
- hdr = btf->hdr;
if (hdr->magic != BTF_MAGIC) {
btf_verifier_log(env, "Invalid magic");
return -EINVAL;
@@ -1925,26 +2095,14 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
return -ENOTSUPP;
}
- meta_left = btf->data_size - sizeof(*hdr);
- if (!meta_left) {
+ if (btf_data_size == hdr->hdr_len) {
btf_verifier_log(env, "No data");
return -EINVAL;
}
- if (meta_left < hdr->type_off || hdr->str_off <= hdr->type_off ||
- /* Type section must align to 4 bytes */
- hdr->type_off & (sizeof(u32) - 1)) {
- btf_verifier_log(env, "Invalid type_off");
- return -EINVAL;
- }
-
- if (meta_left < hdr->str_off ||
- meta_left - hdr->str_off < hdr->str_len) {
- btf_verifier_log(env, "Invalid str_off or str_len");
- return -EINVAL;
- }
-
- btf->nohdr_data = btf->hdr + 1;
+ err = btf_check_sec_info(env, btf_data_size);
+ if (err)
+ return err;
return 0;
}
@@ -1987,6 +2145,11 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
err = -ENOMEM;
goto errout;
}
+ env->btf = btf;
+
+ err = btf_parse_hdr(env, btf_data, btf_data_size);
+ if (err)
+ goto errout;
data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN);
if (!data) {
@@ -1996,18 +2159,13 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
btf->data = data;
btf->data_size = btf_data_size;
+ btf->nohdr_data = btf->data + btf->hdr.hdr_len;
if (copy_from_user(data, btf_data, btf_data_size)) {
err = -EFAULT;
goto errout;
}
- env->btf = btf;
-
- err = btf_parse_hdr(env);
- if (err)
- goto errout;
-
err = btf_parse_str_sec(env);
if (err)
goto errout;
@@ -2016,16 +2174,14 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
if (err)
goto errout;
- if (!err && log->level && bpf_verifier_log_full(log)) {
+ if (log->level && bpf_verifier_log_full(log)) {
err = -ENOSPC;
goto errout;
}
- if (!err) {
- btf_verifier_env_free(env);
- refcount_set(&btf->refcnt, 1);
- return btf;
- }
+ btf_verifier_env_free(env);
+ refcount_set(&btf->refcnt, 1);
+ return btf;
errout:
btf_verifier_env_free(env);
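
Illustrative only, not part of the patch: a header that satisfies the new btf_check_sec_info() rules -- both sections start right after the (variable-length) header, leave no gap or overlap, and the string section ends exactly at data + data_size. The section lengths are schematic placeholders.

#include <linux/btf.h>

#define TYPE_SEC_LEN	(3 * sizeof(struct btf_type))	/* placeholder size */
#define STR_SEC_LEN	16				/* placeholder size */

struct btf_header hdr = {
	.magic    = BTF_MAGIC,
	.version  = BTF_VERSION,
	.hdr_len  = sizeof(struct btf_header),
	.type_off = 0,			/* type section comes first ...           */
	.type_len = TYPE_SEC_LEN,	/* ... and is immediately followed ...    */
	.str_off  = TYPE_SEC_LEN,	/* ... by the strings, with no gap        */
	.str_len  = STR_SEC_LEN,	/* string section is last in the BTF data */
};
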
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index c95b04ec103e..e0918d180f08 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -578,7 +578,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
err = __ptr_ring_produce(q, xdpf);
if (err) {
drops++;
- xdp_return_frame(xdpf);
+ xdp_return_frame_rx_napi(xdpf);
}
processed++;
}
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 565f9ece9115..ae16d0c373ef 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -48,15 +48,25 @@
* calls will fail at this point.
*/
#include <linux/bpf.h>
+#include <net/xdp.h>
#include <linux/filter.h>
+#include <trace/events/xdp.h>
#define DEV_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+#define DEV_MAP_BULK_SIZE 16
+struct xdp_bulk_queue {
+ struct xdp_frame *q[DEV_MAP_BULK_SIZE];
+ struct net_device *dev_rx;
+ unsigned int count;
+};
+
struct bpf_dtab_netdev {
- struct net_device *dev;
+ struct net_device *dev; /* must be first member, due to tracepoint */
struct bpf_dtab *dtab;
unsigned int bit;
+ struct xdp_bulk_queue __percpu *bulkq;
struct rcu_head rcu;
};
@@ -206,6 +216,50 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
__set_bit(bit, bitmap);
}
+static int bq_xmit_all(struct bpf_dtab_netdev *obj,
+ struct xdp_bulk_queue *bq)
+{
+ struct net_device *dev = obj->dev;
+ int sent = 0, drops = 0, err = 0;
+ int i;
+
+ if (unlikely(!bq->count))
+ return 0;
+
+ for (i = 0; i < bq->count; i++) {
+ struct xdp_frame *xdpf = bq->q[i];
+
+ prefetch(xdpf);
+ }
+
+ sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
+ if (sent < 0) {
+ err = sent;
+ sent = 0;
+ goto error;
+ }
+ drops = bq->count - sent;
+out:
+ bq->count = 0;
+
+ trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
+ sent, drops, bq->dev_rx, dev, err);
+ bq->dev_rx = NULL;
+ return 0;
+error:
+ /* If ndo_xdp_xmit fails with an errno, no frames have been
+ * xmit'ed and it's our responsibility to free them all.
+ */
+ for (i = 0; i < bq->count; i++) {
+ struct xdp_frame *xdpf = bq->q[i];
+
+ /* RX path under NAPI protection, can return frames faster */
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ }
+ goto out;
+}
+
/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
* from the driver before returning from its napi->poll() routine. The poll()
* routine is called either from busy_poll context or net_rx_action signaled
@@ -221,6 +275,7 @@ void __dev_map_flush(struct bpf_map *map)
for_each_set_bit(bit, bitmap, map->max_entries) {
struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
+ struct xdp_bulk_queue *bq;
struct net_device *netdev;
/* This is possible if the dev entry is removed by user space
@@ -230,6 +285,9 @@ void __dev_map_flush(struct bpf_map *map)
continue;
__clear_bit(bit, bitmap);
+
+ bq = this_cpu_ptr(dev->bulkq);
+ bq_xmit_all(dev, bq);
netdev = dev->dev;
if (likely(netdev->netdev_ops->ndo_xdp_flush))
netdev->netdev_ops->ndo_xdp_flush(netdev);
@@ -240,21 +298,61 @@ void __dev_map_flush(struct bpf_map *map)
* update happens in parallel here a dev_put wont happen until after reading the
* ifindex.
*/
-struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
+struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
- struct bpf_dtab_netdev *dev;
+ struct bpf_dtab_netdev *obj;
if (key >= map->max_entries)
return NULL;
- dev = READ_ONCE(dtab->netdev_map[key]);
- return dev ? dev->dev : NULL;
+ obj = READ_ONCE(dtab->netdev_map[key]);
+ return obj;
+}
+
+/* Runs under RCU-read-side, plus in softirq under NAPI protection.
+ * Thus, safe percpu variable access.
+ */
+static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
+ struct net_device *dev_rx)
+
+{
+ struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
+
+ if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
+ bq_xmit_all(obj, bq);
+
+ /* Ingress dev_rx will be the same for all xdp_frame's in
+ * bulk_queue, because bq is stored per-CPU and must be flushed
+ * at the end of the net_device driver's NAPI func.
+ */
+ if (!bq->dev_rx)
+ bq->dev_rx = dev_rx;
+
+ bq->q[bq->count++] = xdpf;
+ return 0;
+}
+
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+ struct net_device *dev_rx)
+{
+ struct net_device *dev = dst->dev;
+ struct xdp_frame *xdpf;
+
+ if (!dev->netdev_ops->ndo_xdp_xmit)
+ return -EOPNOTSUPP;
+
+ xdpf = convert_to_xdp_frame(xdp);
+ if (unlikely(!xdpf))
+ return -EOVERFLOW;
+
+ return bq_enqueue(dst, xdpf, dev_rx);
}
static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
{
- struct net_device *dev = __dev_map_lookup_elem(map, *(u32 *)key);
+ struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
+ struct net_device *dev = obj ? obj->dev : NULL;
return dev ? &dev->ifindex : NULL;
}
@@ -263,13 +361,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
{
if (dev->dev->netdev_ops->ndo_xdp_flush) {
struct net_device *fl = dev->dev;
+ struct xdp_bulk_queue *bq;
unsigned long *bitmap;
+
int cpu;
for_each_online_cpu(cpu) {
bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
__clear_bit(dev->bit, bitmap);
+ bq = per_cpu_ptr(dev->bulkq, cpu);
+ bq_xmit_all(dev, bq);
+
fl->netdev_ops->ndo_xdp_flush(dev->dev);
}
}
@@ -281,6 +384,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
dev_map_flush_old(dev);
+ free_percpu(dev->bulkq);
dev_put(dev->dev);
kfree(dev);
}
@@ -313,6 +417,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct net *net = current->nsproxy->net_ns;
+ gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
struct bpf_dtab_netdev *dev, *old_dev;
u32 i = *(u32 *)key;
u32 ifindex = *(u32 *)value;
@@ -327,13 +432,20 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
if (!ifindex) {
dev = NULL;
} else {
- dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
- map->numa_node);
+ dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
if (!dev)
return -ENOMEM;
+ dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
+ sizeof(void *), gfp);
+ if (!dev->bulkq) {
+ kfree(dev);
+ return -ENOMEM;
+ }
+
dev->dev = dev_get_by_index(net, ifindex);
if (!dev->dev) {
+ free_percpu(dev->bulkq);
kfree(dev);
return -EINVAL;
}
@@ -405,6 +517,9 @@ static struct notifier_block dev_map_notifier = {
static int __init dev_map_init(void)
{
+ /* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
+ BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
+ offsetof(struct _bpf_dtab_netdev, dev));
register_netdevice_notifier(&dev_map_notifier);
return 0;
}
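
A hedged sketch, not part of the patch, of the unchanged caller side: an XDP program still redirects through a DEVMAP with bpf_redirect_map(), and the per-CPU bulking added above (bq_enqueue()/bq_xmit_all()) is transparent to it. The map definition style and section names follow the samples/bpf conventions of the time and are assumptions.

#include <linux/bpf.h>
#include "bpf_helpers.h"		/* assumed: bpf_map_def, SEC(), bpf_redirect_map() */

struct bpf_map_def SEC("maps") tx_port = {
	.type        = BPF_MAP_TYPE_DEVMAP,
	.key_size    = sizeof(int),
	.value_size  = sizeof(int),
	.max_entries = 64,
};

SEC("xdp")
int xdp_devmap_redirect(struct xdp_md *ctx)
{
	/* frames land in the per-CPU bulk queue and are sent in batches of
	 * up to DEV_MAP_BULK_SIZE by bq_xmit_all() at flush time
	 */
	return bpf_redirect_map(&tx_port, 0 /* map slot */, 0);
}

char _license[] SEC("license") = "GPL";
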
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index cd832250a478..52a91d816c0e 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -523,6 +523,7 @@ static unsigned int smap_do_tx_msg(struct sock *sk,
}
bpf_compute_data_pointers_sg(md);
+ md->sk = sk;
rc = (*prog->bpf_func)(md, prog->insnsi);
psock->apply_bytes = md->apply_bytes;
@@ -1713,7 +1714,7 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
struct smap_psock_map_entry *e = NULL;
struct smap_psock *psock;
bool new = false;
- int err;
+ int err = 0;
/* 1. If sock map has BPF programs those will be inherited by the
* sock being added. If the sock is already attached to BPF programs
@@ -1823,7 +1824,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
write_unlock_bh(&sock->sk_callback_lock);
return err;
out_free:
- kfree(e);
smap_release_sock(psock, sock);
out_progs:
if (parse && verdict) {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bfcde949c7f8..388d4feda348 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -18,7 +18,9 @@
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
+#include <linux/fdtable.h>
#include <linux/file.h>
+#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
@@ -65,9 +67,9 @@ static const struct bpf_map_ops * const bpf_map_types[] = {
* copy_from_user() call. However, this is not a concern since this function is
* meant to be a future-proofing of bits.
*/
-static int check_uarg_tail_zero(void __user *uaddr,
- size_t expected_size,
- size_t actual_size)
+int bpf_check_uarg_tail_zero(void __user *uaddr,
+ size_t expected_size,
+ size_t actual_size)
{
unsigned char __user *addr;
unsigned char __user *end;
@@ -422,7 +424,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
return 0;
}
-#define BPF_MAP_CREATE_LAST_FIELD btf_value_id
+#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
@@ -457,10 +459,10 @@ static int map_create(union bpf_attr *attr)
atomic_set(&map->usercnt, 1);
if (bpf_map_support_seq_show(map) &&
- (attr->btf_key_id || attr->btf_value_id)) {
+ (attr->btf_key_type_id || attr->btf_value_type_id)) {
struct btf *btf;
- if (!attr->btf_key_id || !attr->btf_value_id) {
+ if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
err = -EINVAL;
goto free_map_nouncharge;
}
@@ -471,16 +473,16 @@ static int map_create(union bpf_attr *attr)
goto free_map_nouncharge;
}
- err = map->ops->map_check_btf(map, btf, attr->btf_key_id,
- attr->btf_value_id);
+ err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id,
+ attr->btf_value_type_id);
if (err) {
btf_put(btf);
goto free_map_nouncharge;
}
map->btf = btf;
- map->btf_key_id = attr->btf_key_id;
- map->btf_value_id = attr->btf_value_id;
+ map->btf_key_type_id = attr->btf_key_type_id;
+ map->btf_value_type_id = attr->btf_value_type_id;
}
err = security_bpf_map_alloc(map);
@@ -1899,7 +1901,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
u32 ulen;
int err;
- err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
+ err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
if (err)
return err;
info_len = min_t(u32, sizeof(info), info_len);
@@ -1933,6 +1935,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
if (!capable(CAP_SYS_ADMIN)) {
info.jited_prog_len = 0;
info.xlated_prog_len = 0;
+ info.nr_jited_ksyms = 0;
goto done;
}
@@ -1969,18 +1972,93 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
* for offload.
*/
ulen = info.jited_prog_len;
- info.jited_prog_len = prog->jited_len;
+ if (prog->aux->func_cnt) {
+ u32 i;
+
+ info.jited_prog_len = 0;
+ for (i = 0; i < prog->aux->func_cnt; i++)
+ info.jited_prog_len += prog->aux->func[i]->jited_len;
+ } else {
+ info.jited_prog_len = prog->jited_len;
+ }
+
if (info.jited_prog_len && ulen) {
if (bpf_dump_raw_ok()) {
uinsns = u64_to_user_ptr(info.jited_prog_insns);
ulen = min_t(u32, info.jited_prog_len, ulen);
- if (copy_to_user(uinsns, prog->bpf_func, ulen))
- return -EFAULT;
+
+ /* for multi-function programs, copy the JITed
+ * instructions for all the functions
+ */
+ if (prog->aux->func_cnt) {
+ u32 len, free, i;
+ u8 *img;
+
+ free = ulen;
+ for (i = 0; i < prog->aux->func_cnt; i++) {
+ len = prog->aux->func[i]->jited_len;
+ len = min_t(u32, len, free);
+ img = (u8 *) prog->aux->func[i]->bpf_func;
+ if (copy_to_user(uinsns, img, len))
+ return -EFAULT;
+ uinsns += len;
+ free -= len;
+ if (!free)
+ break;
+ }
+ } else {
+ if (copy_to_user(uinsns, prog->bpf_func, ulen))
+ return -EFAULT;
+ }
} else {
info.jited_prog_insns = 0;
}
}
+ ulen = info.nr_jited_ksyms;
+ info.nr_jited_ksyms = prog->aux->func_cnt;
+ if (info.nr_jited_ksyms && ulen) {
+ if (bpf_dump_raw_ok()) {
+ u64 __user *user_ksyms;
+ ulong ksym_addr;
+ u32 i;
+
+ /* copy the address of the kernel symbol
+ * corresponding to each function
+ */
+ ulen = min_t(u32, info.nr_jited_ksyms, ulen);
+ user_ksyms = u64_to_user_ptr(info.jited_ksyms);
+ for (i = 0; i < ulen; i++) {
+ ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
+ ksym_addr &= PAGE_MASK;
+ if (put_user((u64) ksym_addr, &user_ksyms[i]))
+ return -EFAULT;
+ }
+ } else {
+ info.jited_ksyms = 0;
+ }
+ }
+
+ ulen = info.nr_jited_func_lens;
+ info.nr_jited_func_lens = prog->aux->func_cnt;
+ if (info.nr_jited_func_lens && ulen) {
+ if (bpf_dump_raw_ok()) {
+ u32 __user *user_lens;
+ u32 func_len, i;
+
+ /* copy the JITed image lengths for each function */
+ ulen = min_t(u32, info.nr_jited_func_lens, ulen);
+ user_lens = u64_to_user_ptr(info.jited_func_lens);
+ for (i = 0; i < ulen; i++) {
+ func_len = prog->aux->func[i]->jited_len;
+ if (put_user(func_len, &user_lens[i]))
+ return -EFAULT;
+ }
+ } else {
+ info.jited_func_lens = 0;
+ }
+ }
+
done:
if (copy_to_user(uinfo, &info, info_len) ||
put_user(info_len, &uattr->info.info_len))
@@ -1998,7 +2076,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
u32 info_len = attr->info.info_len;
int err;
- err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
+ err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
if (err)
return err;
info_len = min_t(u32, sizeof(info), info_len);
@@ -2013,8 +2091,8 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
if (map->btf) {
info.btf_id = btf_id(map->btf);
- info.btf_key_id = map->btf_key_id;
- info.btf_value_id = map->btf_value_id;
+ info.btf_key_type_id = map->btf_key_type_id;
+ info.btf_value_type_id = map->btf_value_type_id;
}
if (bpf_map_is_dev_bound(map)) {
@@ -2038,7 +2116,7 @@ static int bpf_btf_get_info_by_fd(struct btf *btf,
u32 info_len = attr->info.info_len;
int err;
- err = check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
+ err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
if (err)
return err;
@@ -2102,6 +2180,132 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
return btf_get_fd_by_id(attr->btf_id);
}
+static int bpf_task_fd_query_copy(const union bpf_attr *attr,
+ union bpf_attr __user *uattr,
+ u32 prog_id, u32 fd_type,
+ const char *buf, u64 probe_offset,
+ u64 probe_addr)
+{
+ char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
+ u32 len = buf ? strlen(buf) : 0, input_len;
+ int err = 0;
+
+ if (put_user(len, &uattr->task_fd_query.buf_len))
+ return -EFAULT;
+ input_len = attr->task_fd_query.buf_len;
+ if (input_len && ubuf) {
+ if (!len) {
+ /* nothing to copy, just make ubuf NULL terminated */
+ char zero = '\0';
+
+ if (put_user(zero, ubuf))
+ return -EFAULT;
+ } else if (input_len >= len + 1) {
+ /* ubuf can hold the string with NULL terminator */
+ if (copy_to_user(ubuf, buf, len + 1))
+ return -EFAULT;
+ } else {
+ /* ubuf cannot hold the string with NULL terminator,
+ * do a partial copy with NULL terminator.
+ */
+ char zero = '\0';
+
+ err = -ENOSPC;
+ if (copy_to_user(ubuf, buf, input_len - 1))
+ return -EFAULT;
+ if (put_user(zero, ubuf + input_len - 1))
+ return -EFAULT;
+ }
+ }
+
+ if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
+ put_user(fd_type, &uattr->task_fd_query.fd_type) ||
+ put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
+ put_user(probe_addr, &uattr->task_fd_query.probe_addr))
+ return -EFAULT;
+
+ return err;
+}
+
+#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
+
+static int bpf_task_fd_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ pid_t pid = attr->task_fd_query.pid;
+ u32 fd = attr->task_fd_query.fd;
+ const struct perf_event *event;
+ struct files_struct *files;
+ struct task_struct *task;
+ struct file *file;
+ int err;
+
+ if (CHECK_ATTR(BPF_TASK_FD_QUERY))
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (attr->task_fd_query.flags != 0)
+ return -EINVAL;
+
+ task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
+ if (!task)
+ return -ENOENT;
+
+ files = get_files_struct(task);
+ put_task_struct(task);
+ if (!files)
+ return -ENOENT;
+
+ err = 0;
+ spin_lock(&files->file_lock);
+ file = fcheck_files(files, fd);
+ if (!file)
+ err = -EBADF;
+ else
+ get_file(file);
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+
+ if (err)
+ goto out;
+
+ if (file->f_op == &bpf_raw_tp_fops) {
+ struct bpf_raw_tracepoint *raw_tp = file->private_data;
+ struct bpf_raw_event_map *btp = raw_tp->btp;
+
+ err = bpf_task_fd_query_copy(attr, uattr,
+ raw_tp->prog->aux->id,
+ BPF_FD_TYPE_RAW_TRACEPOINT,
+ btp->tp->name, 0, 0);
+ goto put_file;
+ }
+
+ event = perf_get_event(file);
+ if (!IS_ERR(event)) {
+ u64 probe_offset, probe_addr;
+ u32 prog_id, fd_type;
+ const char *buf;
+
+ err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
+ &buf, &probe_offset,
+ &probe_addr);
+ if (!err)
+ err = bpf_task_fd_query_copy(attr, uattr, prog_id,
+ fd_type, buf,
+ probe_offset,
+ probe_addr);
+ goto put_file;
+ }
+
+ err = -ENOTSUPP;
+put_file:
+ fput(file);
+out:
+ return err;
+}
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
@@ -2110,7 +2314,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
return -EPERM;
- err = check_uarg_tail_zero(uattr, sizeof(attr), size);
+ err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
if (err)
return err;
size = min_t(u32, size, sizeof(attr));
@@ -2188,6 +2392,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_BTF_GET_FD_BY_ID:
err = bpf_btf_get_fd_by_id(&attr);
break;
+ case BPF_TASK_FD_QUERY:
+ err = bpf_task_fd_query(&attr, uattr);
+ break;
default:
err = -EINVAL;
break;
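
A hedged user-space sketch, not part of the patch, of the new command: given a pid and a perf_event or raw_tracepoint fd, BPF_TASK_FD_QUERY reports the attached program id and the probe details. The raw syscall form is used here; field names follow the task_fd_query layout added to bpf.h above, and the buffer size is illustrative.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int query_task_fd(pid_t pid, int fd)
{
	union bpf_attr attr;
	char buf[256];
	int err;

	memset(&attr, 0, sizeof(attr));
	attr.task_fd_query.pid     = pid;
	attr.task_fd_query.fd      = fd;
	attr.task_fd_query.buf     = (__u64)(unsigned long)buf;
	attr.task_fd_query.buf_len = sizeof(buf);

	err = syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr));
	if (!err)
		printf("prog_id=%u fd_type=%u buf=%s offset=%llu addr=0x%llx\n",
		       attr.task_fd_query.prog_id, attr.task_fd_query.fd_type,
		       buf, (unsigned long long)attr.task_fd_query.probe_offset,
		       (unsigned long long)attr.task_fd_query.probe_addr);
	return err;
}
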
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a9e4b1372da6..967cacf286ea 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1262,6 +1262,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
switch (env->prog->type) {
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT:
+ case BPF_PROG_TYPE_LWT_SEG6LOCAL:
/* dst_input() and dst_output() can't write for now */
if (t == BPF_WRITE)
return false;
@@ -5383,11 +5384,24 @@ static int jit_subprogs(struct bpf_verifier_env *env)
insn->src_reg != BPF_PSEUDO_CALL)
continue;
subprog = insn->off;
- insn->off = 0;
insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
func[subprog]->bpf_func -
__bpf_call_base;
}
+
+ /* we use the aux data to keep a list of the start addresses
+ * of the JITed images for each function in the program
+ *
+ * for some architectures, such as powerpc64, the imm field
+ * might not be large enough to hold the offset of the start
+ * address of the callee's JITed image from __bpf_call_base
+ *
+ * in such cases, we can lookup the start address of a callee
+ * by using its subprog id, available from the off field of
+ * the call instruction, as an index for this list
+ */
+ func[i]->aux->func = func;
+ func[i]->aux->func_cnt = env->subprog_cnt;
}
for (i = 0; i < env->subprog_cnt; i++) {
old_bpf_func = func[i]->bpf_func;
@@ -5413,17 +5427,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
* later look the same as if they were interpreted only.
*/
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
- unsigned long addr;
-
if (insn->code != (BPF_JMP | BPF_CALL) ||
insn->src_reg != BPF_PSEUDO_CALL)
continue;
insn->off = env->insn_aux_data[i].call_imm;
subprog = find_subprog(env, i + insn->off + 1);
- addr = (unsigned long)func[subprog]->bpf_func;
- addr &= PAGE_MASK;
- insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
- addr - __bpf_call_base;
+ insn->imm = subprog;
}
prog->jited = 1;
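
Illustrative only, not part of the patch: with aux->func and aux->func_cnt now populated, an arch JIT whose imm field cannot hold the __bpf_call_base-relative offset can resolve a BPF_PSEUDO_CALL target through the subprog id kept in the off field, as the comment above describes. This is a schematic fragment, not any particular architecture's JIT.

#include <linux/bpf.h>	/* kernel-internal: struct bpf_prog, struct bpf_insn */

/* hypothetical helper an arch JIT could use; fp and insn come from the JIT */
static u64 pseudo_call_target(const struct bpf_prog *fp,
			      const struct bpf_insn *insn)
{
	/* insn->off still carries the callee's subprog id after the hunk above */
	return (u64)fp->aux->func[insn->off]->bpf_func;
}
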
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index cb3a12137404..b3c557476a8d 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -1,15 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* XSKMAP used for AF_XDP sockets
* Copyright(c) 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#include <linux/bpf.h>
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 67612ce359ad..6eeab86d24ba 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11212,6 +11212,14 @@ struct file *perf_event_get(unsigned int fd)
return file;
}
+const struct perf_event *perf_get_event(struct file *file)
+{
+ if (file->f_op != &perf_fops)
+ return ERR_PTR(-EINVAL);
+
+ return file->private_data;
+}
+
const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
{
if (!event)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ce2cbbff27e4..81fdf2fc94ac 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -14,6 +14,7 @@
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/kprobes.h>
+#include <linux/syscalls.h>
#include <linux/error-injection.h>
#include "trace_probe.h"
@@ -1163,3 +1164,50 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
mutex_unlock(&bpf_event_mutex);
return err;
}
+
+int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
+ u32 *fd_type, const char **buf,
+ u64 *probe_offset, u64 *probe_addr)
+{
+ bool is_tracepoint, is_syscall_tp;
+ struct bpf_prog *prog;
+ int flags, err = 0;
+
+ prog = event->prog;
+ if (!prog)
+ return -ENOENT;
+
+ /* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
+ if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
+ return -EOPNOTSUPP;
+
+ *prog_id = prog->aux->id;
+ flags = event->tp_event->flags;
+ is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
+ is_syscall_tp = is_syscall_trace_event(event->tp_event);
+
+ if (is_tracepoint || is_syscall_tp) {
+ *buf = is_tracepoint ? event->tp_event->tp->name
+ : event->tp_event->name;
+ *fd_type = BPF_FD_TYPE_TRACEPOINT;
+ *probe_offset = 0x0;
+ *probe_addr = 0x0;
+ } else {
+ /* kprobe/uprobe */
+ err = -EOPNOTSUPP;
+#ifdef CONFIG_KPROBE_EVENTS
+ if (flags & TRACE_EVENT_FL_KPROBE)
+ err = bpf_get_kprobe_info(event, fd_type, buf,
+ probe_offset, probe_addr,
+ event->attr.type == PERF_TYPE_TRACEPOINT);
+#endif
+#ifdef CONFIG_UPROBE_EVENTS
+ if (flags & TRACE_EVENT_FL_UPROBE)
+ err = bpf_get_uprobe_info(event, fd_type, buf,
+ probe_offset,
+ event->attr.type == PERF_TYPE_TRACEPOINT);
+#endif
+ }
+
+ return err;
+}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 02aed76e0978..daa81571b22a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1287,6 +1287,35 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
head, NULL);
}
NOKPROBE_SYMBOL(kretprobe_perf_func);
+
+int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
+ const char **symbol, u64 *probe_offset,
+ u64 *probe_addr, bool perf_type_tracepoint)
+{
+ const char *pevent = trace_event_name(event->tp_event);
+ const char *group = event->tp_event->class->system;
+ struct trace_kprobe *tk;
+
+ if (perf_type_tracepoint)
+ tk = find_trace_kprobe(pevent, group);
+ else
+ tk = event->tp_event->data;
+ if (!tk)
+ return -EINVAL;
+
+ *fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
+ : BPF_FD_TYPE_KPROBE;
+ if (tk->symbol) {
+ *symbol = tk->symbol;
+ *probe_offset = tk->rp.kp.offset;
+ *probe_addr = 0;
+ } else {
+ *symbol = NULL;
+ *probe_offset = 0;
+ *probe_addr = (unsigned long)tk->rp.kp.addr;
+ }
+ return 0;
+}
#endif /* CONFIG_PERF_EVENTS */
/*
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index ac892878dbe6..bf89a51e740d 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1161,6 +1161,28 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
{
__uprobe_perf_func(tu, func, regs, ucb, dsize);
}
+
+int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
+ const char **filename, u64 *probe_offset,
+ bool perf_type_tracepoint)
+{
+ const char *pevent = trace_event_name(event->tp_event);
+ const char *group = event->tp_event->class->system;
+ struct trace_uprobe *tu;
+
+ if (perf_type_tracepoint)
+ tu = find_probe_event(pevent, group);
+ else
+ tu = event->tp_event->data;
+ if (!tu)
+ return -EINVAL;
+
+ *fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE
+ : BPF_FD_TYPE_UPROBE;
+ *filename = tu->filename;
+ *probe_offset = tu->offset;
+ return 0;
+}
#endif /* CONFIG_PERF_EVENTS */
static int
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 7a7fd672ccf2..9019f326fe81 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -30,7 +30,8 @@ static inline int should_deliver(const struct net_bridge_port *p,
vg = nbp_vlan_group_rcu(p);
return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING &&
- nbp_switchdev_allowed_egress(p, skb);
+ nbp_switchdev_allowed_egress(p, skb) &&
+ !br_skb_isolated(p, skb);
}
int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 7f98a7d25866..72074276c088 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -114,6 +114,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
goto drop;
BR_INPUT_SKB_CB(skb)->brdev = br->dev;
+ BR_INPUT_SKB_CB(skb)->src_port_isolated = !!(p->flags & BR_ISOLATED);
if (IS_ENABLED(CONFIG_INET) &&
(skb->protocol == htons(ETH_P_ARP) ||
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 015f465c514b..9f5eb05b0373 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -139,6 +139,7 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */
+ nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */
+ nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */
+ + nla_total_size(1) /* IFLA_BRPORT_ISOLATED */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
@@ -213,7 +214,8 @@ static int br_port_fill_attrs(struct sk_buff *skb,
BR_VLAN_TUNNEL)) ||
nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask) ||
nla_put_u8(skb, IFLA_BRPORT_NEIGH_SUPPRESS,
- !!(p->flags & BR_NEIGH_SUPPRESS)))
+ !!(p->flags & BR_NEIGH_SUPPRESS)) ||
+ nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)))
return -EMSGSIZE;
timerval = br_timer_value(&p->message_age_timer);
@@ -660,6 +662,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_VLAN_TUNNEL] = { .type = NLA_U8 },
[IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 },
[IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 },
+ [IFLA_BRPORT_ISOLATED] = { .type = NLA_U8 },
};
/* Change the state of the port and notify spanning tree */
@@ -810,6 +813,10 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
if (err)
return err;
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED);
+ if (err)
+ return err;
+
br_port_flags_change(p, old_flags ^ p->flags);
return 0;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 742f40aefdaf..11520ed528b0 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -423,6 +423,7 @@ struct br_input_skb_cb {
#endif
bool proxyarp_replied;
+ bool src_port_isolated;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
bool vlan_filtered;
@@ -574,6 +575,14 @@ int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
void br_flood(struct net_bridge *br, struct sk_buff *skb,
enum br_pkt_type pkt_type, bool local_rcv, bool local_orig);
+/* return true if both source port and dest port are isolated */
+static inline bool br_skb_isolated(const struct net_bridge_port *to,
+ const struct sk_buff *skb)
+{
+ return BR_INPUT_SKB_CB(skb)->src_port_isolated &&
+ (to->flags & BR_ISOLATED);
+}
+
/* br_if.c */
void br_port_carrier_check(struct net_bridge_port *p, bool *notified);
int br_add_bridge(struct net *net, const char *name);
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index fd31ad83ec7b..f99c5bf5c906 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -192,6 +192,7 @@ BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD);
BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD);
BRPORT_ATTR_FLAG(neigh_suppress, BR_NEIGH_SUPPRESS);
+BRPORT_ATTR_FLAG(isolated, BR_ISOLATED);
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
@@ -243,6 +244,7 @@ static const struct brport_attribute *brport_attrs[] = {
&brport_attr_broadcast_flood,
&brport_attr_group_fwd_mask,
&brport_attr_neigh_suppress,
+ &brport_attr_isolated,
NULL
};
diff --git a/net/core/filter.c b/net/core/filter.c
index 51ea7ddb2d8d..acf1f4fb99d1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -64,6 +64,10 @@
#include <net/ip_fib.h>
#include <net/flow.h>
#include <net/arp.h>
+#include <net/ipv6.h>
+#include <linux/seg6_local.h>
+#include <net/seg6.h>
+#include <net/seg6_local.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -3042,7 +3046,7 @@ static int __bpf_tx_xdp(struct net_device *dev,
u32 index)
{
struct xdp_frame *xdpf;
- int err;
+ int sent;
if (!dev->netdev_ops->ndo_xdp_xmit) {
return -EOPNOTSUPP;
@@ -3052,9 +3056,9 @@ static int __bpf_tx_xdp(struct net_device *dev,
if (unlikely(!xdpf))
return -EOVERFLOW;
- err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
- if (err)
- return err;
+ sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf);
+ if (sent <= 0)
+ return sent;
dev->netdev_ops->ndo_xdp_flush(dev);
return 0;
}
@@ -3068,20 +3072,9 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP: {
- struct net_device *dev = fwd;
- struct xdp_frame *xdpf;
+ struct bpf_dtab_netdev *dst = fwd;
- if (!dev->netdev_ops->ndo_xdp_xmit)
- return -EOPNOTSUPP;
-
- xdpf = convert_to_xdp_frame(xdp);
- if (unlikely(!xdpf))
- return -EOVERFLOW;
-
- /* TODO: move to inside map code instead, for bulk support
- * err = dev_map_enqueue(dev, xdp);
- */
- err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
+ err = dev_map_enqueue(dst, xdp, dev_rx);
if (err)
return err;
__dev_map_insert_ctx(map, index);
@@ -3370,28 +3363,6 @@ static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
.arg3_type = ARG_ANYTHING,
};
-bool bpf_helper_changes_pkt_data(void *func)
-{
- if (func == bpf_skb_vlan_push ||
- func == bpf_skb_vlan_pop ||
- func == bpf_skb_store_bytes ||
- func == bpf_skb_change_proto ||
- func == bpf_skb_change_head ||
- func == bpf_skb_change_tail ||
- func == bpf_skb_adjust_room ||
- func == bpf_skb_pull_data ||
- func == bpf_clone_redirect ||
- func == bpf_l3_csum_replace ||
- func == bpf_l4_csum_replace ||
- func == bpf_xdp_adjust_head ||
- func == bpf_xdp_adjust_meta ||
- func == bpf_msg_pull_data ||
- func == bpf_xdp_adjust_tail)
- return true;
-
- return false;
-}
-
static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
unsigned long off, unsigned long len)
{
@@ -4096,7 +4067,7 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
#if IS_ENABLED(CONFIG_INET)
static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
- u32 flags)
+ u32 flags, bool check_mtu)
{
struct in_device *in_dev;
struct neighbour *neigh;
@@ -4105,6 +4076,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
struct fib_nh *nh;
struct flowi4 fl4;
int err;
+ u32 mtu;
dev = dev_get_by_index_rcu(net, params->ifindex);
if (unlikely(!dev))
@@ -4156,6 +4128,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (res.fi->fib_nhs > 1)
fib_select_path(net, &res, &fl4, NULL);
+ if (check_mtu) {
+ mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
+ if (params->tot_len > mtu)
+ return 0;
+ }
+
nh = &res.fi->fib_nh[res.nh_sel];
/* do not handle lwt encaps right now */
@@ -4184,7 +4162,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
#if IS_ENABLED(CONFIG_IPV6)
static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
- u32 flags)
+ u32 flags, bool check_mtu)
{
struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
@@ -4195,6 +4173,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
struct flowi6 fl6;
int strict = 0;
int oif;
+ u32 mtu;
/* link local addresses are never forwarded */
if (rt6_need_strict(dst) || rt6_need_strict(src))
@@ -4257,6 +4236,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
fl6.flowi6_oif, NULL,
strict);
+ if (check_mtu) {
+ mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
+ if (params->tot_len > mtu)
+ return 0;
+ }
+
if (f6i->fib6_nh.nh_lwtstate)
return 0;
@@ -4289,12 +4274,12 @@ BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
#if IS_ENABLED(CONFIG_INET)
case AF_INET:
return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
- flags);
+ flags, true);
#endif
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
- flags);
+ flags, true);
#endif
}
return 0;
@@ -4313,20 +4298,34 @@ static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
{
+ struct net *net = dev_net(skb->dev);
+ int index = 0;
+
if (plen < sizeof(*params))
return -EINVAL;
switch (params->family) {
#if IS_ENABLED(CONFIG_INET)
case AF_INET:
- return bpf_ipv4_fib_lookup(dev_net(skb->dev), params, flags);
+ index = bpf_ipv4_fib_lookup(net, params, flags, false);
+ break;
#endif
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- return bpf_ipv6_fib_lookup(dev_net(skb->dev), params, flags);
+ index = bpf_ipv6_fib_lookup(net, params, flags, false);
+ break;
#endif
}
- return -ENOTSUPP;
+
+ if (index > 0) {
+ struct net_device *dev;
+
+ dev = dev_get_by_index_rcu(net, index);
+ if (!is_skb_forwardable(dev, skb))
+ index = 0;
+ }
+
+ return index;
}
static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
@@ -4339,6 +4338,264 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
.arg4_type = ARG_ANYTHING,
};
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
+static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+{
+ int err;
+ struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;
+
+ if (!seg6_validate_srh(srh, len))
+ return -EINVAL;
+
+ switch (type) {
+ case BPF_LWT_ENCAP_SEG6_INLINE:
+ if (skb->protocol != htons(ETH_P_IPV6))
+ return -EBADMSG;
+
+ err = seg6_do_srh_inline(skb, srh);
+ break;
+ case BPF_LWT_ENCAP_SEG6:
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ bpf_compute_data_pointers(skb);
+ if (err)
+ return err;
+
+ ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
+ return seg6_lookup_nexthop(skb, NULL, 0);
+}
+#endif /* CONFIG_IPV6_SEG6_BPF */
+
+BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
+ u32, len)
+{
+ switch (type) {
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
+ case BPF_LWT_ENCAP_SEG6:
+ case BPF_LWT_ENCAP_SEG6_INLINE:
+ return bpf_push_seg6_encap(skb, type, hdr, len);
+#endif
+ default:
+ return -EINVAL;
+ }
+}
+
+static const struct bpf_func_proto bpf_lwt_push_encap_proto = {
+ .func = bpf_lwt_push_encap,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_MEM,
+ .arg4_type = ARG_CONST_SIZE
+};
+
+BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
+ const void *, from, u32, len)
+{
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
+ struct seg6_bpf_srh_state *srh_state =
+ this_cpu_ptr(&seg6_bpf_srh_states);
+ void *srh_tlvs, *srh_end, *ptr;
+ struct ipv6_sr_hdr *srh;
+ int srhoff = 0;
+
+ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ return -EINVAL;
+
+ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+ srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
+ srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
+
+ ptr = skb->data + offset;
+ if (ptr >= srh_tlvs && ptr + len <= srh_end)
+ srh_state->valid = 0;
+ else if (ptr < (void *)&srh->flags ||
+ ptr + len > (void *)&srh->segments)
+ return -EFAULT;
+
+ if (unlikely(bpf_try_make_writable(skb, offset + len)))
+ return -EFAULT;
+
+ memcpy(skb->data + offset, from, len);
+ return 0;
+#else /* CONFIG_IPV6_SEG6_BPF */
+ return -EOPNOTSUPP;
+#endif
+}
+
+static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
+ .func = bpf_lwt_seg6_store_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_MEM,
+ .arg4_type = ARG_CONST_SIZE
+};
+
+BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
+ u32, action, void *, param, u32, param_len)
+{
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
+ struct seg6_bpf_srh_state *srh_state =
+ this_cpu_ptr(&seg6_bpf_srh_states);
+ struct ipv6_sr_hdr *srh;
+ int srhoff = 0;
+ int err;
+
+ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ return -EINVAL;
+ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+
+ if (!srh_state->valid) {
+ if (unlikely((srh_state->hdrlen & 7) != 0))
+ return -EBADMSG;
+
+ srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
+ if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
+ return -EBADMSG;
+
+ srh_state->valid = 1;
+ }
+
+ switch (action) {
+ case SEG6_LOCAL_ACTION_END_X:
+ if (param_len != sizeof(struct in6_addr))
+ return -EINVAL;
+ return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
+ case SEG6_LOCAL_ACTION_END_T:
+ if (param_len != sizeof(int))
+ return -EINVAL;
+ return seg6_lookup_nexthop(skb, NULL, *(int *)param);
+ case SEG6_LOCAL_ACTION_END_B6:
+ err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
+ param, param_len);
+ if (!err)
+ srh_state->hdrlen =
+ ((struct ipv6_sr_hdr *)param)->hdrlen << 3;
+ return err;
+ case SEG6_LOCAL_ACTION_END_B6_ENCAP:
+ err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
+ param, param_len);
+ if (!err)
+ srh_state->hdrlen =
+ ((struct ipv6_sr_hdr *)param)->hdrlen << 3;
+ return err;
+ default:
+ return -EINVAL;
+ }
+#else /* CONFIG_IPV6_SEG6_BPF */
+ return -EOPNOTSUPP;
+#endif
+}
+
+static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
+ .func = bpf_lwt_seg6_action,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_MEM,
+ .arg4_type = ARG_CONST_SIZE
+};
+
+BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
+ s32, len)
+{
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
+ struct seg6_bpf_srh_state *srh_state =
+ this_cpu_ptr(&seg6_bpf_srh_states);
+ void *srh_end, *srh_tlvs, *ptr;
+ struct ipv6_sr_hdr *srh;
+ struct ipv6hdr *hdr;
+ int srhoff = 0;
+ int ret;
+
+ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ return -EINVAL;
+ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+
+ srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
+ ((srh->first_segment + 1) << 4));
+ srh_end = (void *)((unsigned char *)srh + sizeof(*srh) +
+ srh_state->hdrlen);
+ ptr = skb->data + offset;
+
+ if (unlikely(ptr < srh_tlvs || ptr > srh_end))
+ return -EFAULT;
+ if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end))
+ return -EFAULT;
+
+ if (len > 0) {
+ ret = skb_cow_head(skb, len);
+ if (unlikely(ret < 0))
+ return ret;
+
+ ret = bpf_skb_net_hdr_push(skb, offset, len);
+ } else {
+ ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len);
+ }
+
+ bpf_compute_data_pointers(skb);
+ if (unlikely(ret < 0))
+ return ret;
+
+ hdr = (struct ipv6hdr *)skb->data;
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
+ srh_state->hdrlen += len;
+ srh_state->valid = 0;
+ return 0;
+#else /* CONFIG_IPV6_SEG6_BPF */
+ return -EOPNOTSUPP;
+#endif
+}
+
+static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
+ .func = bpf_lwt_seg6_adjust_srh,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
+bool bpf_helper_changes_pkt_data(void *func)
+{
+ if (func == bpf_skb_vlan_push ||
+ func == bpf_skb_vlan_pop ||
+ func == bpf_skb_store_bytes ||
+ func == bpf_skb_change_proto ||
+ func == bpf_skb_change_head ||
+ func == bpf_skb_change_tail ||
+ func == bpf_skb_adjust_room ||
+ func == bpf_skb_pull_data ||
+ func == bpf_clone_redirect ||
+ func == bpf_l3_csum_replace ||
+ func == bpf_l4_csum_replace ||
+ func == bpf_xdp_adjust_head ||
+ func == bpf_xdp_adjust_meta ||
+ func == bpf_msg_pull_data ||
+ func == bpf_xdp_adjust_tail ||
+ func == bpf_lwt_push_encap ||
+ func == bpf_lwt_seg6_store_bytes ||
+ func == bpf_lwt_seg6_adjust_srh ||
+ func == bpf_lwt_seg6_action
+ )
+ return true;
+
+ return false;
+}
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -4523,33 +4780,6 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
static const struct bpf_func_proto *
-lwt_inout_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
-{
- switch (func_id) {
- case BPF_FUNC_skb_load_bytes:
- return &bpf_skb_load_bytes_proto;
- case BPF_FUNC_skb_pull_data:
- return &bpf_skb_pull_data_proto;
- case BPF_FUNC_csum_diff:
- return &bpf_csum_diff_proto;
- case BPF_FUNC_get_cgroup_classid:
- return &bpf_get_cgroup_classid_proto;
- case BPF_FUNC_get_route_realm:
- return &bpf_get_route_realm_proto;
- case BPF_FUNC_get_hash_recalc:
- return &bpf_get_hash_recalc_proto;
- case BPF_FUNC_perf_event_output:
- return &bpf_skb_event_output_proto;
- case BPF_FUNC_get_smp_processor_id:
- return &bpf_get_smp_processor_id_proto;
- case BPF_FUNC_skb_under_cgroup:
- return &bpf_skb_under_cgroup_proto;
- default:
- return bpf_base_func_proto(func_id);
- }
-}
-
-static const struct bpf_func_proto *
sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
@@ -4615,6 +4845,44 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
static const struct bpf_func_proto *
+lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_skb_load_bytes:
+ return &bpf_skb_load_bytes_proto;
+ case BPF_FUNC_skb_pull_data:
+ return &bpf_skb_pull_data_proto;
+ case BPF_FUNC_csum_diff:
+ return &bpf_csum_diff_proto;
+ case BPF_FUNC_get_cgroup_classid:
+ return &bpf_get_cgroup_classid_proto;
+ case BPF_FUNC_get_route_realm:
+ return &bpf_get_route_realm_proto;
+ case BPF_FUNC_get_hash_recalc:
+ return &bpf_get_hash_recalc_proto;
+ case BPF_FUNC_perf_event_output:
+ return &bpf_skb_event_output_proto;
+ case BPF_FUNC_get_smp_processor_id:
+ return &bpf_get_smp_processor_id_proto;
+ case BPF_FUNC_skb_under_cgroup:
+ return &bpf_skb_under_cgroup_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
+lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_lwt_push_encap:
+ return &bpf_lwt_push_encap_proto;
+ default:
+ return lwt_out_func_proto(func_id, prog);
+ }
+}
+
+static const struct bpf_func_proto *
lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
@@ -4645,7 +4913,22 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_set_hash_invalid:
return &bpf_set_hash_invalid_proto;
default:
- return lwt_inout_func_proto(func_id, prog);
+ return lwt_out_func_proto(func_id, prog);
+ }
+}
+
+static const struct bpf_func_proto *
+lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_lwt_seg6_store_bytes:
+ return &bpf_lwt_seg6_store_bytes_proto;
+ case BPF_FUNC_lwt_seg6_action:
+ return &bpf_lwt_seg6_action_proto;
+ case BPF_FUNC_lwt_seg6_adjust_srh:
+ return &bpf_lwt_seg6_adjust_srh_proto;
+ default:
+ return lwt_out_func_proto(func_id, prog);
}
}
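
Editor's note: the hunks above split the old lwt_inout function-proto table into lwt_in/lwt_out and make bpf_lwt_push_encap() available to LWT-in programs. Below is a hedged sketch of a BPF_PROG_TYPE_LWT_IN program that pushes a one-segment SRH onto packets of an IPv6 route ("ip -6 route add ... encap bpf in obj prog.o sec lwt_in ..."). The header include paths assume a current libbpf install; the local SRH struct mirrors the uapi ipv6_sr_hdr layout, and the segment address fc00::1 is a placeholder.

/* lwt_in_seg6_encap.c - hedged sketch, not part of the patch */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct srh_one_seg {
	unsigned char	nexthdr;	/* overwritten by the kernel on encap */
	unsigned char	hdrlen;		/* (total len - 8) / 8 */
	unsigned char	type;		/* 4 = Segment Routing Header */
	unsigned char	segments_left;
	unsigned char	first_segment;
	unsigned char	flags;
	unsigned short	tag;
	unsigned char	seg[16];	/* one segment */
} __attribute__((packed));

SEC("lwt_in")
int encap_one_seg(struct __sk_buff *skb)
{
	struct srh_one_seg srh = {
		.hdrlen		= 2,	/* (24 - 8) / 8 */
		.type		= 4,
		.segments_left	= 0,
		.first_segment	= 0,
	};

	srh.seg[0]  = 0xfc;		/* fc00::1, placeholder segment */
	srh.seg[15] = 0x01;

	if (bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_SEG6, &srh, sizeof(srh)))
		return BPF_DROP;

	return BPF_OK;
}

char _license[] SEC("license") = "GPL";
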
@@ -4753,7 +5036,6 @@ static bool lwt_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, prog, info);
}
-
/* Attach type specific accesses */
static bool __sock_filter_check_attach_type(int off,
enum bpf_access_type access_type,
@@ -5155,18 +5437,23 @@ static bool sk_msg_is_valid_access(int off, int size,
switch (off) {
case offsetof(struct sk_msg_md, data):
info->reg_type = PTR_TO_PACKET;
+ if (size != sizeof(__u64))
+ return false;
break;
case offsetof(struct sk_msg_md, data_end):
info->reg_type = PTR_TO_PACKET_END;
+ if (size != sizeof(__u64))
+ return false;
break;
+ default:
+ if (size != sizeof(__u32))
+ return false;
}
if (off < 0 || off >= sizeof(struct sk_msg_md))
return false;
if (off % size != 0)
return false;
- if (size != sizeof(__u64))
- return false;
return true;
}
@@ -5842,7 +6129,8 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, local_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4);
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ skc_rcv_saddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@ -6159,6 +6447,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
+ int off;
switch (si->off) {
case offsetof(struct sk_msg_md, data):
@@ -6171,6 +6460,107 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
offsetof(struct sk_msg_buff, data_end));
break;
+ case offsetof(struct sk_msg_md, family):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct sk_msg_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common, skc_family));
+ break;
+
+ case offsetof(struct sk_msg_md, remote_ip4):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct sk_msg_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common, skc_daddr));
+ break;
+
+ case offsetof(struct sk_msg_md, local_ip4):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ skc_rcv_saddr) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct sk_msg_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common,
+ skc_rcv_saddr));
+ break;
+
+ case offsetof(struct sk_msg_md, remote_ip6[0]) ...
+ offsetof(struct sk_msg_md, remote_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ skc_v6_daddr.s6_addr32[0]) != 4);
+
+ off = si->off;
+ off -= offsetof(struct sk_msg_md, remote_ip6[0]);
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct sk_msg_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common,
+ skc_v6_daddr.s6_addr32[0]) +
+ off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+
+ case offsetof(struct sk_msg_md, local_ip6[0]) ...
+ offsetof(struct sk_msg_md, local_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0]) != 4);
+
+ off = si->off;
+ off -= offsetof(struct sk_msg_md, local_ip6[0]);
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct sk_msg_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0]) +
+ off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+
+ case offsetof(struct sk_msg_md, remote_port):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct sk_msg_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common, skc_dport));
+#ifndef __BIG_ENDIAN_BITFIELD
+ *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
+#endif
+ break;
+
+ case offsetof(struct sk_msg_md, local_port):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct sk_msg_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common, skc_num));
+ break;
}
return insn - insn_buf;
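
Editor's note: the two sk_msg hunks above expose the socket 4-tuple and family in struct sk_msg_md and relax the access check so those fields are read with 4-byte loads (data/data_end stay 8-byte). Per the conversion code, remote_port is loaded from skc_dport (network byte order) and local_port from skc_num (host byte order). A hedged sketch of an SK_MSG program using the new fields; the port number is illustrative and the libbpf header path is assumed.

/* sk_msg_filter.c - hedged sketch, not part of the patch */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#ifndef AF_INET
#define AF_INET 2
#endif

SEC("sk_msg")
int msg_filter(struct sk_msg_md *msg)
{
	if (msg->family != AF_INET)
		return SK_PASS;

	/* only let locally-originated port-80 traffic through the sockmap */
	if (msg->local_port == 80)
		return SK_PASS;

	return SK_DROP;
}

char _license[] SEC("license") = "GPL";
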
@@ -6219,13 +6609,23 @@ const struct bpf_prog_ops cg_skb_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops lwt_inout_verifier_ops = {
- .get_func_proto = lwt_inout_func_proto,
+const struct bpf_verifier_ops lwt_in_verifier_ops = {
+ .get_func_proto = lwt_in_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
-const struct bpf_prog_ops lwt_inout_prog_ops = {
+const struct bpf_prog_ops lwt_in_prog_ops = {
+ .test_run = bpf_prog_test_run_skb,
+};
+
+const struct bpf_verifier_ops lwt_out_verifier_ops = {
+ .get_func_proto = lwt_out_func_proto,
+ .is_valid_access = lwt_is_valid_access,
+ .convert_ctx_access = bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops lwt_out_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
@@ -6240,6 +6640,16 @@ const struct bpf_prog_ops lwt_xmit_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
+const struct bpf_verifier_ops lwt_seg6local_verifier_ops = {
+ .get_func_proto = lwt_seg6local_func_proto,
+ .is_valid_access = lwt_is_valid_access,
+ .convert_ctx_access = bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops lwt_seg6local_prog_ops = {
+ .test_run = bpf_prog_test_run_skb,
+};
+
const struct bpf_verifier_ops cg_sock_verifier_ops = {
.get_func_proto = sock_filter_func_proto,
.is_valid_access = sock_filter_is_valid_access,
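
Editor's note: with the filter.c rework above, the XDP flavour of bpf_fib_lookup() now also checks the forwarding MTU and the sk_buff flavour verifies is_skb_forwardable(); in this revision of the helper a return of 0 means "hand the packet to the stack" and a positive value is the egress ifindex, with smac/dmac filled in on success. A hedged sketch of an XDP forwarder in that style, loosely patterned after the in-tree XDP forwarding sample; header paths assume a current libbpf, and all names are illustrative.

/* xdp_fib_fwd.c - hedged sketch, not part of the patch */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#ifndef AF_INET
#define AF_INET 2
#endif

SEC("xdp")
int xdp_fwd(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct bpf_fib_lookup fib = {};
	struct ethhdr *eth = data;
	struct iphdr *iph;
	int rc;

	if (data + sizeof(*eth) + sizeof(*iph) > data_end)
		return XDP_PASS;
	if (eth->h_proto != bpf_htons(ETH_P_IP))
		return XDP_PASS;

	iph = data + sizeof(*eth);

	fib.family	= AF_INET;
	fib.tos		= iph->tos;
	fib.l4_protocol	= iph->protocol;
	fib.tot_len	= bpf_ntohs(iph->tot_len);	/* checked against route MTU */
	fib.ipv4_src	= iph->saddr;
	fib.ipv4_dst	= iph->daddr;
	fib.ifindex	= ctx->ingress_ifindex;

	rc = bpf_fib_lookup(ctx, &fib, sizeof(fib), 0);
	if (rc <= 0)		/* error, no neighbour, or packet exceeds MTU */
		return XDP_PASS;

	__builtin_memcpy(eth->h_dest, fib.dmac, ETH_ALEN);
	__builtin_memcpy(eth->h_source, fib.smac, ETH_ALEN);
	return bpf_redirect(rc, 0);	/* rc is the egress ifindex here */
}

char _license[] SEC("license") = "GPL";
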
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 380934580fa1..419af6dfe29f 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -35,10 +35,6 @@
#include <trace/events/tcp.h>
#include <trace/events/fib.h>
#include <trace/events/qdisc.h>
-#if IS_ENABLED(CONFIG_IPV6)
-#include <trace/events/fib6.h>
-EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
-#endif
#if IS_ENABLED(CONFIG_BRIDGE)
#include <trace/events/bridge.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index bf6758f74339..cb8c4e061a5a 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -308,7 +308,13 @@ err:
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
-static void xdp_return(void *data, struct xdp_mem_info *mem)
+/* XDP RX runs under NAPI protection, and in different delivery error
+ * scenarios (e.g. queue full), it is possible to return the xdp_frame
 * while still leveraging this protection. The @napi_direct boolean
 * is used for those call sites, allowing for faster recycling
+ * of xdp_frames/pages in those cases.
+ */
+static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct)
{
struct xdp_mem_allocator *xa;
struct page *page;
@@ -320,7 +326,7 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
page = virt_to_head_page(data);
if (xa)
- page_pool_put_page(xa->page_pool, page);
+ page_pool_put_page(xa->page_pool, page, napi_direct);
else
put_page(page);
rcu_read_unlock();
@@ -340,12 +346,18 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)
void xdp_return_frame(struct xdp_frame *xdpf)
{
- xdp_return(xdpf->data, &xdpf->mem);
+ __xdp_return(xdpf->data, &xdpf->mem, false);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);
+void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
+{
+ __xdp_return(xdpf->data, &xdpf->mem, true);
+}
+EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
+
void xdp_return_buff(struct xdp_buff *xdp)
{
- xdp_return(xdp->data, &xdp->rxq->mem);
+ __xdp_return(xdp->data, &xdp->rxq->mem, true);
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
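
Editor's note: the xdp.c hunk above adds a NAPI-direct return variant so pages can be recycled straight back into the page_pool when the caller is still inside its own NAPI poll. A schematic kernel-side sketch (not a real driver) of how the two variants would be chosen:

/* schematic only; "example_free_xdp_frame" and its caller are hypothetical */
#include <net/xdp.h>

static void example_free_xdp_frame(struct xdp_frame *xdpf, bool in_napi_poll)
{
	if (in_napi_poll)
		xdp_return_frame_rx_napi(xdpf);	/* may recycle the page directly */
	else
		xdp_return_frame(xdpf);		/* safe from any other context */
}
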
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 897ae92dff0f..045c43a27c12 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -354,8 +354,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
fl4.fl4_dport = 0;
}
- trace_fib_validate_source(dev, &fl4);
-
if (fib_lookup(net, &fl4, &res, 0))
goto last_resort;
if (res.type != RTN_UNICAST &&
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3dcffd3ce98c..65c340f230ae 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1326,14 +1326,14 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
unsigned long index;
t_key cindex;
- trace_fib_table_lookup(tb->tb_id, flp);
-
pn = t->kv;
cindex = 0;
n = get_child_rcu(pn, cindex);
- if (!n)
+ if (!n) {
+ trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN);
return -EAGAIN;
+ }
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->gets);
@@ -1416,8 +1416,11 @@ backtrace:
* nothing for us to do as we do not have any
* further nodes to parse.
*/
- if (IS_TRIE(pn))
+ if (IS_TRIE(pn)) {
+ trace_fib_table_lookup(tb->tb_id, flp,
+ NULL, -EAGAIN);
return -EAGAIN;
+ }
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->backtrack);
#endif
@@ -1459,6 +1462,7 @@ found:
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->semantic_match_passed);
#endif
+ trace_fib_table_lookup(tb->tb_id, flp, NULL, err);
return err;
}
if (fi->fib_flags & RTNH_F_DEAD)
@@ -1494,7 +1498,7 @@ found:
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->semantic_match_passed);
#endif
- trace_fib_table_lookup_nh(nh);
+ trace_fib_table_lookup(tb->tb_id, flp, nh, err);
return err;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0e401dc4e1bd..45ad2585eb28 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1352,6 +1352,37 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
return NULL;
}
+/* MTU selection:
+ * 1. mtu on route is locked - use it
+ * 2. mtu from nexthop exception
+ * 3. mtu from egress device
+ */
+
+u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
+{
+ struct fib_info *fi = res->fi;
+ struct fib_nh *nh = &fi->fib_nh[res->nh_sel];
+ struct net_device *dev = nh->nh_dev;
+ u32 mtu = 0;
+
+ if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
+ fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
+ mtu = fi->fib_mtu;
+
+ if (likely(!mtu)) {
+ struct fib_nh_exception *fnhe;
+
+ fnhe = find_exception(nh, daddr);
+ if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
+ mtu = fnhe->fnhe_pmtu;
+ }
+
+ if (likely(!mtu))
+ mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
+
+ return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu);
+}
+
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
__be32 daddr, const bool do_cache)
{
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 11e4e80cf7e9..0eff75525da1 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -329,4 +329,9 @@ config IPV6_SEG6_HMAC
If unsure, say N.
+config IPV6_SEG6_BPF
+ def_bool y
+ depends on IPV6_SEG6_LWTUNNEL
+ depends on IPV6 = y
+
endif # IPV6
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 2fe754fd4f5e..5cd0029d930e 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -161,12 +161,20 @@ eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
return f6i;
}
+static u32
+eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
+ struct in6_addr *saddr)
+{
+ return 0;
+}
+
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
.fib6_get_table = eafnosupport_fib6_get_table,
.fib6_table_lookup = eafnosupport_fib6_table_lookup,
.fib6_lookup = eafnosupport_fib6_lookup,
.fib6_multipath_select = eafnosupport_fib6_multipath_select,
+ .ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
};
EXPORT_SYMBOL_GPL(ipv6_stub);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 50de8b0d4f70..9ed0eae91758 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -894,6 +894,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
.fib6_table_lookup = fib6_table_lookup,
.fib6_lookup = fib6_lookup,
.fib6_multipath_select = fib6_multipath_select,
+ .ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
.udpv6_encap_enable = udpv6_encap_enable,
.ndisc_send_na = ndisc_send_na,
.nd_tbl = &nd_tbl,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 038d661d5ffc..22c4de2317d0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -64,14 +64,19 @@
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <net/ip.h>
-#include <trace/events/fib6.h>
-
#include <linux/uaccess.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
+static int ip6_rt_type_to_error(u8 fib6_type);
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/fib6.h>
+EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
+#undef CREATE_TRACE_POINTS
+
enum rt6_nud_state {
RT6_NUD_FAIL_HARD = -3,
RT6_NUD_FAIL_PROBE = -2,
@@ -2604,6 +2609,54 @@ out:
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
+/* MTU selection:
+ * 1. mtu on route is locked - use it
+ * 2. mtu from nexthop exception
+ * 3. mtu from egress device
+ *
+ * based on ip6_dst_mtu_forward and exception logic of
+ * rt6_find_cached_rt; called with rcu_read_lock held
+ */
+u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
+ struct in6_addr *saddr)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ struct in6_addr *src_key;
+ struct inet6_dev *idev;
+ u32 mtu = 0;
+
+ if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
+ mtu = f6i->fib6_pmtu;
+ if (mtu)
+ goto out;
+ }
+
+ src_key = NULL;
+#ifdef CONFIG_IPV6_SUBTREES
+ if (f6i->fib6_src.plen)
+ src_key = saddr;
+#endif
+
+ bucket = rcu_dereference(f6i->rt6i_exception_bucket);
+ rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
+ if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
+ mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
+
+ if (likely(!mtu)) {
+ struct net_device *dev = fib6_info_nh_dev(f6i);
+
+ mtu = IPV6_MIN_MTU;
+ idev = __in6_dev_get(dev);
+ if (idev && idev->cnf.mtu6 > mtu)
+ mtu = idev->cnf.mtu6;
+ }
+
+ mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
+out:
+ return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
+}
+
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
struct flowi6 *fl6)
{
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 45722327375a..cd6e4cab63f6 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -1,8 +1,9 @@
/*
* SR-IPv6 implementation
*
- * Author:
+ * Authors:
* David Lebrun <[email protected]>
+ * eBPF support: Mathieu Xhonneux <[email protected]>
*
*
* This program is free software; you can redistribute it and/or
@@ -30,7 +31,9 @@
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
+#include <net/seg6_local.h>
#include <linux/etherdevice.h>
+#include <linux/bpf.h>
struct seg6_local_lwt;
@@ -41,6 +44,11 @@ struct seg6_action_desc {
int static_headroom;
};
+struct bpf_lwt_prog {
+ struct bpf_prog *prog;
+ char *name;
+};
+
struct seg6_local_lwt {
int action;
struct ipv6_sr_hdr *srh;
@@ -49,6 +57,7 @@ struct seg6_local_lwt {
struct in6_addr nh6;
int iif;
int oif;
+ struct bpf_lwt_prog bpf;
int headroom;
struct seg6_action_desc *desc;
@@ -140,8 +149,8 @@ static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
*daddr = *addr;
}
-static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
- u32 tbl_id)
+int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+ u32 tbl_id)
{
struct net *net = dev_net(skb->dev);
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -187,6 +196,7 @@ out:
skb_dst_drop(skb);
skb_dst_set(skb, dst);
+ return dst->error;
}
/* regular endpoint function */
@@ -200,7 +210,7 @@ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
- lookup_nexthop(skb, NULL, 0);
+ seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
@@ -220,7 +230,7 @@ static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
- lookup_nexthop(skb, &slwt->nh6, 0);
+ seg6_lookup_nexthop(skb, &slwt->nh6, 0);
return dst_input(skb);
@@ -239,7 +249,7 @@ static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
- lookup_nexthop(skb, NULL, slwt->table);
+ seg6_lookup_nexthop(skb, NULL, slwt->table);
return dst_input(skb);
@@ -331,7 +341,7 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6;
- lookup_nexthop(skb, nhaddr, 0);
+ seg6_lookup_nexthop(skb, nhaddr, 0);
return dst_input(skb);
drop:
@@ -380,7 +390,7 @@ static int input_action_end_dt6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
- lookup_nexthop(skb, NULL, slwt->table);
+ seg6_lookup_nexthop(skb, NULL, slwt->table);
return dst_input(skb);
@@ -406,7 +416,7 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
- lookup_nexthop(skb, NULL, 0);
+ seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
@@ -438,7 +448,7 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
- lookup_nexthop(skb, NULL, 0);
+ seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
@@ -447,6 +457,71 @@ drop:
return err;
}
+DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
+
+static int input_action_end_bpf(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct seg6_bpf_srh_state *srh_state =
+ this_cpu_ptr(&seg6_bpf_srh_states);
+ struct seg6_bpf_srh_state local_srh_state;
+ struct ipv6_sr_hdr *srh;
+ int srhoff = 0;
+ int ret;
+
+ srh = get_and_validate_srh(skb);
+ if (!srh)
+ goto drop;
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+ /* preempt_disable is needed to protect the per-CPU buffer srh_state,
+ * which is also accessed by the bpf_lwt_seg6_* helpers
+ */
+ preempt_disable();
+ srh_state->hdrlen = srh->hdrlen << 3;
+ srh_state->valid = 1;
+
+ rcu_read_lock();
+ bpf_compute_data_pointers(skb);
+ ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
+ rcu_read_unlock();
+
+ local_srh_state = *srh_state;
+ preempt_enable();
+
+ switch (ret) {
+ case BPF_OK:
+ case BPF_REDIRECT:
+ break;
+ case BPF_DROP:
+ goto drop;
+ default:
+ pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret);
+ goto drop;
+ }
+
+ if (unlikely((local_srh_state.hdrlen & 7) != 0))
+ goto drop;
+
+ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ goto drop;
+ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+ srh->hdrlen = (u8)(local_srh_state.hdrlen >> 3);
+
+ if (!local_srh_state.valid &&
+ unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
+ goto drop;
+
+ if (ret != BPF_REDIRECT)
+ seg6_lookup_nexthop(skb, NULL, 0);
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
static struct seg6_action_desc seg6_action_table[] = {
{
.action = SEG6_LOCAL_ACTION_END,
@@ -493,7 +568,13 @@ static struct seg6_action_desc seg6_action_table[] = {
.attrs = (1 << SEG6_LOCAL_SRH),
.input = input_action_end_b6_encap,
.static_headroom = sizeof(struct ipv6hdr),
- }
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_BPF,
+ .attrs = (1 << SEG6_LOCAL_BPF),
+ .input = input_action_end_bpf,
+ },
+
};
static struct seg6_action_desc *__get_action_desc(int action)
@@ -538,6 +619,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
.len = sizeof(struct in6_addr) },
[SEG6_LOCAL_IIF] = { .type = NLA_U32 },
[SEG6_LOCAL_OIF] = { .type = NLA_U32 },
+ [SEG6_LOCAL_BPF] = { .type = NLA_NESTED },
};
static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
@@ -715,6 +797,75 @@ static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return 0;
}
+#define MAX_PROG_NAME 256
+static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
+ [SEG6_LOCAL_BPF_PROG] = { .type = NLA_U32, },
+ [SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
+ .len = MAX_PROG_NAME },
+};
+
+static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+ struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
+ struct bpf_prog *p;
+ int ret;
+ u32 fd;
+
+ ret = nla_parse_nested(tb, SEG6_LOCAL_BPF_PROG_MAX,
+ attrs[SEG6_LOCAL_BPF], bpf_prog_policy, NULL);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME])
+ return -EINVAL;
+
+ slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL);
+ if (!slwt->bpf.name)
+ return -ENOMEM;
+
+ fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]);
+ p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
+ if (IS_ERR(p)) {
+ kfree(slwt->bpf.name);
+ return PTR_ERR(p);
+ }
+
+ slwt->bpf.prog = p;
+ return 0;
+}
+
+static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct nlattr *nest;
+
+ if (!slwt->bpf.prog)
+ return 0;
+
+ nest = nla_nest_start(skb, SEG6_LOCAL_BPF);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id))
+ return -EMSGSIZE;
+
+ if (slwt->bpf.name &&
+ nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name))
+ return -EMSGSIZE;
+
+ return nla_nest_end(skb, nest);
+}
+
+static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ if (!a->bpf.name && !b->bpf.name)
+ return 0;
+
+ if (!a->bpf.name || !b->bpf.name)
+ return 1;
+
+ return strcmp(a->bpf.name, b->bpf.name);
+}
+
struct seg6_action_param {
int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
@@ -745,6 +896,11 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_OIF] = { .parse = parse_nla_oif,
.put = put_nla_oif,
.cmp = cmp_nla_oif },
+
+ [SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf,
+ .put = put_nla_bpf,
+ .cmp = cmp_nla_bpf },
+
};
static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
@@ -830,6 +986,13 @@ static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
kfree(slwt->srh);
+
+ if (slwt->desc->attrs & (1 << SEG6_LOCAL_BPF)) {
+ kfree(slwt->bpf.name);
+ bpf_prog_put(slwt->bpf.prog);
+ }
+
+ return;
}
static int seg6_local_fill_encap(struct sk_buff *skb,
@@ -882,6 +1045,11 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
if (attrs & (1 << SEG6_LOCAL_OIF))
nlsize += nla_total_size(4);
+ if (attrs & (1 << SEG6_LOCAL_BPF))
+ nlsize += nla_total_size(sizeof(struct nlattr)) +
+ nla_total_size(MAX_PROG_NAME) +
+ nla_total_size(4);
+
return nlsize;
}
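
Editor's note: the seg6_local.c changes above add the End.BPF action: input_action_end_bpf() advances the SRH, runs a BPF_PROG_TYPE_LWT_SEG6LOCAL program, and on BPF_OK performs the next-hop lookup itself, while BPF_REDIRECT means the program already installed a dst and BPF_DROP discards the packet. A hedged sketch of such a program, behaving roughly like End.X towards a fixed next hop; the address fc00::42 is a placeholder, the header paths assume a current libbpf, and the iproute2 attach syntax is only roughly "ip -6 route add <sid> encap seg6local action End.BPF endpoint obj prog.o sec lwt_seg6local dev <dev>".

/* end_bpf_x.c - hedged sketch, not part of the patch */
#include <linux/bpf.h>
#include <linux/seg6_local.h>
#include <linux/in6.h>
#include <bpf/bpf_helpers.h>

SEC("lwt_seg6local")
int end_x_like(struct __sk_buff *skb)
{
	struct in6_addr nh = {};

	nh.s6_addr[0]  = 0xfc;		/* fc00::42, placeholder next hop */
	nh.s6_addr[15] = 0x42;

	/* SEG6_LOCAL_ACTION_END_X does the lookup in the kernel; returning
	 * BPF_REDIRECT tells input_action_end_bpf() not to run
	 * seg6_lookup_nexthop() a second time.
	 */
	if (bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X, &nh, sizeof(nh)))
		return BPF_DROP;

	return BPF_REDIRECT;
}

char _license[] SEC("license") = "GPL";
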
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 963e4bf0aab8..a4a5ace834c3 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -103,9 +103,10 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);
-bool tcf_queue_work(struct work_struct *work)
+bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
{
- return queue_work(tc_filter_wq, work);
+ INIT_RCU_WORK(rwork, func);
+ return queue_rcu_work(tc_filter_wq, rwork);
}
EXPORT_SYMBOL(tcf_queue_work);
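
Editor's note: tcf_queue_work() now wraps INIT_RCU_WORK()/queue_rcu_work(), which waits an RCU grace period before running the handler from sleepable workqueue context; this is why every classifier converted below can replace its private work_struct/rcu_head union with a single struct rcu_work. A generic kernel-side sketch of the underlying pattern, using only the core workqueue API ("struct foo" and its free path are illustrative):

/* schematic only; the tc classifiers below use tcf_queue_work() instead */
#include <linux/workqueue.h>
#include <linux/slab.h>

struct foo {
	int data;
	struct rcu_work rwork;		/* replaces the old rcu_head/work union */
};

static void foo_free_work(struct work_struct *work)
{
	struct foo *f = container_of(to_rcu_work(work), struct foo, rwork);

	/* runs after a grace period and may sleep, e.g. take rtnl_lock() */
	kfree(f);
}

static void foo_release(struct foo *f)
{
	INIT_RCU_WORK(&f->rwork, foo_free_work);
	queue_rcu_work(system_wq, &f->rwork);
}
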
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 6b7ab3512f5b..95367f37098d 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -35,10 +35,7 @@ struct basic_filter {
struct tcf_result res;
struct tcf_proto *tp;
struct list_head link;
- union {
- struct work_struct work;
- struct rcu_head rcu;
- };
+ struct rcu_work rwork;
};
static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -97,21 +94,14 @@ static void __basic_delete_filter(struct basic_filter *f)
static void basic_delete_filter_work(struct work_struct *work)
{
- struct basic_filter *f = container_of(work, struct basic_filter, work);
-
+ struct basic_filter *f = container_of(to_rcu_work(work),
+ struct basic_filter,
+ rwork);
rtnl_lock();
__basic_delete_filter(f);
rtnl_unlock();
}
-static void basic_delete_filter(struct rcu_head *head)
-{
- struct basic_filter *f = container_of(head, struct basic_filter, rcu);
-
- INIT_WORK(&f->work, basic_delete_filter_work);
- tcf_queue_work(&f->work);
-}
-
static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct basic_head *head = rtnl_dereference(tp->root);
@@ -122,7 +112,7 @@ static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
tcf_unbind_filter(tp, &f->res);
idr_remove(&head->handle_idr, f->handle);
if (tcf_exts_get_net(&f->exts))
- call_rcu(&f->rcu, basic_delete_filter);
+ tcf_queue_work(&f->rwork, basic_delete_filter_work);
else
__basic_delete_filter(f);
}
@@ -140,7 +130,7 @@ static int basic_delete(struct tcf_proto *tp, void *arg, bool *last,
tcf_unbind_filter(tp, &f->res);
idr_remove(&head->handle_idr, f->handle);
tcf_exts_get_net(&f->exts);
- call_rcu(&f->rcu, basic_delete_filter);
+ tcf_queue_work(&f->rwork, basic_delete_filter_work);
*last = list_empty(&head->flist);
return 0;
}
@@ -234,7 +224,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
list_replace_rcu(&fold->link, &fnew->link);
tcf_unbind_filter(tp, &fold->res);
tcf_exts_get_net(&fold->exts);
- call_rcu(&fold->rcu, basic_delete_filter);
+ tcf_queue_work(&fold->rwork, basic_delete_filter_work);
} else {
list_add_rcu(&fnew->link, &head->flist);
}
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index b07c1fa8bc0d..1aa7f6511065 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -49,10 +49,7 @@ struct cls_bpf_prog {
struct sock_filter *bpf_ops;
const char *bpf_name;
struct tcf_proto *tp;
- union {
- struct work_struct work;
- struct rcu_head rcu;
- };
+ struct rcu_work rwork;
};
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
@@ -275,21 +272,14 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
static void cls_bpf_delete_prog_work(struct work_struct *work)
{
- struct cls_bpf_prog *prog = container_of(work, struct cls_bpf_prog, work);
-
+ struct cls_bpf_prog *prog = container_of(to_rcu_work(work),
+ struct cls_bpf_prog,
+ rwork);
rtnl_lock();
__cls_bpf_delete_prog(prog);
rtnl_unlock();
}
-static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
-{
- struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);
-
- INIT_WORK(&prog->work, cls_bpf_delete_prog_work);
- tcf_queue_work(&prog->work);
-}
-
static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog,
struct netlink_ext_ack *extack)
{
@@ -300,7 +290,7 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog,
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
if (tcf_exts_get_net(&prog->exts))
- call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
+ tcf_queue_work(&prog->rwork, cls_bpf_delete_prog_work);
else
__cls_bpf_delete_prog(prog);
}
@@ -526,7 +516,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
list_replace_rcu(&oldprog->link, &prog->link);
tcf_unbind_filter(tp, &oldprog->res);
tcf_exts_get_net(&oldprog->exts);
- call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
+ tcf_queue_work(&oldprog->rwork, cls_bpf_delete_prog_work);
} else {
list_add_rcu(&prog->link, &head->plist);
}
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 762da5c0cf5e..3bc01bdde165 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -23,10 +23,7 @@ struct cls_cgroup_head {
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
struct tcf_proto *tp;
- union {
- struct work_struct work;
- struct rcu_head rcu;
- };
+ struct rcu_work rwork;
};
static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -70,24 +67,14 @@ static void __cls_cgroup_destroy(struct cls_cgroup_head *head)
static void cls_cgroup_destroy_work(struct work_struct *work)
{
- struct cls_cgroup_head *head = container_of(work,
+ struct cls_cgroup_head *head = container_of(to_rcu_work(work),
struct cls_cgroup_head,
- work);
+ rwork);
rtnl_lock();
__cls_cgroup_destroy(head);
rtnl_unlock();
}
-static void cls_cgroup_destroy_rcu(struct rcu_head *root)
-{
- struct cls_cgroup_head *head = container_of(root,
- struct cls_cgroup_head,
- rcu);
-
- INIT_WORK(&head->work, cls_cgroup_destroy_work);
- tcf_queue_work(&head->work);
-}
-
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
@@ -134,7 +121,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
rcu_assign_pointer(tp->root, new);
if (head) {
tcf_exts_get_net(&head->exts);
- call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+ tcf_queue_work(&head->rwork, cls_cgroup_destroy_work);
}
return 0;
errout:
@@ -151,7 +138,7 @@ static void cls_cgroup_destroy(struct tcf_proto *tp,
/* Head can still be NULL due to cls_cgroup_init(). */
if (head) {
if (tcf_exts_get_net(&head->exts))
- call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
+ tcf_queue_work(&head->rwork, cls_cgroup_destroy_work);
else
__cls_cgroup_destroy(head);
}
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index cd5fe383afdd..2bb043cd436b 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -57,10 +57,7 @@ struct flow_filter {
u32 divisor;
u32 baseclass;
u32 hashrnd;
- union {
- struct work_struct work;
- struct rcu_head rcu;
- };
+ struct rcu_work rwork;
};
static inline u32 addr_fold(void *addr)
@@ -383,21 +380,14 @@ static void __flow_destroy_filter(struct flow_filter *f)
static void flow_destroy_filter_work(struct work_struct *work)
{
- struct flow_filter *f = container_of(work, struct flow_filter, work);
-
+ struct flow_filter *f = container_of(to_rcu_work(work),
+ struct flow_filter,
+ rwork);
rtnl_lock();
__flow_destroy_filter(f);
rtnl_unlock();
}
-static void flow_destroy_filter(struct rcu_head *head)
-{
- struct flow_filter *f = container_of(head, struct flow_filter, rcu);
-
- INIT_WORK(&f->work, flow_destroy_filter_work);
- tcf_queue_work(&f->work);
-}
-
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
@@ -563,7 +553,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (fold) {
tcf_exts_get_net(&fold->exts);
- call_rcu(&fold->rcu, flow_destroy_filter);
+ tcf_queue_work(&fold->rwork, flow_destroy_filter_work);
}
return 0;
@@ -583,7 +573,7 @@ static int flow_delete(struct tcf_proto *tp, void *arg, bool *last,
list_del_rcu(&f->list);
tcf_exts_get_net(&f->exts);
- call_rcu(&f->rcu, flow_destroy_filter);
+ tcf_queue_work(&f->rwork, flow_destroy_filter_work);
*last = list_empty(&head->filters);
return 0;
}
@@ -608,7 +598,7 @@ static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
list_for_each_entry_safe(f, next, &head->filters, list) {
list_del_rcu(&f->list);
if (tcf_exts_get_net(&f->exts))
- call_rcu(&f->rcu, flow_destroy_filter);
+ tcf_queue_work(&f->rwork, flow_destroy_filter_work);
else
__flow_destroy_filter(f);
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index eacaaf803914..4e74508515f4 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -73,10 +73,7 @@ struct fl_flow_mask {
struct cls_fl_head {
struct rhashtable ht;
struct list_head masks;
- union {
- struct work_struct work;
- struct rcu_head rcu;
- };
+ struct rcu_work rwork;
struct idr handle_idr;
};
@@ -90,10 +87,7 @@ struct cls_fl_filter {
struct list_head list;
u32 handle;
u32 flags;
- union {
- struct work_struct work;
- struct rcu_head rcu;
- };
+ struct rcu_work rwork;
struct net_device *hw_dev;
};
@@ -235,21 +229,14 @@ static void __fl_destroy_filter(struct cls_fl_filter *f)
static void fl_destroy_filter_work(struct work_struct *work)
{
- struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work);
+ struct cls_fl_filter *f = container_of(to_rcu_work(work),
+ struct cls_fl_filter, rwork);
rtnl_lock();
__fl_destroy_filter(f);
rtnl_unlock();
}
-static void fl_destroy_filter(struct rcu_head *head)
-{
- struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
-
- INIT_WORK(&f->work, fl_destroy_filter_work);
- tcf_queue_work(&f->work);
-}
-
static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
struct netlink_ext_ack *extack)
{
@@ -327,7 +314,7 @@ static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
fl_hw_destroy_filter(tp, f, extack);
tcf_unbind_filter(tp, &f->res);
if (async)
- call_rcu(&f->rcu, fl_destroy_filter);
+ tcf_queue_work(&f->rwork, fl_destroy_filter_work);
else
__fl_destroy_filter(f);
@@ -336,20 +323,13 @@ static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
static void fl_destroy_sleepable(struct work_struct *work)
{
- struct cls_fl_head *head = container_of(work, struct cls_fl_head,
- work);
+ struct cls_fl_head *head = container_of(to_rcu_work(work),
+ struct cls_fl_head,
+ rwork);
kfree(head);
module_put(THIS_MODULE);
}
-static void fl_destroy_rcu(struct rcu_head *rcu)
-{
- struct cls_fl_head *head = container_of(rcu, struct cls_fl_head, rcu);
-
- INIT_WORK(&head->work, fl_destroy_sleepable);
- schedule_work(&head->work);
-}
-
static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
@@ -365,7 +345,7 @@ static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
idr_destroy(&head->handle_idr);
__module_get(THIS_MODULE);
- call_rcu(&head->rcu, fl_destroy_rcu);
+ tcf_queue_work(&head->rwork, fl_destroy_sleepable);
}
static void *fl_get(struct tcf_proto *tp, u32 handle)
@@ -1036,7 +1016,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
list_replace_rcu(&fold->list, &fnew->list);
tcf_unbind_filter(tp, &fold->res);
tcf_exts_get_net(&fold->exts);
- call_rcu(&fold->rcu, fl_destroy_filter);
+ tcf_queue_work(&fold->rwork, fl_destroy_filter_work);
} else {
list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
}
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 8b207723fbc2..29eeeaf3ea44 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -47,10 +47,7 @@ struct fw_filter {
#endif /* CONFIG_NET_CLS_IND */
struct tcf_exts exts;
struct tcf_proto *tp;
- union {
- struct work_struct work;
- struct rcu_head rcu;
- };
+ struct rcu_work rwork;
};
static u32 fw_hash(u32 handle)
@@ -134,21 +131,14 @@ static void __fw_delete_filter(struct fw_filter *f)
static void fw_delete_filter_work(struct work_struct *work)
{
- struct fw_filter *f = container_of(work, struct fw_filter, work);
-
+ struct fw_filter *f = container_of(to_rcu_work(work),
+ struct fw_filter,
+ rwork);
rtnl_lock();
__fw_delete_filter(f);
rtnl_unlock();
}
-static void fw_delete_filter(struct rcu_head *head)
-{
- struct fw_filter *f = container_of(head, struct fw_filter, rcu);
-
- INIT_WORK(&f->work, fw_delete_filter_work);
- tcf_queue_work(&f->work);
-}
-
static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
@@ -164,7 +154,7 @@ static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
rtnl_dereference(f->next));
tcf_unbind_filter(tp, &f->res);
if (tcf_exts_get_net(&f->exts))
- call_rcu(&f->rcu, fw_delete_filter);
+ tcf_queue_work(&f->rwork, fw_delete_filter_work);
else
__fw_delete_filter(f);
}
@@ -193,7 +183,7 @@ static int fw_delete(struct tcf_proto *tp, void *arg, bool *last,
RCU_INIT_POINTER(*fp, rtnl_dereference(f->next));
tcf_unbind_filter(tp, &f->res);
tcf_exts_get_net(&f->exts);
- call_rcu(&f->rcu, fw_delete_filter);
+ tcf_queue_work(&f->rwork, fw_delete_filter_work);
ret = 0;
break;
}
@@ -316,7 +306,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
rcu_assign_pointer(*fp, fnew);
tcf_unbind_filter(tp, &f->res);
tcf_exts_get_net(&f->exts);
- call_rcu(&f->rcu, fw_delete_filter);
+ tcf_queue_work(&f->rwork, fw_delete_filter_work);
*arg = fnew;
return err;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 2ba721a590a7..47b207ef7762 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -21,10 +21,7 @@ struct cls_mall_head {
struct tcf_result res;
u32 handle;
u32 flags;
- union {
- struct work_struct work;
- struct rcu_head rcu;
- };
+ struct rcu_work rwork;
};
static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -53,22 +50,14 @@ static void __mall_destroy(struct cls_mall_head *head)
static void mall_destroy_work(struct work_struct *work)
{
- struct cls_mall_head *head = container_of(work, struct cls_mall_head,
- work);
+ struct cls_mall_head *head = container_of(to_rcu_work(work),
+ struct cls_mall_head,
+ rwork);
rtnl_lock();
__mall_destroy(head);
rtnl_unlock();
}
-static void mall_destroy_rcu(struct rcu_head *rcu)
-{
- struct cls_mall_head *head = container_of(rcu, struct cls_mall_head,
- rcu);
-
- INIT_WORK(&head->work, mall_destroy_work);
- tcf_queue_work(&head->work);
-}
-
static void mall_destroy_hw_filter(struct tcf_proto *tp,
struct cls_mall_head *head,
unsigned long cookie,
@@ -126,7 +115,7 @@ static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
mall_destroy_hw_filter(tp, head, (unsigned long) head, extack);
if (tcf_exts_get_net(&head->exts))
- call_rcu(&head->rcu, mall_destroy_rcu);
+ tcf_queue_work(&head->rwork, mall_destroy_work);
else
__mall_destroy(head);
}
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 21a03a8ee029..0404aa5fa7cb 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -57,10 +57,7 @@ struct route4_filter {
u32 handle;
struct route4_bucket *bkt;
struct tcf_proto *tp;
- union {
- struct work_struct work;
- struct rcu_head rcu;
- };
+ struct rcu_work rwork;
};
#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
@@ -266,19 +263,17 @@ static void __route4_delete_filter(struct route4_filter *f)
static void route4_delete_filter_work(struct work_struct *work)
{
- struct route4_filter *f = container_of(work, struct route4_filter, work);
-
+ struct route4_filter *f = container_of(to_rcu_work(work),
+ struct route4_filter,
+ rwork);
rtnl_lock();
__route4_delete_filter(f);
rtnl_unlock();
}
-static void route4_delete_filter(struct rcu_head *head)
+static void route4_queue_work(struct route4_filter *f)
{
- struct route4_filter *f = container_of(head, struct route4_filter, rcu);
-
- INIT_WORK(&f->work, route4_delete_filter_work);
- tcf_queue_work(&f->work);
+ tcf_queue_work(&f->rwork, route4_delete_filter_work);
}
static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
@@ -304,7 +299,7 @@ static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
RCU_INIT_POINTER(b->ht[h2], next);
tcf_unbind_filter(tp, &f->res);
if (tcf_exts_get_net(&f->exts))
- call_rcu(&f->rcu, route4_delete_filter);
+ route4_queue_work(f);
else
__route4_delete_filter(f);
}
@@ -349,7 +344,7 @@ static int route4_delete(struct tcf_proto *tp, void *arg, bool *last,
/* Delete it */
tcf_unbind_filter(tp, &f->res);
tcf_exts_get_net(&f->exts);
- call_rcu(&f->rcu, route4_delete_filter);
+ tcf_queue_work(&f->rwork, route4_delete_filter_work);
/* Strip RTNL protected tree */
for (i = 0; i <= 32; i++) {
@@ -554,7 +549,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (fold) {
tcf_unbind_filter(tp, &fold->res);
tcf_exts_get_net(&fold->exts);
- call_rcu(&fold->rcu, route4_delete_filter);
+ tcf_queue_work(&fold->rwork, route4_delete_filter_work);
}
return 0;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 4f1297657c27..e9ccf7daea7d 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -97,10 +97,7 @@ struct rsvp_filter {
u32 handle;
struct rsvp_session *sess;
- union {
- struct work_struct work;
- struct rcu_head rcu;
- };
+ struct rcu_work rwork;
};
static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
@@ -294,21 +291,14 @@ static void __rsvp_delete_filter(struct rsvp_filter *f)
static void rsvp_delete_filter_work(struct work_struct *work)
{
- struct rsvp_filter *f = container_of(work, struct rsvp_filter, work);
-
+ struct rsvp_filter *f = container_of(to_rcu_work(work),
+ struct rsvp_filter,
+ rwork);
rtnl_lock();
__rsvp_delete_filter(f);
rtnl_unlock();
}
-static void rsvp_delete_filter_rcu(struct rcu_head *head)
-{
- struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
-
- INIT_WORK(&f->work, rsvp_delete_filter_work);
- tcf_queue_work(&f->work);
-}
-
static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
{
tcf_unbind_filter(tp, &f->res);
@@ -317,7 +307,7 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
* in cleanup() callback
*/
if (tcf_exts_get_net(&f->exts))
- call_rcu(&f->rcu, rsvp_delete_filter_rcu);
+ tcf_queue_work(&f->rwork, rsvp_delete_filter_work);
else
__rsvp_delete_filter(f);
}
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index b49cc990a000..32f4bbd82f35 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -28,20 +28,14 @@
struct tcindex_filter_result {
struct tcf_exts exts;
struct tcf_result res;
- union {
- struct work_struct work;
- struct rcu_head rcu;
- };
+ struct rcu_work rwork;
};
struct tcindex_filter {
u16 key;
struct tcindex_filter_result result;
struct tcindex_filter __rcu *next;
- union {
- struct work_struct work;
- struct rcu_head rcu;
- };
+ struct rcu_work rwork;
};
@@ -152,21 +146,14 @@ static void tcindex_destroy_rexts_work(struct work_struct *work)
{
struct tcindex_filter_result *r;
- r = container_of(work, struct tcindex_filter_result, work);
+ r = container_of(to_rcu_work(work),
+ struct tcindex_filter_result,
+ rwork);
rtnl_lock();
__tcindex_destroy_rexts(r);
rtnl_unlock();
}
-static void tcindex_destroy_rexts(struct rcu_head *head)
-{
- struct tcindex_filter_result *r;
-
- r = container_of(head, struct tcindex_filter_result, rcu);
- INIT_WORK(&r->work, tcindex_destroy_rexts_work);
- tcf_queue_work(&r->work);
-}
-
static void __tcindex_destroy_fexts(struct tcindex_filter *f)
{
tcf_exts_destroy(&f->result.exts);
@@ -176,23 +163,15 @@ static void __tcindex_destroy_fexts(struct tcindex_filter *f)
static void tcindex_destroy_fexts_work(struct work_struct *work)
{
- struct tcindex_filter *f = container_of(work, struct tcindex_filter,
- work);
+ struct tcindex_filter *f = container_of(to_rcu_work(work),
+ struct tcindex_filter,
+ rwork);
rtnl_lock();
__tcindex_destroy_fexts(f);
rtnl_unlock();
}
-static void tcindex_destroy_fexts(struct rcu_head *head)
-{
- struct tcindex_filter *f = container_of(head, struct tcindex_filter,
- rcu);
-
- INIT_WORK(&f->work, tcindex_destroy_fexts_work);
- tcf_queue_work(&f->work);
-}
-
static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
struct netlink_ext_ack *extack)
{
@@ -228,12 +207,12 @@ found:
*/
if (f) {
if (tcf_exts_get_net(&f->result.exts))
- call_rcu(&f->rcu, tcindex_destroy_fexts);
+ tcf_queue_work(&f->rwork, tcindex_destroy_fexts_work);
else
__tcindex_destroy_fexts(f);
} else {
if (tcf_exts_get_net(&r->exts))
- call_rcu(&r->rcu, tcindex_destroy_rexts);
+ tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work);
else
__tcindex_destroy_rexts(r);
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index bac47b5d18fd..fb861f90fde6 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -68,10 +68,7 @@ struct tc_u_knode {
u32 __percpu *pcpu_success;
#endif
struct tcf_proto *tp;
- union {
- struct work_struct work;
- struct rcu_head rcu;
- };
+ struct rcu_work rwork;
/* The 'sel' field MUST be the last field in structure to allow for
* tc_u32_keys allocated at end of structure.
*/
@@ -436,21 +433,14 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
*/
static void u32_delete_key_work(struct work_struct *work)
{
- struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);
-
+ struct tc_u_knode *key = container_of(to_rcu_work(work),
+ struct tc_u_knode,
+ rwork);
rtnl_lock();
u32_destroy_key(key->tp, key, false);
rtnl_unlock();
}
-static void u32_delete_key_rcu(struct rcu_head *rcu)
-{
- struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
-
- INIT_WORK(&key->work, u32_delete_key_work);
- tcf_queue_work(&key->work);
-}
-
/* u32_delete_key_freepf_rcu is the rcu callback variant
* that free's the entire structure including the statistics
* percpu variables. Only use this if the key is not a copy
@@ -460,21 +450,14 @@ static void u32_delete_key_rcu(struct rcu_head *rcu)
*/
static void u32_delete_key_freepf_work(struct work_struct *work)
{
- struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);
-
+ struct tc_u_knode *key = container_of(to_rcu_work(work),
+ struct tc_u_knode,
+ rwork);
rtnl_lock();
u32_destroy_key(key->tp, key, true);
rtnl_unlock();
}
-static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
-{
- struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
-
- INIT_WORK(&key->work, u32_delete_key_freepf_work);
- tcf_queue_work(&key->work);
-}
-
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
{
struct tc_u_knode __rcu **kp;
@@ -491,7 +474,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
tcf_unbind_filter(tp, &key->res);
idr_remove(&ht->handle_idr, key->handle);
tcf_exts_get_net(&key->exts);
- call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
+ tcf_queue_work(&key->rwork, u32_delete_key_freepf_work);
return 0;
}
}
@@ -611,7 +594,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
u32_remove_hw_knode(tp, n, extack);
idr_remove(&ht->handle_idr, n->handle);
if (tcf_exts_get_net(&n->exts))
- call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
+ tcf_queue_work(&n->rwork, u32_delete_key_freepf_work);
else
u32_destroy_key(n->tp, n, true);
}
@@ -995,7 +978,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
u32_replace_knode(tp, tp_c, new);
tcf_unbind_filter(tp, &n->res);
tcf_exts_get_net(&n->exts);
- call_rcu(&n->rcu, u32_delete_key_rcu);
+ tcf_queue_work(&n->rwork, u32_delete_key_work);
return 0;
}
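
The four conversions above (cls_route, cls_rsvp, cls_tcindex, cls_u32) all follow the same shape: the work/rcu union in each filter becomes a single struct rcu_work, and the per-filter trampoline that used to INIT_WORK() from inside the call_rcu() callback disappears in favour of one tcf_queue_work(&x->rwork, handler) call. A minimal sketch of the resulting pattern, using a hypothetical "foo" classifier and assuming tcf_queue_work() is now backed by queue_rcu_work() (that helper change is not part of these hunks):

	struct foo_filter {
		struct tcf_result res;
		struct tcf_exts exts;
		struct rcu_work rwork;		/* replaces the old work/rcu union */
	};

	static void __foo_delete_filter(struct foo_filter *f);	/* hypothetical: frees exts and filter */

	static void foo_delete_filter_work(struct work_struct *work)
	{
		struct foo_filter *f = container_of(to_rcu_work(work),
						    struct foo_filter, rwork);

		rtnl_lock();
		__foo_delete_filter(f);
		rtnl_unlock();
	}

	static void foo_delete_filter(struct tcf_proto *tp, struct foo_filter *f)
	{
		tcf_unbind_filter(tp, &f->res);
		/* defer (grace period + workqueue) only when actions hold netns refs */
		if (tcf_exts_get_net(&f->exts))
			tcf_queue_work(&f->rwork, foo_delete_filter_work);
		else
			__foo_delete_filter(f);
	}
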
diff --git a/net/xdp/Makefile b/net/xdp/Makefile
index 074fb2b2d51c..04f073146256 100644
--- a/net/xdp/Makefile
+++ b/net/xdp/Makefile
@@ -1,2 +1 @@
obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
-
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 2b47a1dd7c6c..87998818116f 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -1,15 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
* Copyright(c) 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#include <linux/init.h>
@@ -25,39 +16,25 @@
#define XDP_UMEM_MIN_FRAME_SIZE 2048
-int xdp_umem_create(struct xdp_umem **umem)
-{
- *umem = kzalloc(sizeof(**umem), GFP_KERNEL);
-
- if (!(*umem))
- return -ENOMEM;
-
- return 0;
-}
-
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
unsigned int i;
- if (umem->pgs) {
- for (i = 0; i < umem->npgs; i++) {
- struct page *page = umem->pgs[i];
-
- set_page_dirty_lock(page);
- put_page(page);
- }
+ for (i = 0; i < umem->npgs; i++) {
+ struct page *page = umem->pgs[i];
- kfree(umem->pgs);
- umem->pgs = NULL;
+ set_page_dirty_lock(page);
+ put_page(page);
}
+
+ kfree(umem->pgs);
+ umem->pgs = NULL;
}
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
- if (umem->user) {
- atomic_long_sub(umem->npgs, &umem->user->locked_vm);
- free_uid(umem->user);
- }
+ atomic_long_sub(umem->npgs, &umem->user->locked_vm);
+ free_uid(umem->user);
}
static void xdp_umem_release(struct xdp_umem *umem)
@@ -75,22 +52,18 @@ static void xdp_umem_release(struct xdp_umem *umem)
umem->cq = NULL;
}
- if (umem->pgs) {
- xdp_umem_unpin_pages(umem);
-
- task = get_pid_task(umem->pid, PIDTYPE_PID);
- put_pid(umem->pid);
- if (!task)
- goto out;
- mm = get_task_mm(task);
- put_task_struct(task);
- if (!mm)
- goto out;
+ xdp_umem_unpin_pages(umem);
- mmput(mm);
- umem->pgs = NULL;
- }
+ task = get_pid_task(umem->pid, PIDTYPE_PID);
+ put_pid(umem->pid);
+ if (!task)
+ goto out;
+ mm = get_task_mm(task);
+ put_task_struct(task);
+ if (!mm)
+ goto out;
+ mmput(mm);
xdp_umem_unaccount_pages(umem);
out:
kfree(umem);
@@ -105,7 +78,7 @@ static void xdp_umem_release_deferred(struct work_struct *work)
void xdp_get_umem(struct xdp_umem *umem)
{
- atomic_inc(&umem->users);
+ refcount_inc(&umem->users);
}
void xdp_put_umem(struct xdp_umem *umem)
@@ -113,7 +86,7 @@ void xdp_put_umem(struct xdp_umem *umem)
if (!umem)
return;
- if (atomic_dec_and_test(&umem->users)) {
+ if (refcount_dec_and_test(&umem->users)) {
INIT_WORK(&umem->work, xdp_umem_release_deferred);
schedule_work(&umem->work);
}
@@ -176,16 +149,13 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
return 0;
}
-int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
+static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
u64 addr = mr->addr, size = mr->len;
unsigned int nframes, nfpp;
int size_chk, err;
- if (!umem)
- return -EINVAL;
-
if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
/* Strictly speaking we could support this, if:
* - huge pages, or
@@ -236,7 +206,7 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->frame_size_log2 = ilog2(frame_size);
umem->nfpp_mask = nfpp - 1;
umem->nfpplog2 = ilog2(nfpp);
- atomic_set(&umem->users, 1);
+ refcount_set(&umem->users, 1);
err = xdp_umem_account_pages(umem);
if (err)
@@ -254,7 +224,25 @@ out:
return err;
}
+struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
+{
+ struct xdp_umem *umem;
+ int err;
+
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+
+ err = xdp_umem_reg(umem, mr);
+ if (err) {
+ kfree(umem);
+ return ERR_PTR(err);
+ }
+
+ return umem;
+}
+
bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
- return (umem->fq && umem->cq);
+ return umem->fq && umem->cq;
}
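
xdp_umem_reg() becomes static and the allocate-then-register dance is folded into xdp_umem_create(), which returns ERR_PTR-encoded errors instead of filling an out parameter. A hedged sketch of the caller side this enables, essentially what the xsk_setsockopt() XDP_UMEM_REG hunk further down does (foo_register_umem is a hypothetical name):

	static int foo_register_umem(struct xdp_sock *xs, struct xdp_umem_reg *mr)
	{
		struct xdp_umem *umem;

		umem = xdp_umem_create(mr);	/* kzalloc + xdp_umem_reg in one call */
		if (IS_ERR(umem))
			return PTR_ERR(umem);	/* -ENOMEM or the registration error */

		/* publish only after the umem is fully initialised */
		smp_wmb();
		xs->umem = umem;
		return 0;
	}
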
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index 7e0b2fab8522..0881cf456230 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -1,15 +1,6 @@
-/* SPDX-License-Identifier: GPL-2.0
- * XDP user-space packet buffer
+/* SPDX-License-Identifier: GPL-2.0 */
+/* XDP user-space packet buffer
* Copyright(c) 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#ifndef XDP_UMEM_H_
@@ -36,7 +27,7 @@ struct xdp_umem {
struct pid *pid;
unsigned long address;
size_t size;
- atomic_t users;
+ refcount_t users;
struct work_struct work;
};
@@ -59,9 +50,8 @@ static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem,
}
bool xdp_umem_validate_queues(struct xdp_umem *umem);
-int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
void xdp_get_umem(struct xdp_umem *umem);
void xdp_put_umem(struct xdp_umem *umem);
-int xdp_umem_create(struct xdp_umem **umem);
+struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
#endif /* XDP_UMEM_H_ */
diff --git a/net/xdp/xdp_umem_props.h b/net/xdp/xdp_umem_props.h
index 77fb5daf29f3..2cf8ec485fd2 100644
--- a/net/xdp/xdp_umem_props.h
+++ b/net/xdp/xdp_umem_props.h
@@ -1,15 +1,6 @@
-/* SPDX-License-Identifier: GPL-2.0
- * XDP user-space packet buffer
+/* SPDX-License-Identifier: GPL-2.0 */
+/* XDP user-space packet buffer
* Copyright(c) 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#ifndef XDP_UMEM_PROPS_H_
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 009c5af5bba5..cce0e4f8a536 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -5,15 +5,6 @@
* applications.
* Copyright(c) 2018 Intel Corporation.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
* Author(s): Björn Töpel <[email protected]>
* Magnus Karlsson <[email protected]>
*/
@@ -151,6 +142,11 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
goto out;
}
+ if (xs->queue_id >= xs->dev->real_num_tx_queues) {
+ err = -ENXIO;
+ goto out;
+ }
+
skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
if (unlikely(!skb)) {
err = -EAGAIN;
@@ -232,18 +228,12 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
if (!q)
return -ENOMEM;
+ /* Make sure queue is ready before it can be seen by others */
+ smp_wmb();
*queue = q;
return 0;
}
-static void __xsk_release(struct xdp_sock *xs)
-{
- /* Wait for driver to stop using the xdp socket. */
- synchronize_net();
-
- dev_put(xs->dev);
-}
-
static int xsk_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -260,7 +250,9 @@ static int xsk_release(struct socket *sock)
local_bh_enable();
if (xs->dev) {
- __xsk_release(xs);
+ /* Wait for driver to stop using the xdp socket. */
+ synchronize_net();
+ dev_put(xs->dev);
xs->dev = NULL;
}
@@ -294,9 +286,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
struct sock *sk = sock->sk;
- struct net_device *dev, *dev_curr;
struct xdp_sock *xs = xdp_sk(sk);
- struct xdp_umem *old_umem = NULL;
+ struct net_device *dev;
int err = 0;
if (addr_len < sizeof(struct sockaddr_xdp))
@@ -305,7 +296,11 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
return -EINVAL;
mutex_lock(&xs->mutex);
- dev_curr = xs->dev;
+ if (xs->dev) {
+ err = -EBUSY;
+ goto out_release;
+ }
+
dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
if (!dev) {
err = -ENODEV;
@@ -317,7 +312,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
goto out_unlock;
}
- if (sxdp->sxdp_queue_id >= dev->num_rx_queues) {
+ if ((xs->rx && sxdp->sxdp_queue_id >= dev->real_num_rx_queues) ||
+ (xs->tx && sxdp->sxdp_queue_id >= dev->real_num_tx_queues)) {
err = -EINVAL;
goto out_unlock;
}
@@ -352,7 +348,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
xdp_get_umem(umem_xs->umem);
- old_umem = xs->umem;
xs->umem = umem_xs->umem;
sockfd_put(sock);
} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
@@ -364,14 +359,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xskq_set_umem(xs->umem->cq, &xs->umem->props);
}
- /* Rebind? */
- if (dev_curr && (dev_curr != dev ||
- xs->queue_id != sxdp->sxdp_queue_id)) {
- __xsk_release(xs);
- if (old_umem)
- xdp_put_umem(old_umem);
- }
-
xs->dev = dev;
xs->queue_id = sxdp->sxdp_queue_id;
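
With rebind support removed, an AF_XDP socket is bound at most once: a second bind() on an already bound socket now fails with EBUSY, and the queue id is checked against the real number of RX/TX queues rather than num_rx_queues. A hedged user-space sketch of the bind step (ifindex and queue are caller supplied; assumes <sys/socket.h>, <linux/if_xdp.h> and <errno.h>):

	static int xsk_bind_once(int fd, int ifindex, __u32 queue)
	{
		struct sockaddr_xdp sxdp = {};

		sxdp.sxdp_family = PF_XDP;
		sxdp.sxdp_ifindex = ifindex;
		sxdp.sxdp_queue_id = queue;	/* must be < real_num_rx/tx_queues */

		if (bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp)))
			return -errno;	/* repeat bind on a bound socket: -EBUSY */
		return 0;
	}
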
@@ -419,25 +406,23 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xdp_umem_reg mr;
struct xdp_umem *umem;
- if (xs->umem)
- return -EBUSY;
-
if (copy_from_user(&mr, optval, sizeof(mr)))
return -EFAULT;
mutex_lock(&xs->mutex);
- err = xdp_umem_create(&umem);
+ if (xs->umem) {
+ mutex_unlock(&xs->mutex);
+ return -EBUSY;
+ }
- err = xdp_umem_reg(umem, &mr);
- if (err) {
- kfree(umem);
+ umem = xdp_umem_create(&mr);
+ if (IS_ERR(umem)) {
mutex_unlock(&xs->mutex);
- return err;
+ return PTR_ERR(umem);
}
/* Make sure umem is ready before it can be seen by others */
smp_wmb();
-
xs->umem = umem;
mutex_unlock(&xs->mutex);
return 0;
@@ -448,13 +433,15 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xsk_queue **q;
int entries;
- if (!xs->umem)
- return -EINVAL;
-
if (copy_from_user(&entries, optval, sizeof(entries)))
return -EFAULT;
mutex_lock(&xs->mutex);
+ if (!xs->umem) {
+ mutex_unlock(&xs->mutex);
+ return -EINVAL;
+ }
+
q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
&xs->umem->cq;
err = xsk_init_queue(entries, q, true);
@@ -504,6 +491,35 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
+ case XDP_MMAP_OFFSETS:
+ {
+ struct xdp_mmap_offsets off;
+
+ if (len < sizeof(off))
+ return -EINVAL;
+
+ off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
+ off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
+ off.rx.desc = offsetof(struct xdp_rxtx_ring, desc);
+ off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
+ off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
+ off.tx.desc = offsetof(struct xdp_rxtx_ring, desc);
+
+ off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
+ off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
+ off.fr.desc = offsetof(struct xdp_umem_ring, desc);
+ off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
+ off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
+ off.cr.desc = offsetof(struct xdp_umem_ring, desc);
+
+ len = sizeof(off);
+ if (copy_to_user(optval, &off, len))
+ return -EFAULT;
+ if (put_user(len, optlen))
+ return -EFAULT;
+
+ return 0;
+ }
default:
break;
}
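
The new XDP_MMAP_OFFSETS getsockopt exposes the producer, consumer and descriptor offsets of every ring, so user space no longer has to hard-code the ring struct layout (the xdp_rxtx_ring/xdp_umem_ring definitions move into the kernel-private xsk_queue.h below). A hedged user-space sketch of the expected usage for the RX ring, mirroring the xdpsock sample later in this patch (NUM_DESCS stands in for the ring size; assumes <sys/mman.h>, <sys/socket.h> and <linux/if_xdp.h>):

	static void *xsk_mmap_rx_ring(int fd, __u32 **prod, __u32 **cons,
				      struct xdp_desc **descs)
	{
		struct xdp_mmap_offsets off;
		socklen_t optlen = sizeof(off);
		void *map;

		if (getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen))
			return NULL;

		map = mmap(NULL, off.rx.desc + NUM_DESCS * sizeof(struct xdp_desc),
			   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			   fd, XDP_PGOFF_RX_RING);
		if (map == MAP_FAILED)
			return NULL;

		*prod  = (__u32 *)((char *)map + off.rx.producer);
		*cons  = (__u32 *)((char *)map + off.rx.consumer);
		*descs = (struct xdp_desc *)((char *)map + off.rx.desc);
		return map;
	}
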
@@ -518,21 +534,23 @@ static int xsk_mmap(struct file *file, struct socket *sock,
unsigned long size = vma->vm_end - vma->vm_start;
struct xdp_sock *xs = xdp_sk(sock->sk);
struct xsk_queue *q = NULL;
+ struct xdp_umem *umem;
unsigned long pfn;
struct page *qpg;
if (offset == XDP_PGOFF_RX_RING) {
- q = xs->rx;
+ q = READ_ONCE(xs->rx);
} else if (offset == XDP_PGOFF_TX_RING) {
- q = xs->tx;
+ q = READ_ONCE(xs->tx);
} else {
- if (!xs->umem)
+ umem = READ_ONCE(xs->umem);
+ if (!umem)
return -EINVAL;
if (offset == XDP_UMEM_PGOFF_FILL_RING)
- q = xs->umem->fq;
+ q = READ_ONCE(umem->fq);
else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
- q = xs->umem->cq;
+ q = READ_ONCE(umem->cq);
}
if (!q)
@@ -554,24 +572,24 @@ static struct proto xsk_proto = {
};
static const struct proto_ops xsk_proto_ops = {
- .family = PF_XDP,
- .owner = THIS_MODULE,
- .release = xsk_release,
- .bind = xsk_bind,
- .connect = sock_no_connect,
- .socketpair = sock_no_socketpair,
- .accept = sock_no_accept,
- .getname = sock_no_getname,
- .poll = xsk_poll,
- .ioctl = sock_no_ioctl,
- .listen = sock_no_listen,
- .shutdown = sock_no_shutdown,
- .setsockopt = xsk_setsockopt,
- .getsockopt = xsk_getsockopt,
- .sendmsg = xsk_sendmsg,
- .recvmsg = sock_no_recvmsg,
- .mmap = xsk_mmap,
- .sendpage = sock_no_sendpage,
+ .family = PF_XDP,
+ .owner = THIS_MODULE,
+ .release = xsk_release,
+ .bind = xsk_bind,
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = xsk_poll,
+ .ioctl = sock_no_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = xsk_setsockopt,
+ .getsockopt = xsk_getsockopt,
+ .sendmsg = xsk_sendmsg,
+ .recvmsg = sock_no_recvmsg,
+ .mmap = xsk_mmap,
+ .sendpage = sock_no_sendpage,
};
static void xsk_destruct(struct sock *sk)
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index d012e5e23591..ebe85e59507e 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -1,15 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* XDP user-space ring structure
* Copyright(c) 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#include <linux/slab.h>
@@ -31,8 +22,7 @@ static u32 xskq_umem_get_ring_size(struct xsk_queue *q)
static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
{
- return (sizeof(struct xdp_ring) +
- q->nentries * sizeof(struct xdp_desc));
+ return sizeof(struct xdp_ring) + q->nentries * sizeof(struct xdp_desc);
}
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 7aa9a535db0e..cb8e5be35110 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -1,15 +1,6 @@
-/* SPDX-License-Identifier: GPL-2.0
- * XDP user-space ring structure
+/* SPDX-License-Identifier: GPL-2.0 */
+/* XDP user-space ring structure
* Copyright(c) 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#ifndef _LINUX_XSK_QUEUE_H
@@ -22,6 +13,23 @@
#define RX_BATCH_SIZE 16
+struct xdp_ring {
+ u32 producer ____cacheline_aligned_in_smp;
+ u32 consumer ____cacheline_aligned_in_smp;
+};
+
+/* Used for the RX and TX queues for packets */
+struct xdp_rxtx_ring {
+ struct xdp_ring ptrs;
+ struct xdp_desc desc[0] ____cacheline_aligned_in_smp;
+};
+
+/* Used for the fill and completion queues for buffers */
+struct xdp_umem_ring {
+ struct xdp_ring ptrs;
+ u32 desc[0] ____cacheline_aligned_in_smp;
+};
+
struct xsk_queue {
struct xdp_umem_props umem_props;
u32 ring_mask;
@@ -232,12 +240,12 @@ static inline void xskq_produce_flush_desc(struct xsk_queue *q)
static inline bool xskq_full_desc(struct xsk_queue *q)
{
- return (xskq_nb_avail(q, q->nentries) == q->nentries);
+ return xskq_nb_avail(q, q->nentries) == q->nentries;
}
static inline bool xskq_empty_desc(struct xsk_queue *q)
{
- return (xskq_nb_free(q, q->prod_tail, 1) == q->nentries);
+ return xskq_nb_free(q, q->prod_tail, 1) == q->nentries;
}
void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props);
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 62a99ab680e3..1303af10e54d 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -51,6 +51,7 @@ hostprogs-y += cpustat
hostprogs-y += xdp_adjust_tail
hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
+hostprogs-y += task_fd_query
# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
xdpsock-objs := bpf_load.o xdpsock_user.o
xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
+task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -160,6 +162,7 @@ always += cpustat_kern.o
always += xdp_adjust_tail_kern.o
always += xdpsock_kern.o
always += xdp_fwd_kern.o
+always += task_fd_query_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -175,6 +178,7 @@ HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
+HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
HOST_LOADLIBES += $(LIBBPF) -lelf
HOSTLOADLIBES_tracex4 += -lrt
diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c
new file mode 100644
index 000000000000..f4b0a9ea674d
--- /dev/null
+++ b/samples/bpf/task_fd_query_kern.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+SEC("kprobe/blk_start_request")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("kretprobe/blk_account_io_completion")
+int bpf_prog2(struct pt_regs *ctx)
+{
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
new file mode 100644
index 000000000000..8381d792f138
--- /dev/null
+++ b/samples/bpf/task_fd_query_user.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "perf-sys.h"
+#include "trace_helpers.h"
+
+#define CHECK_PERROR_RET(condition) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("FAIL: %s:\n", __func__); \
+ perror(" "); \
+ return -1; \
+ } \
+})
+
+#define CHECK_AND_RET(condition) ({ \
+ int __ret = !!(condition); \
+ if (__ret) \
+ return -1; \
+})
+
+static __u64 ptr_to_u64(void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type"
+static int bpf_find_probe_type(const char *event_type)
+{
+ char buf[256];
+ int fd, ret;
+
+ ret = snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+
+ fd = open(buf, O_RDONLY);
+ CHECK_PERROR_RET(fd < 0);
+
+ ret = read(fd, buf, sizeof(buf));
+ close(fd);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+
+ errno = 0;
+ ret = (int)strtol(buf, NULL, 10);
+ CHECK_PERROR_RET(errno);
+ return ret;
+}
+
+#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe"
+static int bpf_get_retprobe_bit(const char *event_type)
+{
+ char buf[256];
+ int fd, ret;
+
+ ret = snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+
+ fd = open(buf, O_RDONLY);
+ CHECK_PERROR_RET(fd < 0);
+
+ ret = read(fd, buf, sizeof(buf));
+ close(fd);
+ CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
+ CHECK_PERROR_RET(strlen(buf) < strlen("config:"));
+
+ errno = 0;
+ ret = (int)strtol(buf + strlen("config:"), NULL, 10);
+ CHECK_PERROR_RET(errno);
+ return ret;
+}
+
+static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name,
+ __u32 expected_fd_type)
+{
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ char buf[256];
+ int err;
+
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), event_fd[prog_fd_idx], 0, buf, &len,
+ &prog_id, &fd_type, &probe_offset,
+ &probe_addr);
+ if (err < 0) {
+ printf("FAIL: %s, for event_fd idx %d, fn_name %s\n",
+ __func__, prog_fd_idx, fn_name);
+ perror(" :");
+ return -1;
+ }
+ if (strcmp(buf, fn_name) != 0 ||
+ fd_type != expected_fd_type ||
+ probe_offset != 0x0 || probe_addr != 0x0) {
+ printf("FAIL: bpf_trace_event_query(event_fd[%d]):\n",
+ prog_fd_idx);
+ printf("buf: %s, fd_type: %u, probe_offset: 0x%llx,"
+ " probe_addr: 0x%llx\n",
+ buf, fd_type, probe_offset, probe_addr);
+ return -1;
+ }
+ return 0;
+}
+
+static int test_nondebug_fs_kuprobe_common(const char *event_type,
+ const char *name, __u64 offset, __u64 addr, bool is_return,
+ char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
+ __u64 *probe_offset, __u64 *probe_addr)
+{
+ int is_return_bit = bpf_get_retprobe_bit(event_type);
+ int type = bpf_find_probe_type(event_type);
+ struct perf_event_attr attr = {};
+ int fd;
+
+ if (type < 0 || is_return_bit < 0) {
+ printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n",
+ __func__, type, is_return_bit);
+ return -1;
+ }
+
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ if (is_return)
+ attr.config |= 1 << is_return_bit;
+
+ if (name) {
+ attr.config1 = ptr_to_u64((void *)name);
+ attr.config2 = offset;
+ } else {
+ attr.config1 = 0;
+ attr.config2 = addr;
+ }
+ attr.size = sizeof(attr);
+ attr.type = type;
+
+ fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
+ CHECK_PERROR_RET(fd < 0);
+
+ CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0);
+ CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
+ CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len,
+ prog_id, fd_type, probe_offset, probe_addr) < 0);
+
+ return 0;
+}
+
+static int test_nondebug_fs_probe(const char *event_type, const char *name,
+ __u64 offset, __u64 addr, bool is_return,
+ __u32 expected_fd_type,
+ __u32 expected_ret_fd_type,
+ char *buf, __u32 buf_len)
+{
+ __u64 probe_offset, probe_addr;
+ __u32 prog_id, fd_type;
+ int err;
+
+ err = test_nondebug_fs_kuprobe_common(event_type, name,
+ offset, addr, is_return,
+ buf, &buf_len, &prog_id,
+ &fd_type, &probe_offset,
+ &probe_addr);
+ if (err < 0) {
+ printf("FAIL: %s, "
+ "for name %s, offset 0x%llx, addr 0x%llx, is_return %d\n",
+ __func__, name ? name : "", offset, addr, is_return);
+ perror(" :");
+ return -1;
+ }
+ if ((is_return && fd_type != expected_ret_fd_type) ||
+ (!is_return && fd_type != expected_fd_type)) {
+ printf("FAIL: %s, incorrect fd_type %u\n",
+ __func__, fd_type);
+ return -1;
+ }
+ if (name) {
+ if (strcmp(name, buf) != 0) {
+ printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
+ return -1;
+ }
+ if (probe_offset != offset) {
+ printf("FAIL: %s, incorrect probe_offset 0x%llx\n",
+ __func__, probe_offset);
+ return -1;
+ }
+ } else {
+ if (buf_len != 0) {
+ printf("FAIL: %s, incorrect buf %p\n",
+ __func__, buf);
+ return -1;
+ }
+
+ if (probe_addr != addr) {
+ printf("FAIL: %s, incorrect probe_addr 0x%llx\n",
+ __func__, probe_addr);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
+{
+ const char *event_type = "uprobe";
+ struct perf_event_attr attr = {};
+ char buf[256], event_alias[256];
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ int err, res, kfd, efd;
+ ssize_t bytes;
+
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events",
+ event_type);
+ kfd = open(buf, O_WRONLY | O_APPEND, 0);
+ CHECK_PERROR_RET(kfd < 0);
+
+ res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid());
+ CHECK_PERROR_RET(res < 0 || res >= sizeof(event_alias));
+
+ res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx",
+ is_return ? 'r' : 'p', event_type, event_alias,
+ binary_path, offset);
+ CHECK_PERROR_RET(res < 0 || res >= sizeof(buf));
+ CHECK_PERROR_RET(write(kfd, buf, strlen(buf)) < 0);
+
+ close(kfd);
+ kfd = -1;
+
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s/id",
+ event_type, event_alias);
+ efd = open(buf, O_RDONLY, 0);
+ CHECK_PERROR_RET(efd < 0);
+
+ bytes = read(efd, buf, sizeof(buf));
+ CHECK_PERROR_RET(bytes <= 0 || bytes >= sizeof(buf));
+ close(efd);
+ buf[bytes] = '\0';
+
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ CHECK_PERROR_RET(kfd < 0);
+ CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
+ CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_ENABLE, 0) < 0);
+
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), kfd, 0, buf, &len,
+ &prog_id, &fd_type, &probe_offset,
+ &probe_addr);
+ if (err < 0) {
+ printf("FAIL: %s, binary_path %s\n", __func__, binary_path);
+ perror(" :");
+ return -1;
+ }
+ if ((is_return && fd_type != BPF_FD_TYPE_URETPROBE) ||
+ (!is_return && fd_type != BPF_FD_TYPE_UPROBE)) {
+ printf("FAIL: %s, incorrect fd_type %u\n", __func__,
+ fd_type);
+ return -1;
+ }
+ if (strcmp(binary_path, buf) != 0) {
+ printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
+ return -1;
+ }
+ if (probe_offset != offset) {
+ printf("FAIL: %s, incorrect probe_offset 0x%llx\n", __func__,
+ probe_offset);
+ return -1;
+ }
+
+ close(kfd);
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ struct rlimit r = {1024*1024, RLIM_INFINITY};
+ extern char __executable_start;
+ char filename[256], buf[256];
+ __u64 uprobe_file_offset;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
+ if (load_kallsyms()) {
+ printf("failed to process /proc/kallsyms\n");
+ return 1;
+ }
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ /* test two functions in the corresponding *_kern.c file */
+ CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_start_request",
+ BPF_FD_TYPE_KPROBE));
+ CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion",
+ BPF_FD_TYPE_KRETPROBE));
+
+ /* test nondebug fs kprobe */
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
+ false, BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+#ifdef __x86_64__
+ /* set a kprobe on "bpf_check + 0x5", which is x64 specific */
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x5, 0x0,
+ false, BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+#endif
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
+ true, BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), false,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), false,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ NULL, 0));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), true,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
+ ksym_get_addr("bpf_check"), true,
+ BPF_FD_TYPE_KPROBE,
+ BPF_FD_TYPE_KRETPROBE,
+ 0, 0));
+
+ /* test nondebug fs uprobe */
+ /* the calculation of uprobe file offset is based on gcc 7.3.1 on x64
+ * and the default linker script, which defines __executable_start as
+ * the start of the .text section. The calculation could be different
+ * on different systems with different compilers. The right way is
+ * to parse the ELF file. We took a shortcut here.
+ */
+ uprobe_file_offset = (__u64)main - (__u64)&__executable_start;
+ CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
+ uprobe_file_offset, 0x0, false,
+ BPF_FD_TYPE_UPROBE,
+ BPF_FD_TYPE_URETPROBE,
+ buf, sizeof(buf)));
+ CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
+ uprobe_file_offset, 0x0, true,
+ BPF_FD_TYPE_UPROBE,
+ BPF_FD_TYPE_URETPROBE,
+ buf, sizeof(buf)));
+
+ /* test debug fs uprobe */
+ CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
+ false));
+ CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
+ true));
+
+ return 0;
+}
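
Condensed, the interface this sample exercises is a single libbpf call: bpf_task_fd_query() takes a pid and a perf-event (or raw tracepoint) fd and reports the attached program id, the fd type and the probe location. A minimal hedged sketch using only calls visible above (perf_event_fd is assumed to be an fd that already has a BPF program attached):

	char buf[256];
	__u32 len = sizeof(buf), prog_id, fd_type;
	__u64 probe_offset, probe_addr;

	if (bpf_task_fd_query(getpid(), perf_event_fd, 0, buf, &len,
			      &prog_id, &fd_type, &probe_offset,
			      &probe_addr) == 0)
		printf("prog_id %u fd_type %u name %s offset 0x%llx addr 0x%llx\n",
		       prog_id, fd_type, buf, probe_offset, probe_addr);
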
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
index 211db8ded0de..ad10fe700d7d 100644
--- a/samples/bpf/xdp_monitor_kern.c
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -125,6 +125,7 @@ struct datarec {
u64 processed;
u64 dropped;
u64 info;
+ u64 err;
};
#define MAX_CPUS 64
@@ -208,3 +209,51 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
return 0;
}
+
+struct bpf_map_def SEC("maps") devmap_xmit_cnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = 1,
+};
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format
+ * Code in: kernel/include/trace/events/xdp.h
+ */
+struct devmap_xmit_ctx {
+ u64 __pad; // First 8 bytes are not accessible by bpf code
+ int map_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12; size:4; signed:0;
+ u32 map_index; // offset:16; size:4; signed:0;
+ int drops; // offset:20; size:4; signed:1;
+ int sent; // offset:24; size:4; signed:1;
+ int from_ifindex; // offset:28; size:4; signed:1;
+ int to_ifindex; // offset:32; size:4; signed:1;
+ int err; // offset:36; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_devmap_xmit")
+int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx)
+{
+ struct datarec *rec;
+ u32 key = 0;
+
+ rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &key);
+ if (!rec)
+ return 0;
+ rec->processed += ctx->sent;
+ rec->dropped += ctx->drops;
+
+ /* Record bulk events, then userspace can calc average bulk size */
+ rec->info += 1;
+
+ /* Record error cases, where no frames were sent */
+ if (ctx->err)
+ rec->err++;
+
+ /* Catch API errors where the driver's ndo_xdp_xmit sent more than count */
+ if (ctx->drops < 0)
+ rec->err++;
+
+ return 1;
+}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index bf09b5188acd..dd558cbb2309 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -117,6 +117,7 @@ struct datarec {
__u64 processed;
__u64 dropped;
__u64 info;
+ __u64 err;
};
#define MAX_CPUS 64
@@ -141,6 +142,7 @@ struct stats_record {
struct record_u64 xdp_exception[XDP_ACTION_MAX];
struct record xdp_cpumap_kthread;
struct record xdp_cpumap_enqueue[MAX_CPUS];
+ struct record xdp_devmap_xmit;
};
static bool map_collect_record(int fd, __u32 key, struct record *rec)
@@ -151,6 +153,7 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec)
__u64 sum_processed = 0;
__u64 sum_dropped = 0;
__u64 sum_info = 0;
+ __u64 sum_err = 0;
int i;
if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
@@ -169,10 +172,13 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec)
sum_dropped += values[i].dropped;
rec->cpu[i].info = values[i].info;
sum_info += values[i].info;
+ rec->cpu[i].err = values[i].err;
+ sum_err += values[i].err;
}
rec->total.processed = sum_processed;
rec->total.dropped = sum_dropped;
rec->total.info = sum_info;
+ rec->total.err = sum_err;
return true;
}
@@ -273,6 +279,18 @@ static double calc_info(struct datarec *r, struct datarec *p, double period)
return pps;
}
+static double calc_err(struct datarec *r, struct datarec *p, double period)
+{
+ __u64 packets = 0;
+ double pps = 0;
+
+ if (period > 0) {
+ packets = r->err - p->err;
+ pps = packets / period;
+ }
+ return pps;
+}
+
static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
bool err_only)
@@ -397,7 +415,7 @@ static void stats_print(struct stats_record *stats_rec,
info = calc_info(r, p, t);
if (info > 0)
i_str = "sched";
- if (pps > 0)
+ if (pps > 0 || drop > 0)
printf(fmt1, "cpumap-kthread",
i, pps, drop, info, i_str);
}
@@ -409,6 +427,50 @@ static void stats_print(struct stats_record *stats_rec,
printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
}
+ /* devmap ndo_xdp_xmit stats */
+ {
+ char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s %s\n";
+ char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s %s\n";
+ struct record *rec, *prev;
+ double drop, info, err;
+ char *i_str = "";
+ char *err_str = "";
+
+ rec = &stats_rec->xdp_devmap_xmit;
+ prev = &stats_prev->xdp_devmap_xmit;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop(r, p, t);
+ info = calc_info(r, p, t);
+ err = calc_err(r, p, t);
+ if (info > 0) {
+ i_str = "bulk-average";
+ info = (pps+drop) / info; /* calc avg bulk */
+ }
+ if (err > 0)
+ err_str = "drv-err";
+ if (pps > 0 || drop > 0)
+ printf(fmt1, "devmap-xmit",
+ i, pps, drop, info, i_str, err_str);
+ }
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop(&rec->total, &prev->total, t);
+ info = calc_info(&rec->total, &prev->total, t);
+ err = calc_err(&rec->total, &prev->total, t);
+ if (info > 0) {
+ i_str = "bulk-average";
+ info = (pps+drop) / info; /* calc avg bulk */
+ }
+ if (err > 0)
+ err_str = "drv-err";
+ printf(fmt2, "devmap-xmit", "total", pps, drop,
+ info, i_str, err_str);
+ }
+
printf("\n");
}
@@ -437,6 +499,9 @@ static bool stats_collect(struct stats_record *rec)
fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
+ fd = map_data[4].fd; /* map4: devmap_xmit_cnt */
+ map_collect_record(fd, 0, &rec->xdp_devmap_xmit);
+
return true;
}
@@ -480,6 +545,7 @@ static struct stats_record *alloc_stats_record(void)
rec_sz = sizeof(struct datarec);
rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
+ rec->xdp_devmap_xmit.cpu = alloc_rec_per_cpu(rec_sz);
for (i = 0; i < MAX_CPUS; i++)
rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
@@ -498,6 +564,7 @@ static void free_stats_record(struct stats_record *r)
free(r->xdp_exception[i].cpu);
free(r->xdp_cpumap_kthread.cpu);
+ free(r->xdp_devmap_xmit.cpu);
for (i = 0; i < MAX_CPUS; i++)
free(r->xdp_cpumap_enqueue[i].cpu);
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 7fe60f6f7d53..e379eac034ac 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -1,15 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2017 - 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- */
+/* Copyright(c) 2017 - 2018 Intel Corporation. */
#include <assert.h>
#include <errno.h>
@@ -89,7 +79,10 @@ struct xdp_umem_uqueue {
u32 cached_cons;
u32 mask;
u32 size;
- struct xdp_umem_ring *ring;
+ u32 *producer;
+ u32 *consumer;
+ u32 *ring;
+ void *map;
};
struct xdp_umem {
@@ -104,7 +97,10 @@ struct xdp_uqueue {
u32 cached_cons;
u32 mask;
u32 size;
- struct xdp_rxtx_ring *ring;
+ u32 *producer;
+ u32 *consumer;
+ struct xdp_desc *ring;
+ void *map;
};
struct xdpsock {
@@ -165,7 +161,7 @@ static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
return free_entries;
/* Refresh the local tail pointer */
- q->cached_cons = q->ring->ptrs.consumer;
+ q->cached_cons = *q->consumer;
return q->size - (q->cached_prod - q->cached_cons);
}
@@ -178,7 +174,7 @@ static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
return free_entries;
/* Refresh the local tail pointer */
- q->cached_cons = q->ring->ptrs.consumer + q->size;
+ q->cached_cons = *q->consumer + q->size;
return q->cached_cons - q->cached_prod;
}
@@ -187,7 +183,7 @@ static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb)
u32 entries = q->cached_prod - q->cached_cons;
if (entries == 0) {
- q->cached_prod = q->ring->ptrs.producer;
+ q->cached_prod = *q->producer;
entries = q->cached_prod - q->cached_cons;
}
@@ -199,7 +195,7 @@ static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs)
u32 entries = q->cached_prod - q->cached_cons;
if (entries == 0) {
- q->cached_prod = q->ring->ptrs.producer;
+ q->cached_prod = *q->producer;
entries = q->cached_prod - q->cached_cons;
}
@@ -218,12 +214,12 @@ static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
for (i = 0; i < nb; i++) {
u32 idx = fq->cached_prod++ & fq->mask;
- fq->ring->desc[idx] = d[i].idx;
+ fq->ring[idx] = d[i].idx;
}
u_smp_wmb();
- fq->ring->ptrs.producer = fq->cached_prod;
+ *fq->producer = fq->cached_prod;
return 0;
}
@@ -239,12 +235,12 @@ static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d,
for (i = 0; i < nb; i++) {
u32 idx = fq->cached_prod++ & fq->mask;
- fq->ring->desc[idx] = d[i];
+ fq->ring[idx] = d[i];
}
u_smp_wmb();
- fq->ring->ptrs.producer = fq->cached_prod;
+ *fq->producer = fq->cached_prod;
return 0;
}
@@ -258,13 +254,13 @@ static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
for (i = 0; i < entries; i++) {
idx = cq->cached_cons++ & cq->mask;
- d[i] = cq->ring->desc[idx];
+ d[i] = cq->ring[idx];
}
if (entries > 0) {
u_smp_wmb();
- cq->ring->ptrs.consumer = cq->cached_cons;
+ *cq->consumer = cq->cached_cons;
}
return entries;
@@ -280,7 +276,7 @@ static inline int xq_enq(struct xdp_uqueue *uq,
const struct xdp_desc *descs,
unsigned int ndescs)
{
- struct xdp_rxtx_ring *r = uq->ring;
+ struct xdp_desc *r = uq->ring;
unsigned int i;
if (xq_nb_free(uq, ndescs) < ndescs)
@@ -289,21 +285,21 @@ static inline int xq_enq(struct xdp_uqueue *uq,
for (i = 0; i < ndescs; i++) {
u32 idx = uq->cached_prod++ & uq->mask;
- r->desc[idx].idx = descs[i].idx;
- r->desc[idx].len = descs[i].len;
- r->desc[idx].offset = descs[i].offset;
+ r[idx].idx = descs[i].idx;
+ r[idx].len = descs[i].len;
+ r[idx].offset = descs[i].offset;
}
u_smp_wmb();
- r->ptrs.producer = uq->cached_prod;
+ *uq->producer = uq->cached_prod;
return 0;
}
static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
__u32 idx, unsigned int ndescs)
{
- struct xdp_rxtx_ring *q = uq->ring;
+ struct xdp_desc *r = uq->ring;
unsigned int i;
if (xq_nb_free(uq, ndescs) < ndescs)
@@ -312,14 +308,14 @@ static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
for (i = 0; i < ndescs; i++) {
u32 idx = uq->cached_prod++ & uq->mask;
- q->desc[idx].idx = idx + i;
- q->desc[idx].len = sizeof(pkt_data) - 1;
- q->desc[idx].offset = 0;
+ r[idx].idx = idx + i;
+ r[idx].len = sizeof(pkt_data) - 1;
+ r[idx].offset = 0;
}
u_smp_wmb();
- q->ptrs.producer = uq->cached_prod;
+ *uq->producer = uq->cached_prod;
return 0;
}
@@ -327,7 +323,7 @@ static inline int xq_deq(struct xdp_uqueue *uq,
struct xdp_desc *descs,
int ndescs)
{
- struct xdp_rxtx_ring *r = uq->ring;
+ struct xdp_desc *r = uq->ring;
unsigned int idx;
int i, entries;
@@ -337,13 +333,13 @@ static inline int xq_deq(struct xdp_uqueue *uq,
for (i = 0; i < entries; i++) {
idx = uq->cached_cons++ & uq->mask;
- descs[i] = r->desc[idx];
+ descs[i] = r[idx];
}
if (entries > 0) {
u_smp_wmb();
- r->ptrs.consumer = uq->cached_cons;
+ *uq->consumer = uq->cached_cons;
}
return entries;
@@ -402,8 +398,10 @@ static size_t gen_eth_frame(char *frame)
static struct xdp_umem *xdp_umem_configure(int sfd)
{
int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
+ struct xdp_mmap_offsets off;
struct xdp_umem_reg mr;
struct xdp_umem *umem;
+ socklen_t optlen;
void *bufs;
umem = calloc(1, sizeof(*umem));
@@ -423,25 +421,35 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
sizeof(int)) == 0);
- umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
- FQ_NUM_DESCS * sizeof(u32),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, sfd,
- XDP_UMEM_PGOFF_FILL_RING);
- lassert(umem->fq.ring != MAP_FAILED);
+ optlen = sizeof(off);
+ lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
+ &optlen) == 0);
+
+ umem->fq.map = mmap(0, off.fr.desc +
+ FQ_NUM_DESCS * sizeof(u32),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, sfd,
+ XDP_UMEM_PGOFF_FILL_RING);
+ lassert(umem->fq.map != MAP_FAILED);
umem->fq.mask = FQ_NUM_DESCS - 1;
umem->fq.size = FQ_NUM_DESCS;
+ umem->fq.producer = umem->fq.map + off.fr.producer;
+ umem->fq.consumer = umem->fq.map + off.fr.consumer;
+ umem->fq.ring = umem->fq.map + off.fr.desc;
- umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
+ umem->cq.map = mmap(0, off.cr.desc +
CQ_NUM_DESCS * sizeof(u32),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_UMEM_PGOFF_COMPLETION_RING);
- lassert(umem->cq.ring != MAP_FAILED);
+ lassert(umem->cq.map != MAP_FAILED);
umem->cq.mask = CQ_NUM_DESCS - 1;
umem->cq.size = CQ_NUM_DESCS;
+ umem->cq.producer = umem->cq.map + off.cr.producer;
+ umem->cq.consumer = umem->cq.map + off.cr.consumer;
+ umem->cq.ring = umem->cq.map + off.cr.desc;
umem->frames = (char (*)[FRAME_SIZE])bufs;
umem->fd = sfd;
@@ -459,9 +467,11 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
static struct xdpsock *xsk_configure(struct xdp_umem *umem)
{
struct sockaddr_xdp sxdp = {};
+ struct xdp_mmap_offsets off;
int sfd, ndescs = NUM_DESCS;
struct xdpsock *xsk;
bool shared = true;
+ socklen_t optlen;
u32 i;
sfd = socket(PF_XDP, SOCK_RAW, 0);
@@ -484,15 +494,18 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
&ndescs, sizeof(int)) == 0);
lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
&ndescs, sizeof(int)) == 0);
+ optlen = sizeof(off);
+ lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
+ &optlen) == 0);
/* Rx */
- xsk->rx.ring = mmap(NULL,
- sizeof(struct xdp_ring) +
- NUM_DESCS * sizeof(struct xdp_desc),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, sfd,
- XDP_PGOFF_RX_RING);
- lassert(xsk->rx.ring != MAP_FAILED);
+ xsk->rx.map = mmap(NULL,
+ off.rx.desc +
+ NUM_DESCS * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, sfd,
+ XDP_PGOFF_RX_RING);
+ lassert(xsk->rx.map != MAP_FAILED);
if (!shared) {
for (i = 0; i < NUM_DESCS / 2; i++)
@@ -501,19 +514,25 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
}
/* Tx */
- xsk->tx.ring = mmap(NULL,
- sizeof(struct xdp_ring) +
- NUM_DESCS * sizeof(struct xdp_desc),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, sfd,
- XDP_PGOFF_TX_RING);
- lassert(xsk->tx.ring != MAP_FAILED);
+ xsk->tx.map = mmap(NULL,
+ off.tx.desc +
+ NUM_DESCS * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, sfd,
+ XDP_PGOFF_TX_RING);
+ lassert(xsk->tx.map != MAP_FAILED);
xsk->rx.mask = NUM_DESCS - 1;
xsk->rx.size = NUM_DESCS;
+ xsk->rx.producer = xsk->rx.map + off.rx.producer;
+ xsk->rx.consumer = xsk->rx.map + off.rx.consumer;
+ xsk->rx.ring = xsk->rx.map + off.rx.desc;
xsk->tx.mask = NUM_DESCS - 1;
xsk->tx.size = NUM_DESCS;
+ xsk->tx.producer = xsk->tx.map + off.tx.producer;
+ xsk->tx.consumer = xsk->tx.map + off.tx.consumer;
+ xsk->tx.ring = xsk->tx.map + off.tx.desc;
sxdp.sxdp_family = PF_XDP;
sxdp.sxdp_ifindex = opt_ifindex;
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 8f59897fbda1..5010a4d5bfba 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -95,7 +95,7 @@ class HeaderParser(object):
return capture.group(1)
def parse_desc(self):
- p = re.compile(' \* ?(?:\t| {6,8})Description$')
+ p = re.compile(' \* ?(?:\t| {5,8})Description$')
capture = p.match(self.line)
if not capture:
# Helper can have empty description and we might be parsing another
@@ -109,7 +109,7 @@ class HeaderParser(object):
if self.line == ' *\n':
desc += '\n'
else:
- p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)')
+ p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
capture = p.match(self.line)
if capture:
desc += capture.group(1) + '\n'
@@ -118,7 +118,7 @@ class HeaderParser(object):
return desc
def parse_ret(self):
- p = re.compile(' \* ?(?:\t| {6,8})Return$')
+ p = re.compile(' \* ?(?:\t| {5,8})Return$')
capture = p.match(self.line)
if not capture:
# Helper can have empty retval and we might be parsing another
@@ -132,7 +132,7 @@ class HeaderParser(object):
if self.line == ' *\n':
ret += '\n'
else:
- p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)')
+ p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
capture = p.match(self.line)
if capture:
ret += capture.group(1) + '\n'
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
new file mode 100644
index 000000000000..e3eb0eab7641
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -0,0 +1,81 @@
+================
+bpftool-perf
+================
+-------------------------------------------------------------------------------
+tool for inspection of perf-related bpf prog attachments
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **perf** *COMMAND*
+
+ *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+
+ *COMMANDS* :=
+ { **show** | **list** | **help** }
+
+PERF COMMANDS
+=============
+
+| **bpftool** **perf { show | list }**
+| **bpftool** **perf help**
+
+DESCRIPTION
+===========
+ **bpftool perf { show | list }**
+ List all raw_tracepoint, tracepoint, kprobe and uprobe attachments in the system.
+
+ Output will start with process id and file descriptor in that process,
+ followed by bpf program id, attachment information, and attachment point.
+ The attachment point for raw_tracepoint/tracepoint is the trace probe name.
+ The attachment point for k[ret]probe is either symbol name and offset,
+ or a kernel virtual address.
+ The attachment point for u[ret]probe is the file name and the file offset.
+
+ **bpftool perf help**
+ Print short help message.
+
+OPTIONS
+=======
+ -h, --help
+ Print short generic help message (similar to **bpftool help**).
+
+ -v, --version
+ Print version number (similar to **bpftool version**).
+
+ -j, --json
+ Generate JSON output. For commands that cannot produce JSON, this
+ option has no effect.
+
+ -p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+EXAMPLES
+========
+
+| **# bpftool perf**
+
+::
+
+ pid 21711 fd 5: prog_id 5 kprobe func __x64_sys_write offset 0
+ pid 21765 fd 5: prog_id 7 kretprobe func __x64_sys_nanosleep offset 0
+ pid 21767 fd 5: prog_id 8 tracepoint sys_enter_nanosleep
+ pid 21800 fd 5: prog_id 9 uprobe filename /home/yhs/a.out offset 1159
+
+|
+| **# bpftool -j perf**
+
+::
+
+ [{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \
+ {"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
+ {"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
+ {"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]
+
+
+SEE ALSO
+========
+ **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 564cb0d9692b..b6f5d560460d 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -16,7 +16,7 @@ SYNOPSIS
**bpftool** **version**
- *OBJECT* := { **map** | **program** | **cgroup** }
+ *OBJECT* := { **map** | **program** | **cgroup** | **perf** }
*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
@@ -30,6 +30,8 @@ SYNOPSIS
*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
+ *PERF-COMMANDS* := { **show** | **list** | **help** }
+
DESCRIPTION
===========
*bpftool* allows for inspection and simple modification of BPF objects
@@ -56,3 +58,4 @@ OPTIONS
SEE ALSO
========
**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
+ **bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index b301c9b315f1..7bc198d60de2 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -448,6 +448,15 @@ _bpftool()
;;
esac
;;
+ perf)
+ case $command in
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'help \
+ show list' -- "$cur" ) )
+ ;;
+ esac
+ ;;
esac
} &&
complete -F _bpftool bpftool
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 1ec852d21d44..eea7f14355f3 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -87,7 +87,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
- " OBJECT := { prog | map | cgroup }\n"
+ " OBJECT := { prog | map | cgroup | perf }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, bin_name, bin_name);
@@ -216,6 +216,7 @@ static const struct cmd cmds[] = {
{ "prog", do_prog },
{ "map", do_map },
{ "cgroup", do_cgroup },
+ { "perf", do_perf },
{ "version", do_version },
{ 0 }
};
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 6173cd997e7a..63fdb310b9a4 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -119,6 +119,7 @@ int do_prog(int argc, char **arg);
int do_map(int argc, char **arg);
int do_event_pipe(int argc, char **argv);
int do_cgroup(int argc, char **arg);
+int do_perf(int argc, char **arg);
int prog_parse_fd(int *argc, char ***argv);
int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
new file mode 100644
index 000000000000..ac6b1a12c9b7
--- /dev/null
+++ b/tools/bpf/bpftool/perf.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+// Author: Yonghong Song <[email protected]>
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <ftw.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+/* 0: undecided, 1: supported, 2: not supported */
+static int perf_query_supported;
+static bool has_perf_query_support(void)
+{
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ char buf[256];
+ int fd;
+
+ if (perf_query_supported)
+ goto out;
+
+ fd = open(bin_name, O_RDONLY);
+ if (fd < 0) {
+ p_err("perf_query_support: %s", strerror(errno));
+ goto out;
+ }
+
+ /* the following query will fail as there is no bpf attachment;
+ * the expected errno is ENOTSUPP
+ */
+ errno = 0;
+ len = sizeof(buf);
+ bpf_task_fd_query(getpid(), fd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+
+ if (errno == 524 /* ENOTSUPP */) {
+ perf_query_supported = 1;
+ goto close_fd;
+ }
+
+ perf_query_supported = 2;
+ p_err("perf_query_support: %s", strerror(errno));
+ fprintf(stderr,
+ "HINT: non root or kernel doesn't support TASK_FD_QUERY\n");
+
+close_fd:
+ close(fd);
+out:
+ return perf_query_supported == 1;
+}
+
+static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type,
+ char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+ jsonw_start_object(json_wtr);
+ jsonw_int_field(json_wtr, "pid", pid);
+ jsonw_int_field(json_wtr, "fd", fd);
+ jsonw_uint_field(json_wtr, "prog_id", prog_id);
+ switch (fd_type) {
+ case BPF_FD_TYPE_RAW_TRACEPOINT:
+ jsonw_string_field(json_wtr, "fd_type", "raw_tracepoint");
+ jsonw_string_field(json_wtr, "tracepoint", buf);
+ break;
+ case BPF_FD_TYPE_TRACEPOINT:
+ jsonw_string_field(json_wtr, "fd_type", "tracepoint");
+ jsonw_string_field(json_wtr, "tracepoint", buf);
+ break;
+ case BPF_FD_TYPE_KPROBE:
+ jsonw_string_field(json_wtr, "fd_type", "kprobe");
+ if (buf[0] != '\0') {
+ jsonw_string_field(json_wtr, "func", buf);
+ jsonw_lluint_field(json_wtr, "offset", probe_offset);
+ } else {
+ jsonw_lluint_field(json_wtr, "addr", probe_addr);
+ }
+ break;
+ case BPF_FD_TYPE_KRETPROBE:
+ jsonw_string_field(json_wtr, "fd_type", "kretprobe");
+ if (buf[0] != '\0') {
+ jsonw_string_field(json_wtr, "func", buf);
+ jsonw_lluint_field(json_wtr, "offset", probe_offset);
+ } else {
+ jsonw_lluint_field(json_wtr, "addr", probe_addr);
+ }
+ break;
+ case BPF_FD_TYPE_UPROBE:
+ jsonw_string_field(json_wtr, "fd_type", "uprobe");
+ jsonw_string_field(json_wtr, "filename", buf);
+ jsonw_lluint_field(json_wtr, "offset", probe_offset);
+ break;
+ case BPF_FD_TYPE_URETPROBE:
+ jsonw_string_field(json_wtr, "fd_type", "uretprobe");
+ jsonw_string_field(json_wtr, "filename", buf);
+ jsonw_lluint_field(json_wtr, "offset", probe_offset);
+ break;
+ }
+ jsonw_end_object(json_wtr);
+}
+
+static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type,
+ char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+ printf("pid %d fd %d: prog_id %u ", pid, fd, prog_id);
+ switch (fd_type) {
+ case BPF_FD_TYPE_RAW_TRACEPOINT:
+ printf("raw_tracepoint %s\n", buf);
+ break;
+ case BPF_FD_TYPE_TRACEPOINT:
+ printf("tracepoint %s\n", buf);
+ break;
+ case BPF_FD_TYPE_KPROBE:
+ if (buf[0] != '\0')
+ printf("kprobe func %s offset %llu\n", buf,
+ probe_offset);
+ else
+ printf("kprobe addr %llu\n", probe_addr);
+ break;
+ case BPF_FD_TYPE_KRETPROBE:
+ if (buf[0] != '\0')
+ printf("kretprobe func %s offset %llu\n", buf,
+ probe_offset);
+ else
+ printf("kretprobe addr %llu\n", probe_addr);
+ break;
+ case BPF_FD_TYPE_UPROBE:
+ printf("uprobe filename %s offset %llu\n", buf, probe_offset);
+ break;
+ case BPF_FD_TYPE_URETPROBE:
+ printf("uretprobe filename %s offset %llu\n", buf,
+ probe_offset);
+ break;
+ }
+}
+
+static int show_proc(const char *fpath, const struct stat *sb,
+ int tflag, struct FTW *ftwbuf)
+{
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ int err, pid = 0, fd = 0;
+ const char *pch;
+ char buf[4096];
+
+ /* prefix always /proc */
+ pch = fpath + 5;
+ if (*pch == '\0')
+ return 0;
+
+ /* pid should be all numbers */
+ pch++;
+ while (isdigit(*pch)) {
+ pid = pid * 10 + *pch - '0';
+ pch++;
+ }
+ if (*pch == '\0')
+ return 0;
+ if (*pch != '/')
+ return FTW_SKIP_SUBTREE;
+
+ /* check /proc/<pid>/fd directory */
+ pch++;
+ if (strncmp(pch, "fd", 2))
+ return FTW_SKIP_SUBTREE;
+ pch += 2;
+ if (*pch == '\0')
+ return 0;
+ if (*pch != '/')
+ return FTW_SKIP_SUBTREE;
+
+ /* check /proc/<pid>/fd/<fd_num> */
+ pch++;
+ while (isdigit(*pch)) {
+ fd = fd * 10 + *pch - '0';
+ pch++;
+ }
+ if (*pch != '\0')
+ return FTW_SKIP_SUBTREE;
+
+ /* query (pid, fd) for potential perf events */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type,
+ &probe_offset, &probe_addr);
+ if (err < 0)
+ return 0;
+
+ if (json_output)
+ print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset,
+ probe_addr);
+ else
+ print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset,
+ probe_addr);
+
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ int flags = FTW_ACTIONRETVAL | FTW_PHYS;
+ int err = 0, nopenfd = 16;
+
+ if (!has_perf_query_support())
+ return -1;
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+ if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
+ p_err("%s", strerror(errno));
+ err = -1;
+ }
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+ fprintf(stderr,
+ "Usage: %s %s { show | list | help }\n"
+ "",
+ bin_name, argv[-2]);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "list", do_show },
+ { "help", do_help },
+ { 0 }
+};
+
+int do_perf(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
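
For reference, the new perf.c walks /proc with nftw() and issues the query above for every (pid, fd) pair. A minimal sketch of the underlying call, outside this patch (the bpf.h include path and the pid/fd values are assumptions of the sketch), looks like:

    /* Sketch only, not part of the patch: query one (pid, fd) pair with the
     * new libbpf wrapper and print whatever perf event the fd carries.
     */
    #include <stdio.h>
    #include <linux/types.h>
    #include "bpf.h"            /* tools/lib/bpf, added wrapper below */

    int query_one(int pid, int fd)
    {
            __u64 probe_offset, probe_addr;
            __u32 prog_id, fd_type;
            char buf[256];
            __u32 buf_len = sizeof(buf);
            int err;

            err = bpf_task_fd_query(pid, fd, 0, buf, &buf_len, &prog_id,
                                    &fd_type, &probe_offset, &probe_addr);
            if (err)
                    return err;     /* not a perf event fd with a BPF program */

            printf("pid %d fd %d: prog_id %u fd_type %u name %s offset %llu addr %llu\n",
                   pid, fd, prog_id, fd_type, buf,
                   (unsigned long long)probe_offset,
                   (unsigned long long)probe_addr);
            return 0;
    }

The plain and JSON printers above then map fd_type to the kprobe/uprobe/tracepoint variants.
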
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 9bdfdf2d3fbe..39b88e760367 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -420,7 +420,11 @@ static int do_show(int argc, char **argv)
static int do_dump(int argc, char **argv)
{
+ unsigned long *func_ksyms = NULL;
struct bpf_prog_info info = {};
+ unsigned int *func_lens = NULL;
+ unsigned int nr_func_ksyms;
+ unsigned int nr_func_lens;
struct dump_data dd = {};
__u32 len = sizeof(info);
unsigned int buf_size;
@@ -496,10 +500,34 @@ static int do_dump(int argc, char **argv)
return -1;
}
+ nr_func_ksyms = info.nr_jited_ksyms;
+ if (nr_func_ksyms) {
+ func_ksyms = malloc(nr_func_ksyms * sizeof(__u64));
+ if (!func_ksyms) {
+ p_err("mem alloc failed");
+ close(fd);
+ goto err_free;
+ }
+ }
+
+ nr_func_lens = info.nr_jited_func_lens;
+ if (nr_func_lens) {
+ func_lens = malloc(nr_func_lens * sizeof(__u32));
+ if (!func_lens) {
+ p_err("mem alloc failed");
+ close(fd);
+ goto err_free;
+ }
+ }
+
memset(&info, 0, sizeof(info));
*member_ptr = ptr_to_u64(buf);
*member_len = buf_size;
+ info.jited_ksyms = ptr_to_u64(func_ksyms);
+ info.nr_jited_ksyms = nr_func_ksyms;
+ info.jited_func_lens = ptr_to_u64(func_lens);
+ info.nr_jited_func_lens = nr_func_lens;
err = bpf_obj_get_info_by_fd(fd, &info, &len);
close(fd);
@@ -513,6 +541,16 @@ static int do_dump(int argc, char **argv)
goto err_free;
}
+ if (info.nr_jited_ksyms > nr_func_ksyms) {
+ p_err("too many addresses returned");
+ goto err_free;
+ }
+
+ if (info.nr_jited_func_lens > nr_func_lens) {
+ p_err("too many values returned");
+ goto err_free;
+ }
+
if ((member_len == &info.jited_prog_len &&
info.jited_prog_insns == 0) ||
(member_len == &info.xlated_prog_len &&
@@ -550,7 +588,57 @@ static int do_dump(int argc, char **argv)
goto err_free;
}
- disasm_print_insn(buf, *member_len, opcodes, name);
+ if (info.nr_jited_func_lens && info.jited_func_lens) {
+ struct kernel_sym *sym = NULL;
+ char sym_name[SYM_MAX_NAME];
+ unsigned char *img = buf;
+ __u64 *ksyms = NULL;
+ __u32 *lens;
+ __u32 i;
+
+ if (info.nr_jited_ksyms) {
+ kernel_syms_load(&dd);
+ ksyms = (__u64 *) info.jited_ksyms;
+ }
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+
+ lens = (__u32 *) info.jited_func_lens;
+ for (i = 0; i < info.nr_jited_func_lens; i++) {
+ if (ksyms) {
+ sym = kernel_syms_search(&dd, ksyms[i]);
+ if (sym)
+ sprintf(sym_name, "%s", sym->name);
+ else
+ sprintf(sym_name, "0x%016llx", ksyms[i]);
+ } else {
+ strcpy(sym_name, "unknown");
+ }
+
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "name");
+ jsonw_string(json_wtr, sym_name);
+ jsonw_name(json_wtr, "insns");
+ } else {
+ printf("%s:\n", sym_name);
+ }
+
+ disasm_print_insn(img, lens[i], opcodes, name);
+ img += lens[i];
+
+ if (json_output)
+ jsonw_end_object(json_wtr);
+ else
+ printf("\n");
+ }
+
+ if (json_output)
+ jsonw_end_array(json_wtr);
+ } else {
+ disasm_print_insn(buf, *member_len, opcodes, name);
+ }
} else if (visual) {
if (json_output)
jsonw_null(json_wtr);
@@ -558,6 +646,9 @@ static int do_dump(int argc, char **argv)
dump_xlated_cfg(buf, *member_len);
} else {
kernel_syms_load(&dd);
+ dd.nr_jited_ksyms = info.nr_jited_ksyms;
+ dd.jited_ksyms = (__u64 *) info.jited_ksyms;
+
if (json_output)
dump_xlated_json(&dd, buf, *member_len, opcodes);
else
@@ -566,10 +657,14 @@ static int do_dump(int argc, char **argv)
}
free(buf);
+ free(func_ksyms);
+ free(func_lens);
return 0;
err_free:
free(buf);
+ free(func_ksyms);
+ free(func_lens);
return -1;
}
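
The do_dump() changes above rely on the usual two-pass bpf_obj_get_info_by_fd() pattern: first read the counters, then pass user buffers of that size. A hedged user-space sketch of just that pattern (error paths trimmed, prog_fd supplied by the caller, and assuming the updated tools uapi header from this series) could look like:

    /* Sketch only, not part of the patch. */
    #include <stdlib.h>
    #include <string.h>
    #include <linux/bpf.h>
    #include "bpf.h"            /* tools/lib/bpf */

    static __u64 ptr_to_u64(const void *ptr)
    {
            return (__u64)(unsigned long)ptr;
    }

    int read_jited_layout(int prog_fd)
    {
            struct bpf_prog_info info = {};
            __u32 len = sizeof(info);
            __u32 nr_ksyms, nr_lens;
            __u64 *ksyms;
            __u32 *lens;

            /* first pass: learn how many per-function entries the JIT produced */
            if (bpf_obj_get_info_by_fd(prog_fd, &info, &len))
                    return -1;

            nr_ksyms = info.nr_jited_ksyms;
            nr_lens = info.nr_jited_func_lens;
            ksyms = calloc(nr_ksyms, sizeof(*ksyms));
            lens = calloc(nr_lens, sizeof(*lens));
            if (!ksyms || !lens)
                    return -1;

            /* second pass: hand in buffers of exactly that size */
            memset(&info, 0, sizeof(info));
            info.nr_jited_ksyms = nr_ksyms;
            info.jited_ksyms = ptr_to_u64(ksyms);
            info.nr_jited_func_lens = nr_lens;
            info.jited_func_lens = ptr_to_u64(lens);
            len = sizeof(info);

            /* ksyms[i] is the start address and lens[i] the JITed length of
             * sub-program i; freeing the arrays is left to the caller. */
            return bpf_obj_get_info_by_fd(prog_fd, &info, &len);
    }
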
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 7a3173b76c16..b97f1da60dd1 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -102,8 +102,8 @@ void kernel_syms_destroy(struct dump_data *dd)
free(dd->sym_mapping);
}
-static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
- unsigned long key)
+struct kernel_sym *kernel_syms_search(struct dump_data *dd,
+ unsigned long key)
{
struct kernel_sym sym = {
.address = key,
@@ -174,7 +174,11 @@ static const char *print_call_pcrel(struct dump_data *dd,
unsigned long address,
const struct bpf_insn *insn)
{
- if (sym)
+ if (!dd->nr_jited_ksyms)
+ /* Do not show address for interpreted programs */
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "%+d", insn->off);
+ else if (sym)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"%+d#%s", insn->off, sym->name);
else
@@ -203,6 +207,10 @@ static const char *print_call(void *private_data,
unsigned long address = dd->address_call_base + insn->imm;
struct kernel_sym *sym;
+ if (insn->src_reg == BPF_PSEUDO_CALL &&
+ (__u32) insn->imm < dd->nr_jited_ksyms)
+ address = dd->jited_ksyms[insn->imm];
+
sym = kernel_syms_search(dd, address);
if (insn->src_reg == BPF_PSEUDO_CALL)
return print_call_pcrel(dd, sym, address, insn);
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
index b34affa7ef2d..33d86e2b369b 100644
--- a/tools/bpf/bpftool/xlated_dumper.h
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -49,11 +49,14 @@ struct dump_data {
unsigned long address_call_base;
struct kernel_sym *sym_mapping;
__u32 sym_count;
+ __u64 *jited_ksyms;
+ __u32 nr_jited_ksyms;
char scratch_buff[SYM_MAX_NAME + 8];
};
void kernel_syms_load(struct dump_data *dd);
void kernel_syms_destroy(struct dump_data *dd);
+struct kernel_sym *kernel_syms_search(struct dump_data *dd, unsigned long key);
void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
bool opcodes);
void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d94d333a8225..9b8c6e310e9a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -97,6 +97,7 @@ enum bpf_cmd {
BPF_RAW_TRACEPOINT_OPEN,
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
+ BPF_TASK_FD_QUERY,
};
enum bpf_map_type {
@@ -141,6 +142,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_RAW_TRACEPOINT,
BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_PROG_TYPE_LWT_SEG6LOCAL,
};
enum bpf_attach_type {
@@ -284,8 +286,8 @@ union bpf_attr {
char map_name[BPF_OBJ_NAME_LEN];
__u32 map_ifindex; /* ifindex of netdev to create on */
__u32 btf_fd; /* fd pointing to a BTF type data */
- __u32 btf_key_id; /* BTF type_id of the key */
- __u32 btf_value_id; /* BTF type_id of the value */
+ __u32 btf_key_type_id; /* BTF type_id of the key */
+ __u32 btf_value_type_id; /* BTF type_id of the value */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -379,6 +381,22 @@ union bpf_attr {
__u32 btf_log_size;
__u32 btf_log_level;
};
+
+ struct {
+ __u32 pid; /* input: pid */
+ __u32 fd; /* input: fd */
+ __u32 flags; /* input: flags */
+ __u32 buf_len; /* input/output: buf len */
+ __aligned_u64 buf; /* input/output:
+ * tp_name for tracepoint
+ * symbol for kprobe
+ * filename for uprobe
+ */
+ __u32 prog_id; /* output: prog_id */
+ __u32 fd_type; /* output: BPF_FD_TYPE_* */
+ __u64 probe_offset; /* output: probe_offset */
+ __u64 probe_addr; /* output: probe_addr */
+ } task_fd_query;
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -1902,6 +1920,90 @@ union bpf_attr {
* egress otherwise). This is the only flag supported for now.
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ * Description
+ * Encapsulate the packet associated to *skb* within a Layer 3
+ * protocol header. This header is provided in the buffer at
+ * address *hdr*, with *len* its size in bytes. *type* indicates
+ * the protocol of the header and can be one of:
+ *
+ * **BPF_LWT_ENCAP_SEG6**
+ * IPv6 encapsulation with Segment Routing Header
+ * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
+ * the IPv6 header is computed by the kernel.
+ * **BPF_LWT_ENCAP_SEG6_INLINE**
+ * Only works if *skb* contains an IPv6 packet. Insert a
+ * Segment Routing Header (**struct ipv6_sr_hdr**) inside
+ * the IPv6 header.
+ *
+ * A call to this helper may change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. Only the flags, tag and TLVs
+ * inside the outermost IPv6 Segment Routing Header can be
+ * modified through this helper.
+ *
+ * A call to this helper may change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ * Description
+ * Adjust the size allocated to TLVs in the outermost IPv6
+ * Segment Routing Header contained in the packet associated to
+ * *skb*, at position *offset* by *delta* bytes. Only offsets
+ * after the segments are accepted. *delta* can be
+ * positive (growing) as well as negative (shrinking).
+ *
+ * A call to this helper may change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ * Description
+ * Apply an IPv6 Segment Routing action of type *action* to the
+ * packet associated to *skb*. Each action takes a parameter
+ * contained at address *param*, and of length *param_len* bytes.
+ * *action* can be one of:
+ *
+ * **SEG6_LOCAL_ACTION_END_X**
+ * End.X action: Endpoint with Layer-3 cross-connect.
+ * Type of *param*: **struct in6_addr**.
+ * **SEG6_LOCAL_ACTION_END_T**
+ * End.T action: Endpoint with specific IPv6 table lookup.
+ * Type of *param*: **int**.
+ * **SEG6_LOCAL_ACTION_END_B6**
+ * End.B6 action: Endpoint bound to an SRv6 policy.
+ * Type of param: **struct ipv6_sr_hdr**.
+ * **SEG6_LOCAL_ACTION_END_B6_ENCAP**
+ * End.B6.Encap action: Endpoint bound to an SRv6
+ * encapsulation policy.
+ * Type of param: **struct ipv6_sr_hdr**.
+ *
+ * A call to this helper may change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -1976,7 +2078,11 @@ union bpf_attr {
FN(fib_lookup), \
FN(sock_hash_update), \
FN(msg_redirect_hash), \
- FN(sk_redirect_hash),
+ FN(sk_redirect_hash), \
+ FN(lwt_push_encap), \
+ FN(lwt_seg6_store_bytes), \
+ FN(lwt_seg6_adjust_srh), \
+ FN(lwt_seg6_action),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2043,6 +2149,12 @@ enum bpf_hdr_start_off {
BPF_HDR_START_NET,
};
+/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
+enum bpf_lwt_encap_mode {
+ BPF_LWT_ENCAP_SEG6,
+ BPF_LWT_ENCAP_SEG6_INLINE
+};
+
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
*/
@@ -2176,6 +2288,14 @@ enum sk_action {
struct sk_msg_md {
void *data;
void *data_end;
+
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
};
#define BPF_TAG_SIZE 8
@@ -2197,6 +2317,10 @@ struct bpf_prog_info {
__u32 gpl_compatible:1;
__u64 netns_dev;
__u64 netns_ino;
+ __u32 nr_jited_ksyms;
+ __u32 nr_jited_func_lens;
+ __aligned_u64 jited_ksyms;
+ __aligned_u64 jited_func_lens;
} __attribute__((aligned(8)));
struct bpf_map_info {
@@ -2211,8 +2335,8 @@ struct bpf_map_info {
__u64 netns_dev;
__u64 netns_ino;
__u32 btf_id;
- __u32 btf_key_id;
- __u32 btf_value_id;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
} __attribute__((aligned(8)));
struct bpf_btf_info {
@@ -2450,4 +2574,13 @@ struct bpf_fib_lookup {
__u8 dmac[6]; /* ETH_ALEN */
};
+enum bpf_task_fd_type {
+ BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_UPROBE, /* filename + offset */
+ BPF_FD_TYPE_URETPROBE, /* filename + offset */
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index bcb56ee47014..0b5ddbe135a4 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -12,42 +12,29 @@ struct btf_header {
__u16 magic;
__u8 version;
__u8 flags;
-
- __u32 parent_label;
- __u32 parent_name;
+ __u32 hdr_len;
/* All offsets are in bytes relative to the end of this header */
- __u32 label_off; /* offset of label section */
- __u32 object_off; /* offset of data object section*/
- __u32 func_off; /* offset of function section */
__u32 type_off; /* offset of type section */
+ __u32 type_len; /* length of type section */
__u32 str_off; /* offset of string section */
__u32 str_len; /* length of string section */
};
/* Max # of type identifier */
-#define BTF_MAX_TYPE 0x7fffffff
+#define BTF_MAX_TYPE 0x0000ffff
/* Max offset into the string section */
-#define BTF_MAX_NAME_OFFSET 0x7fffffff
+#define BTF_MAX_NAME_OFFSET 0x0000ffff
/* Max # of struct/union/enum members or func args */
#define BTF_MAX_VLEN 0xffff
-/* The type id is referring to a parent BTF */
-#define BTF_TYPE_PARENT(id) (((id) >> 31) & 0x1)
-#define BTF_TYPE_ID(id) ((id) & BTF_MAX_TYPE)
-
-/* String is in the ELF string section */
-#define BTF_STR_TBL_ELF_ID(ref) (((ref) >> 31) & 0x1)
-#define BTF_STR_OFFSET(ref) ((ref) & BTF_MAX_NAME_OFFSET)
-
struct btf_type {
__u32 name_off;
/* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members)
* bits 16-23: unused
- * bits 24-28: kind (e.g. int, ptr, array...etc)
- * bits 29-30: unused
- * bits 31: root
+ * bits 24-27: kind (e.g. int, ptr, array...etc)
+ * bits 28-31: unused
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT and UNION.
@@ -62,8 +49,7 @@ struct btf_type {
};
};
-#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
-#define BTF_INFO_ISROOT(info) (!!(((info) >> 24) & 0x80))
+#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_KIND_UNKN 0 /* Unknown */
@@ -88,15 +74,14 @@ struct btf_type {
/* BTF_KIND_INT is followed by a u32 and the following
* is the 32 bits arrangement:
*/
-#define BTF_INT_ENCODING(VAL) (((VAL) & 0xff000000) >> 24)
+#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff)
/* Attributes stored in the BTF_INT_ENCODING */
-#define BTF_INT_SIGNED 0x1
-#define BTF_INT_CHAR 0x2
-#define BTF_INT_BOOL 0x4
-#define BTF_INT_VARARGS 0x8
+#define BTF_INT_SIGNED (1 << 0)
+#define BTF_INT_CHAR (1 << 1)
+#define BTF_INT_BOOL (1 << 2)
/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
* The exact number of btf_enum is stored in the vlen (of the
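
With the reworked header and the kind field tightened to bits 24-27, decoding a type entry is a matter of applying the macros above. A small sketch, not part of the patch and assuming the updated uapi btf.h is on the include path:

    /* Sketch only: print the decoded fields of one struct btf_type. */
    #include <stdio.h>
    #include <linux/btf.h>

    static void btf_type_describe(const struct btf_type *t)
    {
            printf("name_off %u kind %u vlen %u\n",
                   t->name_off,
                   BTF_INFO_KIND(t->info),   /* bits 24-27 after this change */
                   BTF_INFO_VLEN(t->info));  /* bits 0-15 */
    }
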
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 6a8a00097fd8..9ddc89dae962 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -89,8 +89,8 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
min(name_len, BPF_OBJ_NAME_LEN - 1));
attr.numa_node = create_attr->numa_node;
attr.btf_fd = create_attr->btf_fd;
- attr.btf_key_id = create_attr->btf_key_id;
- attr.btf_value_id = create_attr->btf_value_id;
+ attr.btf_key_type_id = create_attr->btf_key_type_id;
+ attr.btf_value_type_id = create_attr->btf_value_type_id;
attr.map_ifindex = create_attr->map_ifindex;
return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
@@ -643,3 +643,26 @@ retry:
return fd;
}
+
+int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
+ __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
+ __u64 *probe_addr)
+{
+ union bpf_attr attr = {};
+ int err;
+
+ attr.task_fd_query.pid = pid;
+ attr.task_fd_query.fd = fd;
+ attr.task_fd_query.flags = flags;
+ attr.task_fd_query.buf = ptr_to_u64(buf);
+ attr.task_fd_query.buf_len = *buf_len;
+
+ err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+ *buf_len = attr.task_fd_query.buf_len;
+ *prog_id = attr.task_fd_query.prog_id;
+ *fd_type = attr.task_fd_query.fd_type;
+ *probe_offset = attr.task_fd_query.probe_offset;
+ *probe_addr = attr.task_fd_query.probe_addr;
+
+ return err;
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 15bff7728cf1..0639a30a457d 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -36,8 +36,8 @@ struct bpf_create_map_attr {
__u32 max_entries;
__u32 numa_node;
__u32 btf_fd;
- __u32 btf_key_id;
- __u32 btf_value_id;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
__u32 map_ifindex;
};
@@ -107,4 +107,7 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
int bpf_raw_tracepoint_open(const char *name, int prog_fd);
int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
bool do_log);
+int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
+ __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
+ __u64 *probe_addr);
#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 2bac710e3194..8c54a4b6f187 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -35,9 +35,8 @@ struct btf {
static const char *btf_name_by_offset(const struct btf *btf, uint32_t offset)
{
- if (!BTF_STR_TBL_ELF_ID(offset) &&
- BTF_STR_OFFSET(offset) < btf->hdr->str_len)
- return &btf->strings[BTF_STR_OFFSET(offset)];
+ if (offset < btf->hdr->str_len)
+ return &btf->strings[offset];
else
return NULL;
}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index cbdf34a6fb93..d20411ebfa2f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -216,8 +216,8 @@ struct bpf_map {
size_t offset;
int map_ifindex;
struct bpf_map_def def;
- uint32_t btf_key_id;
- uint32_t btf_value_id;
+ uint32_t btf_key_type_id;
+ uint32_t btf_value_type_id;
void *priv;
bpf_map_clear_priv_t clear_priv;
};
@@ -1042,8 +1042,8 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
}
if (def->key_size != key_size) {
- pr_warning("map:%s key_type:%s has BTF type_size:%ld != key_size:%u\n",
- map->name, name, key_size, def->key_size);
+ pr_warning("map:%s key_type:%s has BTF type_size:%u != key_size:%u\n",
+ map->name, name, (unsigned int)key_size, def->key_size);
return -EINVAL;
}
@@ -1069,13 +1069,13 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
}
if (def->value_size != value_size) {
- pr_warning("map:%s value_type:%s has BTF type_size:%ld != value_size:%u\n",
- map->name, name, value_size, def->value_size);
+ pr_warning("map:%s value_type:%s has BTF type_size:%u != value_size:%u\n",
+ map->name, name, (unsigned int)value_size, def->value_size);
return -EINVAL;
}
- map->btf_key_id = key_id;
- map->btf_value_id = value_id;
+ map->btf_key_type_id = key_id;
+ map->btf_value_type_id = value_id;
return 0;
}
@@ -1100,24 +1100,24 @@ bpf_object__create_maps(struct bpf_object *obj)
create_attr.value_size = def->value_size;
create_attr.max_entries = def->max_entries;
create_attr.btf_fd = 0;
- create_attr.btf_key_id = 0;
- create_attr.btf_value_id = 0;
+ create_attr.btf_key_type_id = 0;
+ create_attr.btf_value_type_id = 0;
if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
create_attr.btf_fd = btf__fd(obj->btf);
- create_attr.btf_key_id = map->btf_key_id;
- create_attr.btf_value_id = map->btf_value_id;
+ create_attr.btf_key_type_id = map->btf_key_type_id;
+ create_attr.btf_value_type_id = map->btf_value_type_id;
}
*pfd = bpf_create_map_xattr(&create_attr);
- if (*pfd < 0 && create_attr.btf_key_id) {
+ if (*pfd < 0 && create_attr.btf_key_type_id) {
pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
map->name, strerror(errno), errno);
create_attr.btf_fd = 0;
- create_attr.btf_key_id = 0;
- create_attr.btf_value_id = 0;
- map->btf_key_id = 0;
- map->btf_value_id = 0;
+ create_attr.btf_key_type_id = 0;
+ create_attr.btf_value_type_id = 0;
+ map->btf_key_type_id = 0;
+ map->btf_value_type_id = 0;
*pfd = bpf_create_map_xattr(&create_attr);
}
@@ -1456,6 +1456,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT:
case BPF_PROG_TYPE_LWT_XMIT:
+ case BPF_PROG_TYPE_LWT_SEG6LOCAL:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_SK_SKB:
case BPF_PROG_TYPE_CGROUP_DEVICE:
@@ -2085,14 +2086,14 @@ const char *bpf_map__name(struct bpf_map *map)
return map ? map->name : NULL;
}
-uint32_t bpf_map__btf_key_id(const struct bpf_map *map)
+uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map)
{
- return map ? map->btf_key_id : 0;
+ return map ? map->btf_key_type_id : 0;
}
-uint32_t bpf_map__btf_value_id(const struct bpf_map *map)
+uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map)
{
- return map ? map->btf_value_id : 0;
+ return map ? map->btf_value_type_id : 0;
}
int bpf_map__set_priv(struct bpf_map *map, void *priv,
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index cd3fd8d782c7..09976531aa74 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -244,8 +244,8 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
int bpf_map__fd(struct bpf_map *map);
const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
const char *bpf_map__name(struct bpf_map *map);
-uint32_t bpf_map__btf_key_id(const struct bpf_map *map);
-uint32_t bpf_map__btf_value_id(const struct bpf_map *map);
+uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map);
+uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map);
typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
int bpf_map__set_priv(struct bpf_map *map, void *priv,
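
As a usage note, the renamed getters can be read for any map of a loaded object; they return 0 when no BTF key/value type was attached, matching the libbpf changes above. A minimal sketch, outside the patch:

    /* Sketch only: dump the BTF type ids recorded for a map. */
    #include <stdio.h>
    #include "libbpf.h"

    void print_map_btf_ids(struct bpf_map *map)
    {
            printf("%s: btf_key_type_id=%u btf_value_type_id=%u\n",
                   bpf_map__name(map),
                   bpf_map__btf_key_type_id(map),
                   bpf_map__btf_value_type_id(map));
    }
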
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1eb0fa2aba92..85044448bbc7 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -33,7 +33,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
- test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o
+ test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
+ test_lwt_seg6local.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@@ -42,7 +43,8 @@ TEST_PROGS := test_kmod.sh \
test_xdp_meta.sh \
test_offload.py \
test_sock_addr.sh \
- test_tunnel.sh
+ test_tunnel.sh \
+ test_lwt_seg6local.sh
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
@@ -84,7 +86,17 @@ else
CPU ?= generic
endif
+# Get Clang's default includes on this system, as opposed to those seen by
+# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
+ $(CLANG_SYS_INCLUDES) \
-Wno-compare-distinct-pointer-types
$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 8f143dfb3700..334d3e8c5e89 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -114,6 +114,18 @@ static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
int plen, __u32 flags) =
(void *) BPF_FUNC_fib_lookup;
+static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr,
+ unsigned int len) =
+ (void *) BPF_FUNC_lwt_push_encap;
+static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset,
+ void *from, unsigned int len) =
+ (void *) BPF_FUNC_lwt_seg6_store_bytes;
+static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param,
+ unsigned int param_len) =
+ (void *) BPF_FUNC_lwt_seg6_action;
+static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset,
+ unsigned int len) =
+ (void *) BPF_FUNC_lwt_seg6_adjust_srh;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
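
The prototypes added above make the seg6 actions documented in the bpf.h comments callable from LWT BPF programs. A minimal hedged sketch of an End.T program (the section name and table id are placeholders; real programs validate the SRH first, as test_lwt_seg6local.c below does):

    /* Sketch only, not part of the patch. */
    #include <linux/bpf.h>
    #include <linux/seg6_local.h>
    #include "bpf_helpers.h"

    SEC("lwt_seg6local")
    int do_end_t(struct __sk_buff *skb)
    {
            int table = 254;        /* hypothetical IPv6 table id */

            /* End.T takes an int parameter per the helper documentation */
            if (bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
                                    &table, sizeof(table)))
                    return BPF_DROP;
            return BPF_REDIRECT;    /* the action performed the lookup */
    }

    char _license[] SEC("license") = "GPL";
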
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index c8bceae7ec02..35064df688c1 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -113,22 +113,25 @@ static char btf_log_buf[BTF_LOG_BUF_SIZE];
static struct btf_header hdr_tmpl = {
.magic = BTF_MAGIC,
.version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
};
struct btf_raw_test {
const char *descr;
const char *str_sec;
const char *map_name;
+ const char *err_str;
__u32 raw_types[MAX_NR_RAW_TYPES];
__u32 str_sec_size;
enum bpf_map_type map_type;
__u32 key_size;
__u32 value_size;
- __u32 key_id;
- __u32 value_id;
+ __u32 key_type_id;
+ __u32 value_type_id;
__u32 max_entries;
bool btf_load_err;
bool map_create_err;
+ int hdr_len_delta;
int type_off_delta;
int str_off_delta;
int str_len_delta;
@@ -141,8 +144,8 @@ static struct btf_raw_test raw_tests[] = {
* };
*
* struct A {
- * int m;
- * unsigned long long n;
+ * unsigned long long m;
+ * int n;
* char o;
* [3 bytes hole]
* int p[8];
@@ -163,8 +166,8 @@ static struct btf_raw_test raw_tests[] = {
BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
/* struct A { */ /* [5] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180),
- BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
- BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* unsigned long long n;*/
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8] */
@@ -172,6 +175,7 @@ static struct btf_raw_test raw_tests[] = {
/* } */
/* int[4][8] */
BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [6] */
+ /* enum E */ /* [7] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
BTF_ENUM_ENC(NAME_TBD, 0),
BTF_ENUM_ENC(NAME_TBD, 1),
@@ -183,8 +187,8 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "struct_test1_map",
.key_size = sizeof(int),
.value_size = 180,
- .key_id = 1,
- .value_id = 5,
+ .key_type_id = 1,
+ .value_type_id = 5,
.max_entries = 4,
},
@@ -238,8 +242,8 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "struct_test2_map",
.key_size = sizeof(int),
.value_size = 68,
- .key_id = 1,
- .value_id = 3,
+ .key_type_id = 1,
+ .value_type_id = 3,
.max_entries = 4,
},
@@ -258,7 +262,7 @@ static struct btf_raw_test raw_tests[] = {
/* struct A { */ /* [2] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
- BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
/* } */
BTF_END_RAW,
},
@@ -268,10 +272,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "size_check1_map",
.key_size = sizeof(int),
.value_size = 1,
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
},
/* Test member exceeds the size of struct
@@ -301,11 +306,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "size_check2_map",
.key_size = sizeof(int),
.value_size = 1,
- .key_id = 1,
- .value_id = 3,
+ .key_type_id = 1,
+ .value_type_id = 3,
.max_entries = 4,
.btf_load_err = true,
-
+ .err_str = "Member exceeds struct_size",
},
/* Test member exceeds the size of struct
@@ -335,10 +340,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "size_check3_map",
.key_size = sizeof(int),
.value_size = 1,
- .key_id = 1,
- .value_id = 3,
+ .key_type_id = 1,
+ .value_type_id = 3,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
},
/* Test member exceeds the size of struct
@@ -376,10 +382,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "size_check4_map",
.key_size = sizeof(int),
.value_size = 1,
- .key_id = 1,
- .value_id = 3,
+ .key_type_id = 1,
+ .value_type_id = 3,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
},
/* typedef const void * const_void_ptr;
@@ -411,8 +418,8 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "void_test1_map",
.key_size = sizeof(int),
.value_size = sizeof(void *),
- .key_id = 1,
- .value_id = 4,
+ .key_type_id = 1,
+ .value_type_id = 4,
.max_entries = 4,
},
@@ -440,10 +447,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "void_test2_map",
.key_size = sizeof(int),
.value_size = sizeof(void *),
- .key_id = 1,
- .value_id = 3,
+ .key_type_id = 1,
+ .value_type_id = 3,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Invalid member",
},
/* typedef const void * const_void_ptr;
@@ -458,9 +466,9 @@ static struct btf_raw_test raw_tests[] = {
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
/* const void* */ /* [3] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
- /* typedef const void * const_void_ptr */
+ /* typedef const void * const_void_ptr */ /* [4] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
- /* const_void_ptr[4] */ /* [4] */
+ /* const_void_ptr[4] */ /* [5] */
BTF_TYPE_ARRAY_ENC(3, 1, 4),
BTF_END_RAW,
},
@@ -470,8 +478,8 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "void_test3_map",
.key_size = sizeof(int),
.value_size = sizeof(void *) * 4,
- .key_id = 1,
- .value_id = 4,
+ .key_type_id = 1,
+ .value_type_id = 4,
.max_entries = 4,
},
@@ -493,10 +501,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "void_test4_map",
.key_size = sizeof(int),
.value_size = sizeof(void *) * 4,
- .key_id = 1,
- .value_id = 3,
+ .key_type_id = 1,
+ .value_type_id = 3,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Invalid elem",
},
/* Array_A <------------------+
@@ -523,10 +532,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test1_map",
.key_size = sizeof(int),
.value_size = sizeof(sizeof(int) * 8),
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Loop detected",
},
/* typedef is _before_ the BTF type of Array_A and Array_B
@@ -551,7 +561,6 @@ static struct btf_raw_test raw_tests[] = {
BTF_TYPE_ARRAY_ENC(2, 1, 8), /* [3] */
/* Array_B */
BTF_TYPE_ARRAY_ENC(3, 1, 8), /* [4] */
-
BTF_END_RAW,
},
.str_sec = "\0int_array\0",
@@ -560,10 +569,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test2_map",
.key_size = sizeof(int),
.value_size = sizeof(sizeof(int) * 8),
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Loop detected",
},
/* Array_A <------------------+
@@ -582,7 +592,6 @@ static struct btf_raw_test raw_tests[] = {
BTF_TYPE_ARRAY_ENC(3, 1, 8),
/* Array_B */ /* [3] */
BTF_TYPE_ARRAY_ENC(2, 1, 8),
-
BTF_END_RAW,
},
.str_sec = "",
@@ -591,10 +600,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test3_map",
.key_size = sizeof(int),
.value_size = sizeof(sizeof(int) * 8),
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Loop detected",
},
/* typedef is _between_ the BTF type of Array_A and Array_B
@@ -627,10 +637,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test4_map",
.key_size = sizeof(int),
.value_size = sizeof(sizeof(int) * 8),
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Loop detected",
},
/* typedef struct B Struct_B
@@ -668,10 +679,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test5_map",
.key_size = sizeof(int),
.value_size = 8,
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Loop detected",
},
/* struct A {
@@ -697,10 +709,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test6_map",
.key_size = sizeof(int),
.value_size = 8,
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Loop detected",
},
{
@@ -724,10 +737,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test7_map",
.key_size = sizeof(int),
.value_size = sizeof(void *),
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Loop detected",
},
{
@@ -759,34 +773,73 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test8_map",
.key_size = sizeof(int),
.value_size = sizeof(void *),
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Loop detected",
},
{
- .descr = "type_off == str_off",
+ .descr = "string section does not end with null",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
BTF_END_RAW,
},
.str_sec = "\0int",
+ .str_sec_size = sizeof("\0int") - 1,
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid string section",
+},
+
+{
+ .descr = "empty string section",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = 0,
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid string section",
+},
+
+{
+ .descr = "empty type section",
+ .raw_types = {
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
.str_sec_size = sizeof("\0int"),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
- .key_id = 1,
- .value_id = 1,
+ .key_type_id = 1,
+ .value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
- .type_off_delta = sizeof(struct btf_type) + sizeof(int) + sizeof("\0int"),
+ .err_str = "No type found",
},
{
- .descr = "Unaligned type_off",
+ .descr = "btf_header test. Longer hdr_len",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -798,15 +851,16 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
- .key_id = 1,
- .value_id = 1,
+ .key_type_id = 1,
+ .value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
- .type_off_delta = 1,
+ .hdr_len_delta = 4,
+ .err_str = "Unsupported btf_header",
},
{
- .descr = "str_off beyonds btf size",
+ .descr = "btf_header test. Gap between hdr and type",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -818,15 +872,16 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
- .key_id = 1,
- .value_id = 1,
+ .key_type_id = 1,
+ .value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
- .str_off_delta = sizeof("\0int") + 1,
+ .type_off_delta = 4,
+ .err_str = "Unsupported section found",
},
{
- .descr = "str_len beyonds btf size",
+ .descr = "btf_header test. Gap between type and str",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -838,15 +893,16 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
- .key_id = 1,
- .value_id = 1,
+ .key_type_id = 1,
+ .value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
- .str_len_delta = 1,
+ .str_off_delta = 4,
+ .err_str = "Unsupported section found",
},
{
- .descr = "String section does not end with null",
+ .descr = "btf_header test. Overlap between type and str",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -858,15 +914,16 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
- .key_id = 1,
- .value_id = 1,
+ .key_type_id = 1,
+ .value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
- .str_len_delta = -1,
+ .str_off_delta = -4,
+ .err_str = "Section overlap found",
},
{
- .descr = "Empty string section",
+ .descr = "btf_header test. Larger BTF size",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -878,11 +935,288 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
- .key_id = 1,
- .value_id = 1,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .str_len_delta = -4,
+ .err_str = "Unsupported section found",
+},
+
+{
+ .descr = "btf_header test. Smaller BTF size",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .str_len_delta = 4,
+ .err_str = "Total section length too long",
+},
+
+{
+ .descr = "array test. index_type/elem_type \"int\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 16),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "array test. index_type/elem_type \"const int\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 3, 16),
+ /* CONST type_id=1 */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "array test. index_type \"const int:31\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int:31 */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
+ /* int[16] */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(1, 4, 16),
+ /* CONST type_id=2 */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "array test. elem_type \"const int:31\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int:31 */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
+ /* int[16] */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(4, 1, 16),
+ /* CONST type_id=2 */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid array of int",
+},
+
+{
+ .descr = "array test. index_type \"void\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 0, 16),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "array test. index_type \"const void\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 3, 16),
+ /* CONST type_id=0 (void) */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "array test. elem_type \"const void\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 16),
+ /* CONST type_id=0 (void) */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid elem",
+},
+
+{
+ .descr = "array test. elem_type \"const void *\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void *[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 16),
+ /* CONST type_id=4 */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+ /* void* */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "array test. index_type \"const void *\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void *[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 3, 16),
+ /* CONST type_id=4 */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+ /* void* */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
- .str_len_delta = 0 - (int)sizeof("\0int"),
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "int test. invalid int_data",
+ .raw_types = {
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), 4),
+ 0x10000000,
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid int_data",
+},
+
+{
+ .descr = "invalid BTF_INFO",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_TYPE_ENC(0, 0x10000000, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid btf_info",
},
}; /* struct btf_raw_test raw_tests[] */
@@ -951,6 +1285,7 @@ static void *btf_raw_create(const struct btf_header *hdr,
memcpy(raw_btf + offset, str, str_sec_size);
ret_hdr = (struct btf_header *)raw_btf;
+ ret_hdr->type_len = type_sec_size;
ret_hdr->str_off = type_sec_size;
ret_hdr->str_len = str_sec_size;
@@ -981,6 +1316,7 @@ static int do_test_raw(unsigned int test_num)
hdr = raw_btf;
+ hdr->hdr_len = (int)hdr->hdr_len + test->hdr_len_delta;
hdr->type_off = (int)hdr->type_off + test->type_off_delta;
hdr->str_off = (int)hdr->str_off + test->str_off_delta;
hdr->str_len = (int)hdr->str_len + test->str_len_delta;
@@ -992,8 +1328,13 @@ static int do_test_raw(unsigned int test_num)
free(raw_btf);
err = ((btf_fd == -1) != test->btf_load_err);
- CHECK(err, "btf_fd:%d test->btf_load_err:%u",
- btf_fd, test->btf_load_err);
+ if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
+ btf_fd, test->btf_load_err) ||
+ CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
+ "expected err_str:%s", test->err_str)) {
+ err = -1;
+ goto done;
+ }
if (err || btf_fd == -1)
goto done;
@@ -1004,8 +1345,8 @@ static int do_test_raw(unsigned int test_num)
create_attr.value_size = test->value_size;
create_attr.max_entries = test->max_entries;
create_attr.btf_fd = btf_fd;
- create_attr.btf_key_id = test->key_id;
- create_attr.btf_value_id = test->value_id;
+ create_attr.btf_key_type_id = test->key_type_id;
+ create_attr.btf_value_type_id = test->value_type_id;
map_fd = bpf_create_map_xattr(&create_attr);
@@ -1267,8 +1608,8 @@ static int test_btf_id(unsigned int test_num)
create_attr.value_size = sizeof(unsigned int);
create_attr.max_entries = 4;
create_attr.btf_fd = btf_fd[0];
- create_attr.btf_key_id = 1;
- create_attr.btf_value_id = 2;
+ create_attr.btf_key_type_id = 1;
+ create_attr.btf_value_type_id = 2;
map_fd = bpf_create_map_xattr(&create_attr);
if (CHECK(map_fd == -1, "errno:%d", errno)) {
@@ -1279,10 +1620,10 @@ static int test_btf_id(unsigned int test_num)
info_len = sizeof(map_info);
err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
if (CHECK(err || map_info.btf_id != info[0].id ||
- map_info.btf_key_id != 1 || map_info.btf_value_id != 2,
- "err:%d errno:%d info.id:%u btf_id:%u btf_key_id:%u btf_value_id:%u",
- err, errno, info[0].id, map_info.btf_id, map_info.btf_key_id,
- map_info.btf_value_id)) {
+ map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2,
+ "err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u",
+ err, errno, info[0].id, map_info.btf_id, map_info.btf_key_type_id,
+ map_info.btf_value_type_id)) {
err = -1;
goto done;
}
@@ -1542,10 +1883,10 @@ static int do_test_file(unsigned int test_num)
goto done;
}
- err = (bpf_map__btf_key_id(map) == 0 || bpf_map__btf_value_id(map) == 0)
+ err = (bpf_map__btf_key_type_id(map) == 0 || bpf_map__btf_value_type_id(map) == 0)
!= test->btf_kv_notfound;
- if (CHECK(err, "btf_key_id:%u btf_value_id:%u test->btf_kv_notfound:%u",
- bpf_map__btf_key_id(map), bpf_map__btf_value_id(map),
+ if (CHECK(err, "btf_key_type_id:%u btf_value_type_id:%u test->btf_kv_notfound:%u",
+ bpf_map__btf_key_type_id(map), bpf_map__btf_value_type_id(map),
test->btf_kv_notfound))
goto done;
@@ -1615,7 +1956,7 @@ static struct btf_raw_test pprint_test = {
/* 28 bits */ /* [7] */
BTF_TYPE_INT_ENC(0, 0, 0, 28, 4),
/* uint8_t[8] */ /* [8] */
- BTF_TYPE_ARRAY_ENC(9, 3, 8),
+ BTF_TYPE_ARRAY_ENC(9, 1, 8),
/* typedef unsigned char uint8_t */ /* [9] */
BTF_TYPEDEF_ENC(NAME_TBD, 1),
/* typedef unsigned short uint16_t */ /* [10] */
@@ -1654,8 +1995,8 @@ static struct btf_raw_test pprint_test = {
.map_name = "pprint_test",
.key_size = sizeof(unsigned int),
.value_size = sizeof(struct pprint_mapv),
- .key_id = 3, /* unsigned int */
- .value_id = 16, /* struct pprint_mapv */
+ .key_type_id = 3, /* unsigned int */
+ .value_type_id = 16, /* struct pprint_mapv */
.max_entries = 128 * 1024,
};
@@ -1712,8 +2053,8 @@ static int test_pprint(void)
create_attr.value_size = test->value_size;
create_attr.max_entries = test->max_entries;
create_attr.btf_fd = btf_fd;
- create_attr.btf_key_id = test->key_id;
- create_attr.btf_value_id = test->value_id;
+ create_attr.btf_key_type_id = test->key_type_id;
+ create_attr.btf_value_type_id = test->value_type_id;
map_fd = bpf_create_map_xattr(&create_attr);
if (CHECK(map_fd == -1, "errno:%d", errno)) {
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.c b/tools/testing/selftests/bpf/test_lwt_seg6local.c
new file mode 100644
index 000000000000..0575751bc1bc
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.c
@@ -0,0 +1,437 @@
+#include <stddef.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <linux/seg6_local.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+/* Packet parsing state machine helpers. */
+#define cursor_advance(_cursor, _len) \
+ ({ void *_tmp = _cursor; _cursor += _len; _tmp; })
+
+#define SR6_FLAG_ALERT (1 << 4)
+
+#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
+ 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
+#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
+ 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
+#define BPF_PACKET_HEADER __attribute__((packed))
+
+struct ip6_t {
+ unsigned int ver:4;
+ unsigned int priority:8;
+ unsigned int flow_label:20;
+ unsigned short payload_len;
+ unsigned char next_header;
+ unsigned char hop_limit;
+ unsigned long long src_hi;
+ unsigned long long src_lo;
+ unsigned long long dst_hi;
+ unsigned long long dst_lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_addr_t {
+ unsigned long long hi;
+ unsigned long long lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_srh_t {
+ unsigned char nexthdr;
+ unsigned char hdrlen;
+ unsigned char type;
+ unsigned char segments_left;
+ unsigned char first_segment;
+ unsigned char flags;
+ unsigned short tag;
+
+ struct ip6_addr_t segments[0];
+} BPF_PACKET_HEADER;
+
+struct sr6_tlv_t {
+ unsigned char type;
+ unsigned char len;
+ unsigned char value[0];
+} BPF_PACKET_HEADER;
+
+__attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
+{
+ void *cursor, *data_end;
+ struct ip6_srh_t *srh;
+ struct ip6_t *ip;
+ uint8_t *ipver;
+
+ data_end = (void *)(long)skb->data_end;
+ cursor = (void *)(long)skb->data;
+ ipver = (uint8_t *)cursor;
+
+ if ((void *)ipver + sizeof(*ipver) > data_end)
+ return NULL;
+
+ if ((*ipver >> 4) != 6)
+ return NULL;
+
+ ip = cursor_advance(cursor, sizeof(*ip));
+ if ((void *)ip + sizeof(*ip) > data_end)
+ return NULL;
+
+ if (ip->next_header != 43)
+ return NULL;
+
+ srh = cursor_advance(cursor, sizeof(*srh));
+ if ((void *)srh + sizeof(*srh) > data_end)
+ return NULL;
+
+ if (srh->type != 4)
+ return NULL;
+
+ return srh;
+}
+
+__attribute__((always_inline))
+int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
+ uint32_t old_pad, uint32_t pad_off)
+{
+ int err;
+
+ if (new_pad != old_pad) {
+ err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
+ (int) new_pad - (int) old_pad);
+ if (err)
+ return err;
+ }
+
+ if (new_pad > 0) {
+ char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0};
+ struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
+
+ pad_tlv->type = SR6_TLV_PADDING;
+ pad_tlv->len = new_pad - 2;
+
+ err = bpf_lwt_seg6_store_bytes(skb, pad_off,
+ (void *)pad_tlv_buf, new_pad);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
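+/* Walk the TLVs that follow the segment list: validate that *tlv_off (or -1,
+ * meaning "append after the existing TLVs") falls on a TLV boundary, and
+ * report the offset and size of an existing Pad TLV so callers can fix up the
+ * padding afterwards.
+ */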
+__attribute__((always_inline))
+int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
+ uint32_t *tlv_off, uint32_t *pad_size,
+ uint32_t *pad_off)
+{
+ uint32_t srh_off, cur_off;
+ int offset_valid = 0;
+ int err;
+
+ srh_off = (char *)srh - (char *)(long)skb->data;
+ // cur_off = end of segments, start of possible TLVs
+ cur_off = srh_off + sizeof(*srh) +
+ sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
+
+ *pad_off = 0;
+
+ // we can only go as far as ~10 TLVs due to the BPF max stack size
+ #pragma clang loop unroll(full)
+ for (int i = 0; i < 10; i++) {
+ struct sr6_tlv_t tlv;
+
+ if (cur_off == *tlv_off)
+ offset_valid = 1;
+
+ if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
+ break;
+
+ err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
+ if (err)
+ return err;
+
+ if (tlv.type == SR6_TLV_PADDING) {
+ *pad_size = tlv.len + sizeof(tlv);
+ *pad_off = cur_off;
+
+ if (*tlv_off == srh_off) {
+ *tlv_off = cur_off;
+ offset_valid = 1;
+ }
+ break;
+
+ } else if (tlv.type == SR6_TLV_HMAC) {
+ break;
+ }
+
+ cur_off += sizeof(tlv) + tlv.len;
+ } // we reached the padding or HMAC TLVs, or the end of the SRH
+
+ if (*pad_off == 0)
+ *pad_off = cur_off;
+
+ if (*tlv_off == -1)
+ *tlv_off = cur_off;
+ else if (!offset_valid)
+ return -EINVAL;
+
+ return 0;
+}
+
+__attribute__((always_inline))
+int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
+ struct sr6_tlv_t *itlv, uint8_t tlv_size)
+{
+ uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+ uint8_t len_remaining, new_pad;
+ uint32_t pad_off = 0;
+ uint32_t pad_size = 0;
+ uint32_t partial_srh_len;
+ int err;
+
+ if (tlv_off != -1)
+ tlv_off += srh_off;
+
+ if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
+ return -EINVAL;
+
+ err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+ if (err)
+ return err;
+
+ err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
+ if (err)
+ return err;
+
+ err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
+ if (err)
+ return err;
+
+ // the following can't be moved inside update_tlv_pad because the
+ // bpf verifier has some issues with it
+ pad_off += sizeof(*itlv) + itlv->len;
+ partial_srh_len = pad_off - srh_off;
+ len_remaining = partial_srh_len % 8;
+ new_pad = 8 - len_remaining;
+
+ if (new_pad == 1) // cannot pad for 1 byte only
+ new_pad = 9;
+ else if (new_pad == 8)
+ new_pad = 0;
+
+ return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+
+__attribute__((always_inline))
+int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
+ uint32_t tlv_off)
+{
+ uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+ uint8_t len_remaining, new_pad;
+ uint32_t partial_srh_len;
+ uint32_t pad_off = 0;
+ uint32_t pad_size = 0;
+ struct sr6_tlv_t tlv;
+ int err;
+
+ tlv_off += srh_off;
+
+ err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+ if (err)
+ return err;
+
+ err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
+ if (err)
+ return err;
+
+ err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
+ if (err)
+ return err;
+
+ pad_off -= sizeof(tlv) + tlv.len;
+ partial_srh_len = pad_off - srh_off;
+ len_remaining = partial_srh_len % 8;
+ new_pad = 8 - len_remaining;
+ if (new_pad == 1) // cannot pad for 1 byte only
+ new_pad = 9;
+ else if (new_pad == 8)
+ new_pad = 0;
+
+ return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+
+__attribute__((always_inline))
+int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
+{
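+	/* The Egress TLV, if present, sits right after the segment list: the
+	 * offset skips the IPv6 header, the fixed SRH header and 16 bytes per
+	 * listed segment.
+	 */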
+ int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
+ ((srh->first_segment + 1) << 4);
+ struct sr6_tlv_t tlv;
+
+ if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t)))
+ return 0;
+
+ if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) {
+ struct ip6_addr_t egr_addr;
+
+ if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16))
+ return 0;
+
+ // check if egress TLV value is correct
+ if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
+ ntohll(egr_addr.lo) == 0x4)
+ return 1;
+ }
+
+ return 0;
+}
+
+// This function pushes an SRH with segments fd00::1, fd00::2, fd00::3,
+// fd00::4
+SEC("encap_srh")
+int __encap_srh(struct __sk_buff *skb)
+{
+ unsigned long long hi = 0xfd00000000000000;
+ struct ip6_addr_t *seg;
+ struct ip6_srh_t *srh;
+ char srh_buf[72]; // room for 4 segments
+ int err;
+
+ srh = (struct ip6_srh_t *)srh_buf;
+ srh->nexthdr = 0;
+ srh->hdrlen = 8;
+ srh->type = 4;
+ srh->segments_left = 3;
+ srh->first_segment = 3;
+ srh->flags = 0;
+ srh->tag = 0;
+
+ seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
+
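+	// Segments are stored in reverse order: segments[0] is the final
+	// segment (fd00::4) and segments[3] is the first hop (fd00::1),
+	// which segments_left points at.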
+ #pragma clang loop unroll(full)
+ for (unsigned long long lo = 0; lo < 4; lo++) {
+ seg->lo = htonll(4 - lo);
+ seg->hi = htonll(hi);
+ seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
+ }
+
+ err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf));
+ if (err)
+ return BPF_DROP;
+
+ return BPF_REDIRECT;
+}
+
+// Add an Egress TLV with value fd00::4, set the Alert flag,
+// and apply an End.X action to fc42::1
+SEC("add_egr_x")
+int __add_egr_x(struct __sk_buff *skb)
+{
+ unsigned long long hi = 0xfc42000000000000;
+ unsigned long long lo = 0x1;
+ struct ip6_srh_t *srh = get_srh(skb);
+ uint8_t new_flags = SR6_FLAG_ALERT;
+ struct ip6_addr_t addr;
+ int err, offset;
+
+ if (srh == NULL)
+ return BPF_DROP;
+
+ uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
+
+ err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
+ (struct sr6_tlv_t *)&tlv, 20);
+ if (err)
+ return BPF_DROP;
+
+ offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+ err = bpf_lwt_seg6_store_bytes(skb, offset,
+ (void *)&new_flags, sizeof(new_flags));
+ if (err)
+ return BPF_DROP;
+
+ addr.lo = htonll(lo);
+ addr.hi = htonll(hi);
+ err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
+ (void *)&addr, sizeof(addr));
+ if (err)
+ return BPF_DROP;
+ return BPF_REDIRECT;
+}
+
+// Pop the Egress TLV, reset the flags, change the tag to 2442 and finally do
+// a simple End action
+SEC("pop_egr")
+int __pop_egr(struct __sk_buff *skb)
+{
+ struct ip6_srh_t *srh = get_srh(skb);
+ uint16_t new_tag = bpf_htons(2442);
+ uint8_t new_flags = 0;
+ int err, offset;
+
+ if (srh == NULL)
+ return BPF_DROP;
+
+ if (srh->flags != SR6_FLAG_ALERT)
+ return BPF_DROP;
+
+ if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV
+ return BPF_DROP;
+
+ if (!has_egr_tlv(skb, srh))
+ return BPF_DROP;
+
+ err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16);
+ if (err)
+ return BPF_DROP;
+
+ offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+ if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags,
+ sizeof(new_flags)))
+ return BPF_DROP;
+
+ offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
+ if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag,
+ sizeof(new_tag)))
+ return BPF_DROP;
+
+ return BPF_OK;
+}
+
+// Check that the Egress TLV and the Alert flag have been removed and that the
+// tag is correct, then apply an End.T action to reach the last segment
+SEC("inspect_t")
+int __inspect_t(struct __sk_buff *skb)
+{
+ struct ip6_srh_t *srh = get_srh(skb);
+ int table = 117;
+ int err;
+
+ if (srh == NULL)
+ return BPF_DROP;
+
+ if (srh->flags != 0)
+ return BPF_DROP;
+
+ if (srh->tag != bpf_htons(2442))
+ return BPF_DROP;
+
+ if (srh->hdrlen != 8) // 4 segments
+ return BPF_DROP;
+
+ err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
+ (void *)&table, sizeof(table));
+
+ if (err)
+ return BPF_DROP;
+
+ return BPF_REDIRECT;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
new file mode 100755
index 000000000000..1c77994b5e71
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -0,0 +1,140 @@
+#!/bin/bash
+# Connects 6 network namespaces through veths.
+# Each NS may have different IPv6 global-scope addresses:
+# NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6
+# fb00::1           fd00::1  fd00::2  fd00::3  fb00::6
+#                            fc42::1           fd00::4
+#
+# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in an
+# outer IPv6 header with a Segment Routing Header, with segments:
+# fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
+#
+# 3 fd00::/16 IPv6 addresses are bound to seg6local End.BPF actions:
+# - fd00::1 : add a TLV, change the flags and apply an End.X action to fc42::1
+# - fd00::2 : remove the TLV, change the flags, add a tag
+# - fd00::3 : apply an End.T action to fd00::4, through routing table 117
+#
+# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
+# Each End.BPF action will validate the operations applied on the SRH by the
+# previous BPF program in the chain, otherwise the packet is dropped.
+#
+# A UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
+# datagram can be read on NS6 when binding to fb00::6.
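+#
+# The link-scope fb00:: addresses encode the veth pair: fb00::XY lives on
+# vethX, whose peer is vethY (e.g. fb00::12 on veth1, fb00::21 on veth2,
+# fb00::910 on veth9).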
+
+TMP_FILE="/tmp/selftest_lwt_seg6local.txt"
+
+cleanup()
+{
+ if [ "$?" = "0" ]; then
+ echo "selftests: test_lwt_seg6local [PASS]";
+ else
+ echo "selftests: test_lwt_seg6local [FAILED]";
+ fi
+
+ set +e
+ ip netns del ns1 2> /dev/null
+ ip netns del ns2 2> /dev/null
+ ip netns del ns3 2> /dev/null
+ ip netns del ns4 2> /dev/null
+ ip netns del ns5 2> /dev/null
+ ip netns del ns6 2> /dev/null
+ rm -f $TMP_FILE
+}
+
+set -e
+
+ip netns add ns1
+ip netns add ns2
+ip netns add ns3
+ip netns add ns4
+ip netns add ns5
+ip netns add ns6
+
+trap cleanup 0 2 3 6 9
+
+ip link add veth1 type veth peer name veth2
+ip link add veth3 type veth peer name veth4
+ip link add veth5 type veth peer name veth6
+ip link add veth7 type veth peer name veth8
+ip link add veth9 type veth peer name veth10
+
+ip link set veth1 netns ns1
+ip link set veth2 netns ns2
+ip link set veth3 netns ns2
+ip link set veth4 netns ns3
+ip link set veth5 netns ns3
+ip link set veth6 netns ns4
+ip link set veth7 netns ns4
+ip link set veth8 netns ns5
+ip link set veth9 netns ns5
+ip link set veth10 netns ns6
+
+ip netns exec ns1 ip link set dev veth1 up
+ip netns exec ns2 ip link set dev veth2 up
+ip netns exec ns2 ip link set dev veth3 up
+ip netns exec ns3 ip link set dev veth4 up
+ip netns exec ns3 ip link set dev veth5 up
+ip netns exec ns4 ip link set dev veth6 up
+ip netns exec ns4 ip link set dev veth7 up
+ip netns exec ns5 ip link set dev veth8 up
+ip netns exec ns5 ip link set dev veth9 up
+ip netns exec ns6 ip link set dev veth10 up
+ip netns exec ns6 ip link set dev lo up
+
+# All link scope addresses and routes required between veths
+ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link
+ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link
+ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link
+ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link
+ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link
+ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link
+ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link
+ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link
+ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link
+ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link
+ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link
+ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link
+ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link
+ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link
+ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link
+ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link
+
+ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo
+ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21
+
+ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
+ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
+
+ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65
+ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF obj test_lwt_seg6local.o sec add_egr_x dev veth4
+
+ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF obj test_lwt_seg6local.o sec pop_egr dev veth6
+ip netns exec ns4 ip -6 addr add fc42::1 dev lo
+ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87
+
+ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
+ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF obj test_lwt_seg6local.o sec inspect_t dev veth8
+
+ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo
+ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo
+
+ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
+ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
+ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
+
+ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
+ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
+sleep 5 # wait long enough for the UDP datagram to reach the last segment
+kill -INT $!
+
+if [[ $(< $TMP_FILE) != "foobar" ]]; then
+ exit 1
+fi
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 3ecf733330c1..0ef68204c84b 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1542,6 +1542,162 @@ close_prog_noerr:
bpf_object__close(obj);
}
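+/* bpf_task_fd_query() wraps the BPF_TASK_FD_QUERY command: given a pid and an
+ * fd, the kernel reports what the fd is attached to, i.e. the prog id, the fd
+ * type (raw tracepoint, tracepoint, kprobe, ...) and the probe
+ * name/offset/address.  The tests below cover raw tracepoints and tracepoints
+ * opened through perf_event_open().
+ */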
+static void test_task_fd_query_rawtp(void)
+{
+ const char *file = "./test_get_stack_rawtp.o";
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ struct bpf_object *obj;
+ int efd, err, prog_fd;
+ __u32 duration = 0;
+ char buf[256];
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+ return;
+
+ efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
+ if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+
+ /* query (getpid(), efd) */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
+ errno))
+ goto close_prog;
+
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ strcmp(buf, "sys_enter") == 0;
+ if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
+ fd_type, buf))
+ goto close_prog;
+
+ /* test zero len */
+ len = 0;
+ err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query (len = 0)", "err %d errno %d\n",
+ err, errno))
+ goto close_prog;
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ len == strlen("sys_enter");
+ if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+ goto close_prog;
+
+ /* test empty buffer */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), efd, 0, 0, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query (buf = 0)", "err %d errno %d\n",
+ err, errno))
+ goto close_prog;
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ len == strlen("sys_enter");
+ if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+ goto close_prog;
+
+ /* test smaller buffer */
+ len = 3;
+ err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err >= 0 || errno != ENOSPC, "bpf_task_fd_query (len = 3)",
+ "err %d errno %d\n", err, errno))
+ goto close_prog;
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ len == strlen("sys_enter") &&
+ strcmp(buf, "sy") == 0;
+ if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+ goto close_prog;
+
+ goto close_prog_noerr;
+close_prog:
+ error_cnt++;
+close_prog_noerr:
+ bpf_object__close(obj);
+}
+
+static void test_task_fd_query_tp_core(const char *probe_name,
+ const char *tp_name)
+{
+ const char *file = "./test_tracepoint.o";
+ int err, bytes, efd, prog_fd, pmu_fd;
+ struct perf_event_attr attr = {};
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ struct bpf_object *obj;
+ __u32 duration = 0;
+ char buf[256];
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
+ "bytes %d errno %d\n", bytes, errno))
+ goto close_prog;
+
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(err, "perf_event_open", "err %d errno %d\n", err, errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ /* query (getpid(), pmu_fd) */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), pmu_fd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ err = (fd_type == BPF_FD_TYPE_TRACEPOINT) && !strcmp(buf, tp_name);
+ if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
+ fd_type, buf))
+ goto close_pmu;
+
+ close(pmu_fd);
+ goto close_prog_noerr;
+
+close_pmu:
+ close(pmu_fd);
+close_prog:
+ error_cnt++;
+close_prog_noerr:
+ bpf_object__close(obj);
+}
+
+static void test_task_fd_query_tp(void)
+{
+ test_task_fd_query_tp_core("sched/sched_switch",
+ "sched_switch");
+ test_task_fd_query_tp_core("syscalls/sys_enter_read",
+ "sys_enter_read");
+}
+
int main(void)
{
jit_enabled = is_jit_enabled();
@@ -1561,6 +1717,8 @@ int main(void)
test_stacktrace_build_id_nmi();
test_stacktrace_map_raw_tp();
test_get_stack_raw_tp();
+ test_task_fd_query_rawtp();
+ test_task_fd_query_tp();
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 94498eaf872e..4b4f015be217 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1686,6 +1686,121 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_SK_SKB,
},
{
+ "valid access family in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, family)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access remote_ip4 in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access local_ip4 in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access remote_port in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_port)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access local_port in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_port)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access remote_ip6 in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_ip6[3])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access local_ip6 in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_ip6[3])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "invalid 64B read of family in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, family)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "invalid read past end of SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_port) + 4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "invalid read offset in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, family) + 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
"direct packet read for SK_MSG",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 8fb4fe8686e4..3868dcb63420 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -72,6 +72,18 @@ struct ksym *ksym_search(long key)
return &syms[0];
}
+long ksym_get_addr(const char *name)
+{
+ int i;
+
+ for (i = 0; i < sym_cnt; i++) {
+ if (strcmp(syms[i].name, name) == 0)
+ return syms[i].addr;
+ }
+
+ return 0;
+}
+
static int page_size;
static int page_cnt = 8;
static struct perf_event_mmap_page *header;
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 36d90e3b1ea9..3b4bcf7f5084 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -11,6 +11,7 @@ struct ksym {
int load_kallsyms(void);
struct ksym *ksym_search(long key);
+long ksym_get_addr(const char *name);
typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size);
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 91041c49655b..89ba4cdd4392 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -340,6 +340,31 @@ tunnel_destroy()
ip link del dev $name
}
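+# vlan_create <iface> <vid> [<vrf> [<ip/plen> ...]]: create <iface>.<vid>,
+# optionally enslave it to <vrf>, and assign the given addresses, e.g.
+# "vlan_create $h1 111 v$h1 192.0.2.17/28".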
+vlan_create()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local vrf=$1; shift
+ local ips=("${@}")
+ local name=$if_name.$vid
+
+ ip link add name $name link $if_name type vlan id $vid
+ if [ "$vrf" != "" ]; then
+ ip link set dev $name master $vrf
+ fi
+ ip link set dev $name up
+ __addr_add_del $name add "${ips[@]}"
+}
+
+vlan_destroy()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local name=$if_name.$vid
+
+ ip link del dev $name
+}
+
master_name_get()
{
local if_name=$1
@@ -423,26 +448,35 @@ tc_offload_check()
return 0
}
-slow_path_trap_install()
+trap_install()
{
local dev=$1; shift
local direction=$1; shift
- if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
- # For slow-path testing, we need to install a trap to get to
- # slow path the packets that would otherwise be switched in HW.
- tc filter add dev $dev $direction pref 1 \
- flower skip_sw action trap
- fi
+ # For slow-path testing, we need to install a trap to get to
+ # slow path the packets that would otherwise be switched in HW.
+ tc filter add dev $dev $direction pref 1 flower skip_sw action trap
}
-slow_path_trap_uninstall()
+trap_uninstall()
{
local dev=$1; shift
local direction=$1; shift
+ tc filter del dev $dev $direction pref 1 flower skip_sw
+}
+
+slow_path_trap_install()
+{
+ if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+ trap_install "$@"
+ fi
+}
+
+slow_path_trap_uninstall()
+{
if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
- tc filter del dev $dev $direction pref 1 flower skip_sw
+ trap_uninstall "$@"
fi
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
index c6786d1b2b96..e6fd7a18c655 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh
@@ -72,7 +72,6 @@ test_span_gre_mac()
RET=0
mirror_install $swp1 $direction $tundev "matchall $tcflags"
- tc qdisc add dev $h3 clsact
tc filter add dev $h3 ingress pref 77 prot $prot \
flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \
action pass
@@ -80,7 +79,6 @@ test_span_gre_mac()
mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
tc filter del dev $h3 ingress pref 77
- tc qdisc del dev $h3 clsact
mirror_uninstall $swp1 $direction
log_test "$direction $what: envelope MAC ($tcflags)"
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
new file mode 100755
index 000000000000..3d47afcf7fb9
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses the standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device without vlan filtering (802.1d). The port attached to that
+# bridge is a VLAN device.
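+#
+# Mirrored packets therefore leave the switch through br2 and $swp3.555, so
+# they arrive at $h3 tagged with VLAN 555 and the test matches on vlan_id 555.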
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip link add name br2 type bridge vlan_filtering 0
+ ip link set dev br2 up
+
+ vlan_create $swp3 555
+
+ ip link set dev $swp3.555 master br2
+ ip route add 192.0.2.130/32 dev br2
+ ip -6 route add 2001:db8:2::2/128 dev br2
+
+ ip address add dev br2 192.0.2.129/32
+ ip address add dev br2 2001:db8:2::1/128
+
+ vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vlan_destroy $h3 555
+ ip link del dev br2
+ vlan_destroy $swp3 555
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_vlan_match()
+{
+ local tundev=$1; shift
+ local vlan_match=$1; shift
+ local what=$1; shift
+
+ full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+ full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()
+{
+ test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
index 50ab3462af0c..aa29d46186a8 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -12,6 +12,8 @@ ALL_TESTS="
test_tun_up
test_egress_up
test_remote_ip
+ test_tun_del
+ test_route_del
"
NUM_NETIFS=6
@@ -71,7 +73,6 @@ test_span_gre_ttl()
RET=0
mirror_install $swp1 ingress $tundev "matchall $tcflags"
- tc qdisc add dev $h3 clsact
tc filter add dev $h3 ingress pref 77 prot $prot \
flower ip_ttl 50 action pass
@@ -82,7 +83,6 @@ test_span_gre_ttl()
ip link set dev $tundev type $type ttl 100
tc filter del dev $h3 ingress pref 77
- tc qdisc del dev $h3 clsact
mirror_uninstall $swp1 ingress
log_test "$what: TTL change ($tcflags)"
@@ -159,6 +159,58 @@ test_span_gre_remote_ip()
log_test "$what: remote address change ($tcflags)"
}
+test_span_gre_tun_del()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local flags=$1; shift
+ local local_ip=$1; shift
+ local remote_ip=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+ ip link del dev $tundev
+ fail_test_span_gre_dir $tundev ingress
+
+ tunnel_create $tundev $type $local_ip $remote_ip \
+ ttl 100 tos inherit $flags
+
+ # Recreating the tunnel doesn't reestablish mirroring, so reinstall it
+ # and verify it works for the follow-up tests.
+ mirror_uninstall $swp1 ingress
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: tunnel deleted ($tcflags)"
+}
+
+test_span_gre_route_del()
+{
+ local tundev=$1; shift
+ local edev=$1; shift
+ local route=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+
+ ip route del $route dev $edev
+ fail_test_span_gre_dir $tundev ingress
+
+ ip route add $route dev $edev
+ quick_test_span_gre_dir $tundev ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: underlay route removal ($tcflags)"
+}
+
test_ttl()
{
test_span_gre_ttl gt4 gretap ip "mirror to gretap"
@@ -183,6 +235,20 @@ test_remote_ip()
test_span_gre_remote_ip gt6 ip6gretap 2001:db8:2::2 2001:db8:2::4 "mirror to ip6gretap"
}
+test_tun_del()
+{
+ test_span_gre_tun_del gt4 gretap "" \
+ 192.0.2.129 192.0.2.130 "mirror to gretap"
+ test_span_gre_tun_del gt6 ip6gretap allow-localremote \
+ 2001:db8:2::1 2001:db8:2::2 "mirror to ip6gretap"
+}
+
+test_route_del()
+{
+ test_span_gre_route_del gt4 $swp3 192.0.2.128/28 "mirror to gretap"
+ test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap"
+}
+
test_all()
{
slow_path_trap_install $swp1 ingress
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
index 2e54407d8954..12914f40612d 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
@@ -67,6 +67,11 @@ test_span_gre_dir_acl()
test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
}
+fail_test_span_gre_dir_acl()
+{
+ fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+}
+
full_test_span_gre_dir_acl()
{
local tundev=$1; shift
@@ -83,6 +88,9 @@ full_test_span_gre_dir_acl()
"$forward_type" "$backward_type"
mirror_uninstall $swp1 $direction
+ # Test lack of mirroring after ACL mirror is uninstalled.
+ fail_test_span_gre_dir_acl "$tundev" "$direction"
+
log_test "$direction $what ($tcflags)"
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
index 207ffd167dba..92ef6dde98bd 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -1,53 +1,53 @@
# SPDX-License-Identifier: GPL-2.0
-do_test_span_gre_dir_ips()
+source mirror_lib.sh
+
+quick_test_span_gre_dir_ips()
{
- local expect=$1; shift
local tundev=$1; shift
- local direction=$1; shift
- local ip1=$1; shift
- local ip2=$1; shift
- icmp_capture_install h3-$tundev
- mirror_test v$h1 $ip1 $ip2 h3-$tundev 100 $expect
- mirror_test v$h2 $ip2 $ip1 h3-$tundev 100 $expect
- icmp_capture_uninstall h3-$tundev
+ do_test_span_dir_ips 10 h3-$tundev "$@"
}
-quick_test_span_gre_dir_ips()
+fail_test_span_gre_dir_ips()
{
- do_test_span_gre_dir_ips 10 "$@"
+ local tundev=$1; shift
+
+ do_test_span_dir_ips 0 h3-$tundev "$@"
}
-fail_test_span_gre_dir_ips()
+test_span_gre_dir_ips()
{
- do_test_span_gre_dir_ips 0 "$@"
+ local tundev=$1; shift
+
+ test_span_dir_ips h3-$tundev "$@"
}
-test_span_gre_dir_ips()
+full_test_span_gre_dir_ips()
{
local tundev=$1; shift
local direction=$1; shift
local forward_type=$1; shift
local backward_type=$1; shift
+ local what=$1; shift
local ip1=$1; shift
local ip2=$1; shift
- quick_test_span_gre_dir_ips "$tundev" "$direction" "$ip1" "$ip2"
+ RET=0
- icmp_capture_install h3-$tundev "type $forward_type"
- mirror_test v$h1 $ip1 $ip2 h3-$tundev 100 10
- icmp_capture_uninstall h3-$tundev
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+ "$backward_type" "$ip1" "$ip2"
+ mirror_uninstall $swp1 $direction
- icmp_capture_install h3-$tundev "type $backward_type"
- mirror_test v$h2 $ip2 $ip1 h3-$tundev 100 10
- icmp_capture_uninstall h3-$tundev
+ log_test "$direction $what ($tcflags)"
}
-full_test_span_gre_dir_ips()
+full_test_span_gre_dir_vlan_ips()
{
local tundev=$1; shift
local direction=$1; shift
+ local vlan_match=$1; shift
local forward_type=$1; shift
local backward_type=$1; shift
local what=$1; shift
@@ -57,8 +57,16 @@ full_test_span_gre_dir_ips()
RET=0
mirror_install $swp1 $direction $tundev "matchall $tcflags"
- test_span_gre_dir_ips "$tundev" "$direction" "$forward_type" \
- "$backward_type" "$ip1" "$ip2"
+
+ test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+ "$backward_type" "$ip1" "$ip2"
+
+ tc filter add dev $h3 ingress pref 77 prot 802.1q \
+ flower $vlan_match ip_proto 0x2f \
+ action pass
+ mirror_test v$h1 $ip1 $ip2 $h3 77 10
+ tc filter del dev $h3 ingress pref 77
+
mirror_uninstall $swp1 $direction
log_test "$direction $what ($tcflags)"
@@ -83,3 +91,8 @@ full_test_span_gre_dir()
{
full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
}
+
+full_test_span_gre_dir_vlan()
+{
+ full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
index b3ceda2b4197..253419564708 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
@@ -33,29 +33,11 @@
# | |
# +-------------------------------------------------------------------------+
-mirror_gre_topo_h1_create()
-{
- simple_if_init $h1 192.0.2.1/28
-}
-
-mirror_gre_topo_h1_destroy()
-{
- simple_if_fini $h1 192.0.2.1/28
-}
-
-mirror_gre_topo_h2_create()
-{
- simple_if_init $h2 192.0.2.2/28
-}
-
-mirror_gre_topo_h2_destroy()
-{
- simple_if_fini $h2 192.0.2.2/28
-}
+source mirror_topo_lib.sh
mirror_gre_topo_h3_create()
{
- simple_if_init $h3
+ mirror_topo_h3_create
tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
ip link set h3-gt4 vrf v$h3
@@ -71,49 +53,32 @@ mirror_gre_topo_h3_destroy()
tunnel_destroy h3-gt6
tunnel_destroy h3-gt4
- simple_if_fini $h3
+ mirror_topo_h3_destroy
}
mirror_gre_topo_switch_create()
{
- ip link set dev $swp3 up
-
- ip link add name br1 type bridge vlan_filtering 1
- ip link set dev br1 up
-
- ip link set dev $swp1 master br1
- ip link set dev $swp1 up
-
- ip link set dev $swp2 master br1
- ip link set dev $swp2 up
+ mirror_topo_switch_create
tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
ttl 100 tos inherit
tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
ttl 100 tos inherit allow-localremote
-
- tc qdisc add dev $swp1 clsact
}
mirror_gre_topo_switch_destroy()
{
- tc qdisc del dev $swp1 clsact
-
tunnel_destroy gt6
tunnel_destroy gt4
- ip link set dev $swp1 down
- ip link set dev $swp2 down
- ip link del dev br1
-
- ip link set dev $swp3 down
+ mirror_topo_switch_destroy
}
mirror_gre_topo_create()
{
- mirror_gre_topo_h1_create
- mirror_gre_topo_h2_create
+ mirror_topo_h1_create
+ mirror_topo_h2_create
mirror_gre_topo_h3_create
mirror_gre_topo_switch_create
@@ -124,6 +89,6 @@ mirror_gre_topo_destroy()
mirror_gre_topo_switch_destroy
mirror_gre_topo_h3_destroy
- mirror_gre_topo_h2_destroy
- mirror_gre_topo_h1_destroy
+ mirror_topo_h2_destroy
+ mirror_topo_h1_destroy
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
new file mode 100755
index 000000000000..88cecdb9a861
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses the standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a gretap netdevice
+# whose underlay route points at a vlan device.
+
+ALL_TESTS="
+ test_gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip link add name $swp3.555 link $swp3 type vlan id 555
+ ip address add dev $swp3.555 192.0.2.129/32
+ ip address add dev $swp3.555 2001:db8:2::1/128
+ ip link set dev $swp3.555 up
+
+ ip route add 192.0.2.130/32 dev $swp3.555
+ ip -6 route add 2001:db8:2::2/128 dev $swp3.555
+
+ ip link add name $h3.555 link $h3 type vlan id 555
+ ip link set dev $h3.555 master v$h3
+ ip address add dev $h3.555 192.0.2.130/28
+ ip address add dev $h3.555 2001:db8:2::2/64
+ ip link set dev $h3.555 up
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link del dev $h3.555
+ ip link del dev $swp3.555
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
new file mode 100755
index 000000000000..01ec28ac2e4a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -0,0 +1,140 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# vlan device on top of a bridge device with vlan filtering (802.1q).
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+ test_gretap_forbidden
+ test_ip6gretap_forbidden
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ vlan_create br1 555 "" 192.0.2.129/32 2001:db8:2::1/128
+ bridge vlan add dev br1 vid 555 self
+ ip route rep 192.0.2.130/32 dev br1.555
+ ip -6 route rep 2001:db8:2::2/128 dev br1.555
+
+ vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+
+ ip link set dev $swp3 master br1
+ bridge vlan add dev $swp3 vid 555
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link set dev $swp3 nomaster
+ vlan_destroy $h3 555
+ vlan_destroy br1 555
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_vlan_match()
+{
+ local tundev=$1; shift
+ local vlan_match=$1; shift
+ local what=$1; shift
+
+ full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+ full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()
+{
+ test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap"
+}
+
+test_span_gre_forbidden()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ # Run the pass-test first, to prime the neighbor table.
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+
+ # Now forbid the VLAN at the bridge and see it fail.
+ bridge vlan del dev br1 vid 555 self
+ sleep 1
+
+ fail_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ bridge vlan add dev br1 vid 555 self
+ sleep 1
+
+ log_test "$what: vlan forbidden at a bridge ($tcflags)"
+}
+
+test_gretap_forbidden()
+{
+ test_span_gre_forbidden gt4 "mirror to gretap"
+}
+
+test_ip6gretap_forbidden()
+{
+ test_span_gre_forbidden gt6 "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index e5028a5725e3..04cbc38e71a2 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -38,3 +38,57 @@ mirror_test()
((expect <= delta && delta <= expect + 2))
check_err $? "Expected to capture $expect packets, got $delta."
}
+
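+# do_test_span_dir_ips <expect> <dev> <direction> <ip1> <ip2>: send ICMP
+# between <ip1> and <ip2> in both directions and check that <dev> captures
+# roughly <expect> mirrored packets each way.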
+do_test_span_dir_ips()
+{
+ local expect=$1; shift
+ local dev=$1; shift
+ local direction=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ icmp_capture_install $dev
+ mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+ mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+ icmp_capture_uninstall $dev
+}
+
+quick_test_span_dir_ips()
+{
+ do_test_span_dir_ips 10 "$@"
+}
+
+fail_test_span_dir_ips()
+{
+ do_test_span_dir_ips 0 "$@"
+}
+
+test_span_dir_ips()
+{
+ local dev=$1; shift
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2"
+
+ icmp_capture_install $dev "type $forward_type"
+ mirror_test v$h1 $ip1 $ip2 $dev 100 10
+ icmp_capture_uninstall $dev
+
+ icmp_capture_install $dev "type $backward_type"
+ mirror_test v$h2 $ip2 $ip1 $dev 100 10
+ icmp_capture_uninstall $dev
+}
+
+fail_test_span_dir()
+{
+ fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+test_span_dir()
+{
+ test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
new file mode 100644
index 000000000000..04979e5962e7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring. The tests that use it
+# tweak it in one way or another, typically by adding more devices to the
+# topology.
+#
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o--> mirror | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 BR $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | + $swp3 |
+# +-----|-------------------------------------------------------------------+
+# |
+# +-----|-------------------------------------------------------------------+
+# | H3 + $h3 |
+# | |
+# +-------------------------------------------------------------------------+
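+#
+# $h1..$h3 and $swp1..$swp3 are mapped from the NETIFS array by each test's
+# setup_prepare(), so the same library works against any six-port setup.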
+
+mirror_topo_h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+mirror_topo_h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+mirror_topo_h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+}
+
+mirror_topo_h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+mirror_topo_h3_create()
+{
+ simple_if_init $h3
+ tc qdisc add dev $h3 clsact
+}
+
+mirror_topo_h3_destroy()
+{
+ tc qdisc del dev $h3 clsact
+ simple_if_fini $h3
+}
+
+mirror_topo_switch_create()
+{
+ ip link set dev $swp3 up
+
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+}
+
+mirror_topo_switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp1 down
+ ip link set dev $swp2 down
+ ip link del dev br1
+
+ ip link set dev $swp3 down
+}
+
+mirror_topo_create()
+{
+ mirror_topo_h1_create
+ mirror_topo_h2_create
+ mirror_topo_h3_create
+
+ mirror_topo_switch_create
+}
+
+mirror_topo_destroy()
+{
+ mirror_topo_switch_destroy
+
+ mirror_topo_h3_destroy
+ mirror_topo_h2_destroy
+ mirror_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
new file mode 100755
index 000000000000..1e10520dccf4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses the standard topology for testing mirroring. See
+# mirror_topo_lib.sh
+# for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a vlan device.
+
+ALL_TESTS="
+ test_vlan
+ test_tagged_vlan
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_topo_create
+
+ vlan_create $swp3 555
+
+ vlan_create $h3 555 v$h3
+ matchall_sink_create $h3.555
+
+ vlan_create $h1 111 v$h1 192.0.2.17/28
+ bridge vlan add dev $swp1 vid 111
+
+ vlan_create $h2 111 v$h2 192.0.2.18/28
+ bridge vlan add dev $swp2 vid 111
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vlan_destroy $h2 111
+ vlan_destroy $h1 111
+ vlan_destroy $h3 555
+ vlan_destroy $swp3 555
+
+ mirror_topo_destroy
+ vrf_cleanup
+}
+
+test_vlan_dir()
+{
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+ test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type"
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction mirror to vlan ($tcflags)"
+}
+
+test_vlan()
+{
+ test_vlan_dir ingress 8 0
+ test_vlan_dir egress 0 8
+}
+
+vlan_capture_add_del()
+{
+ local add_del=$1; shift
+ local pref=$1; shift
+ local dev=$1; shift
+ local filter=$1; shift
+
+ tc filter $add_del dev "$dev" ingress \
+ proto 802.1q pref $pref \
+ flower $filter \
+ action pass
+}
+
+vlan_capture_install()
+{
+ vlan_capture_add_del add 100 "$@"
+}
+
+vlan_capture_uninstall()
+{
+ vlan_capture_add_del del 100 "$@"
+}
+
+do_test_span_vlan_dir_ips()
+{
+ local expect=$1; shift
+ local dev=$1; shift
+ local vid=$1; shift
+ local direction=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ vlan_capture_install $dev "vlan_id $vid"
+ mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+ mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+ vlan_capture_uninstall $dev
+}
+
+test_tagged_vlan_dir()
+{
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+ do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \
+ 192.0.2.17 192.0.2.18
+ do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" \
+ 192.0.2.17 192.0.2.18
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction mirror to vlan ($tcflags)"
+}
+
+test_tagged_vlan()
+{
+ test_tagged_vlan_dir ingress 8 0
+ test_tagged_vlan_dir egress 0 8
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+ trap_install $h3 ingress
+
+ tests_run
+
+ trap_uninstall $h3 ingress
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS