aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/kapi.rst9
-rw-r--r--Makefile4
-rw-r--r--arch/s390/net/bpf_jit_comp.c61
-rw-r--r--arch/x86/include/asm/nospec-branch.h16
-rw-r--r--arch/x86/net/bpf_jit_comp.c265
-rw-r--r--drivers/net/can/flexcan.c533
-rw-r--r--drivers/net/dsa/b53/b53_common.c19
-rw-r--r--drivers/net/dsa/b53/b53_priv.h1
-rw-r--r--drivers/net/dsa/bcm_sf2.c13
-rw-r--r--drivers/net/dsa/ocelot/felix.c5
-rw-r--r--drivers/net/ethernet/allwinner/sun4i-emac.c6
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/Kconfig1
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c89
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c10
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h26
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c2
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c6
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c3
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_if.c1
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c1
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_main.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/Makefile3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.c11
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.h1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c8
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c12
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h103
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c6
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h83
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h60
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c632
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c5
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c30
-rw-r--r--drivers/net/ethernet/mscc/ocelot_net.c22
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/offload.c18
-rw-r--r--drivers/net/ethernet/realtek/8139cp.c4
-rw-r--r--drivers/net/mdio/mdio-ipq4019.c109
-rw-r--r--drivers/net/pcs/pcs-lynx.c6
-rw-r--r--drivers/net/phy/phy-core.c32
-rw-r--r--drivers/net/phy/phy.c69
-rw-r--r--drivers/of/of_mdio.c38
-rw-r--r--drivers/s390/net/qeth_core.h22
-rw-r--r--drivers/s390/net/qeth_core_main.c71
-rw-r--r--drivers/s390/net/qeth_core_sys.c65
-rw-r--r--drivers/s390/net/qeth_l2.h7
-rw-r--r--drivers/s390/net/qeth_l2_main.c412
-rw-r--r--drivers/s390/net/qeth_l3.h4
-rw-r--r--drivers/s390/net/qeth_l3_main.c88
-rw-r--r--drivers/s390/net/qeth_l3_sys.c64
-rw-r--r--include/linux/bpf.h30
-rw-r--r--include/linux/bpf_verifier.h3
-rw-r--r--include/linux/btf_ids.h8
-rw-r--r--include/linux/filter.h4
-rw-r--r--include/linux/if_bridge.h8
-rw-r--r--include/linux/mdio.h3
-rw-r--r--include/linux/of_mdio.h6
-rw-r--r--include/linux/phy.h426
-rw-r--r--include/net/inet_connection_sock.h3
-rw-r--r--include/net/tcp.h2
-rw-r--r--include/soc/mscc/ocelot.h4
-rw-r--r--include/uapi/linux/bpf.h98
-rw-r--r--include/uapi/linux/if_bridge.h17
-rw-r--r--kernel/bpf/arraymap.c55
-rw-r--r--kernel/bpf/bpf_inode_storage.c8
-rw-r--r--kernel/bpf/bpf_local_storage.c2
-rw-r--r--kernel/bpf/btf.c15
-rw-r--r--kernel/bpf/core.c18
-rw-r--r--kernel/bpf/stackmap.c5
-rw-r--r--kernel/bpf/syscall.c87
-rw-r--r--kernel/bpf/task_iter.c15
-rw-r--r--kernel/bpf/verifier.c540
-rw-r--r--kernel/trace/bpf_trace.c23
-rw-r--r--net/batman-adv/multicast.c14
-rw-r--r--net/bridge/br_forward.c17
-rw-r--r--net/bridge/br_mdb.c371
-rw-r--r--net/bridge/br_multicast.c678
-rw-r--r--net/bridge/br_private.h49
-rw-r--r--net/core/bpf_sk_storage.c40
-rw-r--r--net/core/dev.c15
-rw-r--r--net/core/filter.c49
-rw-r--r--net/core/sock_map.c284
-rw-r--r--net/dsa/dsa_priv.h66
-rw-r--r--net/dsa/tag_brcm.c15
-rw-r--r--net/dsa/tag_ocelot.c6
-rw-r--r--net/ipv4/bpf_tcp_ca.c19
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_cong.c27
-rw-r--r--net/ipv4/tcp_input.c4
-rw-r--r--net/xdp/xsk.c7
-rw-r--r--net/xdp/xsk.h1
-rw-r--r--net/xdp/xsk_buff_pool.c6
-rw-r--r--net/xdp/xsk_diag.c14
-rw-r--r--net/xdp/xskmap.c5
-rw-r--r--samples/bpf/.gitignore1
-rw-r--r--samples/bpf/sockex3_user.c6
-rw-r--r--samples/bpf/spintest_user.c6
-rw-r--r--samples/bpf/test_map_in_map_kern.c7
-rw-r--r--samples/bpf/tracex5_user.c6
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c2
-rw-r--r--samples/bpf/xdpsock_user.c28
-rwxr-xr-xscripts/link-vmlinux.sh6
-rw-r--r--tools/bpf/bpftool/Documentation/Makefile15
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-btf.rst37
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst33
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-feature.rst33
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-gen.rst37
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-iter.rst27
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-link.rst36
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst46
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-net.rst34
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-perf.rst34
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst34
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst35
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst34
-rw-r--r--tools/bpf/bpftool/Documentation/common_options.rst22
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool22
-rw-r--r--tools/bpf/bpftool/json_writer.c6
-rw-r--r--tools/bpf/bpftool/json_writer.h3
-rw-r--r--tools/bpf/bpftool/main.c33
-rw-r--r--tools/bpf/bpftool/map.c149
-rw-r--r--tools/bpf/bpftool/prog.c203
-rw-r--r--tools/bpf/resolve_btfids/Makefile2
-rw-r--r--tools/include/linux/btf_ids.h8
-rw-r--r--tools/include/uapi/linux/bpf.h98
-rw-r--r--tools/lib/bpf/bpf.c16
-rw-r--r--tools/lib/bpf/bpf.h8
-rw-r--r--tools/lib/bpf/btf.h18
-rw-r--r--tools/lib/bpf/libbpf.c1356
-rw-r--r--tools/lib/bpf/libbpf.h5
-rw-r--r--tools/lib/bpf/libbpf.map2
-rw-r--r--tools/lib/bpf/libbpf_common.h2
-rw-r--r--tools/lib/bpf/xsk.c1
-rw-r--r--tools/perf/util/bpf-loader.c12
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile10
-rw-r--r--tools/testing/selftests/bpf/flow_dissector_load.h8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c21
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c (renamed from tools/testing/selftests/bpf/test_btf.c)410
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cls_redirect.c72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/d_path.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c21
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_data_init.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/l4lb_all.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/metadata.c141
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reference_tracking.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_assign.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c89
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_sk.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/subprogs.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tailcalls.c332
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_noinline.c49
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter.h9
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c43
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_sockmap.h3
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_file.c10
-rw-r--r--tools/testing/selftests/bpf/progs/map_ptr_kern.c10
-rw-r--r--tools/testing/selftests/bpf/progs/metadata_unused.c15
-rw-r--r--tools/testing/selftests/bpf/progs/metadata_used.c15
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf.h11
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf_subprogs.c5
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h30
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta_subprogs.c10
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c38
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c41
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c61
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c61
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c105
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_d_path.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb_noinline.c41
-rw-r--r--tools/testing/selftests/bpf/progs/test_subprogs.c103
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop1.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop2.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_prog.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c36
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh21
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_metadata.sh82
-rw-r--r--tools/testing/selftests/bpf/test_socket_cookie.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c6
-rw-r--r--tools/testing/selftests/bpf/verifier/map_ptr.c32
200 files changed, 7503 insertions, 3323 deletions
diff --git a/Documentation/networking/kapi.rst b/Documentation/networking/kapi.rst
index f03ae64be8bc..d198fa5eaacd 100644
--- a/Documentation/networking/kapi.rst
+++ b/Documentation/networking/kapi.rst
@@ -134,6 +134,15 @@ PHY Support
.. kernel-doc:: drivers/net/phy/phy.c
:internal:
+.. kernel-doc:: drivers/net/phy/phy-core.c
+ :export:
+
+.. kernel-doc:: drivers/net/phy/phy-c45.c
+ :export:
+
+.. kernel-doc:: include/linux/phy.h
+ :internal:
+
.. kernel-doc:: drivers/net/phy/phy_device.c
:export:
diff --git a/Makefile b/Makefile
index 2b66d3398878..d387c5a239b7 100644
--- a/Makefile
+++ b/Makefile
@@ -1080,13 +1080,15 @@ ifdef CONFIG_STACK_VALIDATION
endif
endif
+ifdef CONFIG_BPF
ifdef CONFIG_DEBUG_INFO_BTF
ifeq ($(has_libelf),1)
resolve_btfids_target := tools/bpf/resolve_btfids FORCE
else
ERROR_RESOLVE_BTFIDS := 1
endif
-endif
+endif # CONFIG_DEBUG_INFO_BTF
+endif # CONFIG_BPF
PHONY += prepare0
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index be4b8532dd3c..0a4182792876 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -50,7 +50,6 @@ struct bpf_jit {
int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
int tail_call_start; /* Tail call start offset */
int excnt; /* Number of exception table entries */
- int labels[1]; /* Labels for local jumps */
};
#define SEEN_MEM BIT(0) /* use mem[] for temporary storage */
@@ -229,18 +228,18 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
REG_SET_SEEN(b3); \
})
-#define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask) \
+#define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target) \
({ \
- int rel = (jit->labels[label] - jit->prg) >> 1; \
+ unsigned int rel = (int)((target) - jit->prg) / 2; \
_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \
(op2) | (mask) << 12); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
-#define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask) \
+#define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target) \
({ \
- int rel = (jit->labels[label] - jit->prg) >> 1; \
+ unsigned int rel = (int)((target) - jit->prg) / 2; \
_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \
(rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \
REG_SET_SEEN(b1); \
@@ -1282,7 +1281,9 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4(0xb9040000, BPF_REG_0, REG_2);
break;
}
- case BPF_JMP | BPF_TAIL_CALL:
+ case BPF_JMP | BPF_TAIL_CALL: {
+ int patch_1_clrj, patch_2_clij, patch_3_brc;
+
/*
* Implicit input:
* B1: pointer to ctx
@@ -1300,16 +1301,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
offsetof(struct bpf_array, map.max_entries));
/* if ((u32)%b3 >= (u32)%w1) goto out; */
- if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
- /* clrj %b3,%w1,0xa,label0 */
- EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3,
- REG_W1, 0, 0xa);
- } else {
- /* clr %b3,%w1 */
- EMIT2(0x1500, BPF_REG_3, REG_W1);
- /* brcl 0xa,label0 */
- EMIT6_PCREL_RILC(0xc0040000, 0xa, jit->labels[0]);
- }
+ /* clrj %b3,%w1,0xa,out */
+ patch_1_clrj = jit->prg;
+ EMIT6_PCREL_RIEB(0xec000000, 0x0077, BPF_REG_3, REG_W1, 0xa,
+ jit->prg);
/*
* if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
@@ -1324,16 +1319,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4_IMM(0xa7080000, REG_W0, 1);
/* laal %w1,%w0,off(%r15) */
EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
- if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
- /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
- EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
- MAX_TAIL_CALL_CNT, 0, 0x2);
- } else {
- /* clfi %w1,MAX_TAIL_CALL_CNT */
- EMIT6_IMM(0xc20f0000, REG_W1, MAX_TAIL_CALL_CNT);
- /* brcl 0x2,label0 */
- EMIT6_PCREL_RILC(0xc0040000, 0x2, jit->labels[0]);
- }
+ /* clij %w1,MAX_TAIL_CALL_CNT,0x2,out */
+ patch_2_clij = jit->prg;
+ EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT,
+ 2, jit->prg);
/*
* prog = array->ptrs[index];
@@ -1348,13 +1337,9 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/* ltg %r1,prog(%b2,%r1) */
EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2,
REG_1, offsetof(struct bpf_array, ptrs));
- if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
- /* brc 0x8,label0 */
- EMIT4_PCREL_RIC(0xa7040000, 0x8, jit->labels[0]);
- } else {
- /* brcl 0x8,label0 */
- EMIT6_PCREL_RILC(0xc0040000, 0x8, jit->labels[0]);
- }
+ /* brc 0x8,out */
+ patch_3_brc = jit->prg;
+ EMIT4_PCREL_RIC(0xa7040000, 8, jit->prg);
/*
* Restore registers before calling function
@@ -1371,8 +1356,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/* bc 0xf,tail_call_start(%r1) */
_EMIT4(0x47f01000 + jit->tail_call_start);
/* out: */
- jit->labels[0] = jit->prg;
+ if (jit->prg_buf) {
+ *(u16 *)(jit->prg_buf + patch_1_clrj + 2) =
+ (jit->prg - patch_1_clrj) >> 1;
+ *(u16 *)(jit->prg_buf + patch_2_clij + 2) =
+ (jit->prg - patch_2_clij) >> 1;
+ *(u16 *)(jit->prg_buf + patch_3_brc + 2) =
+ (jit->prg - patch_3_brc) >> 1;
+ }
break;
+ }
case BPF_JMP | BPF_EXIT: /* return b0 */
last = (i == fp->len - 1) ? 1 : 0;
if (last)
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index e7752b4038ff..e491c3d9f227 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -314,19 +314,19 @@ static inline void mds_idle_clear_cpu_buffers(void)
* lfence
* jmp spec_trap
* do_rop:
- * mov %rax,(%rsp) for x86_64
+ * mov %rcx,(%rsp) for x86_64
* mov %edx,(%esp) for x86_32
* retq
*
* Without retpolines configured:
*
- * jmp *%rax for x86_64
+ * jmp *%rcx for x86_64
* jmp *%edx for x86_32
*/
#ifdef CONFIG_RETPOLINE
# ifdef CONFIG_X86_64
-# define RETPOLINE_RAX_BPF_JIT_SIZE 17
-# define RETPOLINE_RAX_BPF_JIT() \
+# define RETPOLINE_RCX_BPF_JIT_SIZE 17
+# define RETPOLINE_RCX_BPF_JIT() \
do { \
EMIT1_off32(0xE8, 7); /* callq do_rop */ \
/* spec_trap: */ \
@@ -334,7 +334,7 @@ do { \
EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
/* do_rop: */ \
- EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
+ EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \
EMIT1(0xC3); /* retq */ \
} while (0)
# else /* !CONFIG_X86_64 */
@@ -352,9 +352,9 @@ do { \
# endif
#else /* !CONFIG_RETPOLINE */
# ifdef CONFIG_X86_64
-# define RETPOLINE_RAX_BPF_JIT_SIZE 2
-# define RETPOLINE_RAX_BPF_JIT() \
- EMIT2(0xFF, 0xE0); /* jmp *%rax */
+# define RETPOLINE_RCX_BPF_JIT_SIZE 2
+# define RETPOLINE_RCX_BPF_JIT() \
+ EMIT2(0xFF, 0xE1); /* jmp *%rcx */
# else /* !CONFIG_X86_64 */
# define RETPOLINE_EDX_BPF_JIT() \
EMIT2(0xFF, 0xE2) /* jmp *%edx */
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7d9ea7b41c71..26f43279b78b 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -221,14 +221,48 @@ struct jit_context {
/* Number of bytes emit_patch() needs to generate instructions */
#define X86_PATCH_SIZE 5
+/* Number of bytes that will be skipped on tailcall */
+#define X86_TAIL_CALL_OFFSET 11
-#define PROLOGUE_SIZE 25
+static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (callee_regs_used[0])
+ EMIT1(0x53); /* push rbx */
+ if (callee_regs_used[1])
+ EMIT2(0x41, 0x55); /* push r13 */
+ if (callee_regs_used[2])
+ EMIT2(0x41, 0x56); /* push r14 */
+ if (callee_regs_used[3])
+ EMIT2(0x41, 0x57); /* push r15 */
+ *pprog = prog;
+}
+
+static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (callee_regs_used[3])
+ EMIT2(0x41, 0x5F); /* pop r15 */
+ if (callee_regs_used[2])
+ EMIT2(0x41, 0x5E); /* pop r14 */
+ if (callee_regs_used[1])
+ EMIT2(0x41, 0x5D); /* pop r13 */
+ if (callee_regs_used[0])
+ EMIT1(0x5B); /* pop rbx */
+ *pprog = prog;
+}
/*
- * Emit x86-64 prologue code for BPF program and check its size.
- * bpf_tail_call helper will skip it while jumping into another program
+ * Emit x86-64 prologue code for BPF program.
+ * bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
+ * while jumping to another program
*/
-static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
+static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
+ bool tail_call_reachable, bool is_subprog)
{
u8 *prog = *pprog;
int cnt = X86_PATCH_SIZE;
@@ -238,19 +272,18 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
*/
memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt);
prog += cnt;
+ if (!ebpf_from_cbpf) {
+ if (tail_call_reachable && !is_subprog)
+ EMIT2(0x31, 0xC0); /* xor eax, eax */
+ else
+ EMIT2(0x66, 0x90); /* nop2 */
+ }
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
/* sub rsp, rounded_stack_depth */
EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
- EMIT1(0x53); /* push rbx */
- EMIT2(0x41, 0x55); /* push r13 */
- EMIT2(0x41, 0x56); /* push r14 */
- EMIT2(0x41, 0x57); /* push r15 */
- if (!ebpf_from_cbpf) {
- /* zero init tail_call_cnt */
- EMIT2(0x6a, 0x00);
- BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
- }
+ if (tail_call_reachable)
+ EMIT1(0x50); /* push rax */
*pprog = prog;
}
@@ -314,13 +347,14 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
mutex_lock(&text_mutex);
if (memcmp(ip, old_insn, X86_PATCH_SIZE))
goto out;
+ ret = 1;
if (memcmp(ip, new_insn, X86_PATCH_SIZE)) {
if (text_live)
text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
else
memcpy(ip, new_insn, X86_PATCH_SIZE);
+ ret = 0;
}
- ret = 0;
out:
mutex_unlock(&text_mutex);
return ret;
@@ -337,6 +371,22 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
}
+static int get_pop_bytes(bool *callee_regs_used)
+{
+ int bytes = 0;
+
+ if (callee_regs_used[3])
+ bytes += 2;
+ if (callee_regs_used[2])
+ bytes += 2;
+ if (callee_regs_used[1])
+ bytes += 2;
+ if (callee_regs_used[0])
+ bytes += 1;
+
+ return bytes;
+}
+
/*
* Generate the following code:
*
@@ -351,12 +401,26 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
* goto *(prog->bpf_func + prologue_size);
* out:
*/
-static void emit_bpf_tail_call_indirect(u8 **pprog)
+static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
+ u32 stack_depth)
{
+ int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog;
- int label1, label2, label3;
+ int pop_bytes = 0;
+ int off1 = 49;
+ int off2 = 38;
+ int off3 = 16;
int cnt = 0;
+ /* count the additional bytes used for popping callee regs from stack
+ * that need to be taken into account for each of the offsets that
+ * are used for bailing out of the tail call
+ */
+ pop_bytes = get_pop_bytes(callee_regs_used);
+ off1 += pop_bytes;
+ off2 += pop_bytes;
+ off3 += pop_bytes;
+
/*
* rdi - pointer to ctx
* rsi - pointer to bpf_array
@@ -370,72 +434,106 @@ static void emit_bpf_tail_call_indirect(u8 **pprog)
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
+#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
- label1 = cnt;
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
*/
- EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */
+ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
+#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
EMIT2(X86_JA, OFFSET2); /* ja out */
- label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
- EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */
+ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
/* prog = array->ptrs[index]; */
- EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
+ EMIT4_off32(0x48, 0x8B, 0x8C, 0xD6, /* mov rcx, [rsi + rdx * 8 + offsetof(...)] */
offsetof(struct bpf_array, ptrs));
/*
* if (prog == NULL)
* goto out;
*/
- EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
-#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
+ EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
+#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
EMIT2(X86_JE, OFFSET3); /* je out */
- label3 = cnt;
- /* goto *(prog->bpf_func + prologue_size); */
- EMIT4(0x48, 0x8B, 0x40, /* mov rax, qword ptr [rax + 32] */
- offsetof(struct bpf_prog, bpf_func));
- EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */
+ *pprog = prog;
+ pop_callee_regs(pprog, callee_regs_used);
+ prog = *pprog;
+
+ EMIT1(0x58); /* pop rax */
+ EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */
+ round_up(stack_depth, 8));
+ /* goto *(prog->bpf_func + X86_TAIL_CALL_OFFSET); */
+ EMIT4(0x48, 0x8B, 0x49, /* mov rcx, qword ptr [rcx + 32] */
+ offsetof(struct bpf_prog, bpf_func));
+ EMIT4(0x48, 0x83, 0xC1, /* add rcx, X86_TAIL_CALL_OFFSET */
+ X86_TAIL_CALL_OFFSET);
/*
- * Wow we're ready to jump into next BPF program
+ * Now we're ready to jump into next BPF program
* rdi == ctx (1st arg)
- * rax == prog->bpf_func + prologue_size
+ * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
*/
- RETPOLINE_RAX_BPF_JIT();
+ RETPOLINE_RCX_BPF_JIT();
/* out: */
- BUILD_BUG_ON(cnt - label1 != OFFSET1);
- BUILD_BUG_ON(cnt - label2 != OFFSET2);
- BUILD_BUG_ON(cnt - label3 != OFFSET3);
*pprog = prog;
}
static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
- u8 **pprog, int addr, u8 *image)
+ u8 **pprog, int addr, u8 *image,
+ bool *callee_regs_used, u32 stack_depth)
{
+ int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog;
+ int pop_bytes = 0;
+ int off1 = 27;
+ int poke_off;
int cnt = 0;
+ /* count the additional bytes used for popping callee regs to stack
+ * that need to be taken into account for jump offset that is used for
+ * bailing out from of the tail call when limit is reached
+ */
+ pop_bytes = get_pop_bytes(callee_regs_used);
+ off1 += pop_bytes;
+
+ /*
+ * total bytes for:
+ * - nop5/ jmpq $off
+ * - pop callee regs
+ * - sub rsp, $val
+ * - pop rax
+ */
+ poke_off = X86_PATCH_SIZE + pop_bytes + 7 + 1;
+
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
*/
- EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */
+ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
- EMIT2(X86_JA, 14); /* ja out */
+ EMIT2(X86_JA, off1); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
- EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */
+ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
+
+ poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
+ poke->adj_off = X86_TAIL_CALL_OFFSET;
+ poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
+ poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE;
- poke->ip = image + (addr - X86_PATCH_SIZE);
- poke->adj_off = PROLOGUE_SIZE;
+ emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
+ poke->tailcall_bypass);
+
+ *pprog = prog;
+ pop_callee_regs(pprog, callee_regs_used);
+ prog = *pprog;
+ EMIT1(0x58); /* pop rax */
+ EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
@@ -453,7 +551,7 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
for (i = 0; i < prog->aux->size_poke_tab; i++) {
poke = &prog->aux->poke_tab[i];
- WARN_ON_ONCE(READ_ONCE(poke->ip_stable));
+ WARN_ON_ONCE(READ_ONCE(poke->tailcall_target_stable));
if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
continue;
@@ -464,18 +562,25 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
if (target) {
/* Plain memcpy is used when image is not live yet
* and still not locked as read-only. Once poke
- * location is active (poke->ip_stable), any parallel
- * bpf_arch_text_poke() might occur still on the
- * read-write image until we finally locked it as
- * read-only. Both modifications on the given image
- * are under text_mutex to avoid interference.
+ * location is active (poke->tailcall_target_stable),
+ * any parallel bpf_arch_text_poke() might occur
+ * still on the read-write image until we finally
+ * locked it as read-only. Both modifications on
+ * the given image are under text_mutex to avoid
+ * interference.
*/
- ret = __bpf_arch_text_poke(poke->ip, BPF_MOD_JUMP, NULL,
+ ret = __bpf_arch_text_poke(poke->tailcall_target,
+ BPF_MOD_JUMP, NULL,
(u8 *)target->bpf_func +
poke->adj_off, false);
BUG_ON(ret < 0);
+ ret = __bpf_arch_text_poke(poke->tailcall_bypass,
+ BPF_MOD_JUMP,
+ (u8 *)poke->tailcall_target +
+ X86_PATCH_SIZE, NULL, false);
+ BUG_ON(ret < 0);
}
- WRITE_ONCE(poke->ip_stable, true);
+ WRITE_ONCE(poke->tailcall_target_stable, true);
mutex_unlock(&array->aux->poke_mutex);
}
}
@@ -652,19 +757,49 @@ static bool ex_handler_bpf(const struct exception_table_entry *x,
return true;
}
+static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt,
+ bool *regs_used, bool *tail_call_seen)
+{
+ int i;
+
+ for (i = 1; i <= insn_cnt; i++, insn++) {
+ if (insn->code == (BPF_JMP | BPF_TAIL_CALL))
+ *tail_call_seen = true;
+ if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6)
+ regs_used[0] = true;
+ if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7)
+ regs_used[1] = true;
+ if (insn->dst_reg == BPF_REG_8 || insn->src_reg == BPF_REG_8)
+ regs_used[2] = true;
+ if (insn->dst_reg == BPF_REG_9 || insn->src_reg == BPF_REG_9)
+ regs_used[3] = true;
+ }
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
+ bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
struct bpf_insn *insn = bpf_prog->insnsi;
+ bool callee_regs_used[4] = {};
int insn_cnt = bpf_prog->len;
+ bool tail_call_seen = false;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
int i, cnt = 0, excnt = 0;
int proglen = 0;
u8 *prog = temp;
+ detect_reg_usage(insn, insn_cnt, callee_regs_used,
+ &tail_call_seen);
+
+ /* tail call's presence in current prog implies it is reachable */
+ tail_call_reachable |= tail_call_seen;
+
emit_prologue(&prog, bpf_prog->aux->stack_depth,
- bpf_prog_was_classic(bpf_prog));
+ bpf_prog_was_classic(bpf_prog), tail_call_reachable,
+ bpf_prog->aux->func_idx != 0);
+ push_callee_regs(&prog, callee_regs_used);
addrs[0] = prog - temp;
for (i = 1; i <= insn_cnt; i++, insn++) {
@@ -1102,16 +1237,27 @@ xadd: if (is_imm8(insn->off))
/* call */
case BPF_JMP | BPF_CALL:
func = (u8 *) __bpf_call_base + imm32;
- if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
- return -EINVAL;
+ if (tail_call_reachable) {
+ EMIT3_off32(0x48, 0x8B, 0x85,
+ -(bpf_prog->aux->stack_depth + 8));
+ if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7))
+ return -EINVAL;
+ } else {
+ if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
+ return -EINVAL;
+ }
break;
case BPF_JMP | BPF_TAIL_CALL:
if (imm32)
emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
- &prog, addrs[i], image);
+ &prog, addrs[i], image,
+ callee_regs_used,
+ bpf_prog->aux->stack_depth);
else
- emit_bpf_tail_call_indirect(&prog);
+ emit_bpf_tail_call_indirect(&prog,
+ callee_regs_used,
+ bpf_prog->aux->stack_depth);
break;
/* cond jump */
@@ -1294,12 +1440,9 @@ emit_jmp:
seen_exit = true;
/* Update cleanup_addr */
ctx->cleanup_addr = proglen;
- if (!bpf_prog_was_classic(bpf_prog))
- EMIT1(0x5B); /* get rid of tail_call_cnt */
- EMIT2(0x41, 0x5F); /* pop r15 */
- EMIT2(0x41, 0x5E); /* pop r14 */
- EMIT2(0x41, 0x5D); /* pop r13 */
- EMIT1(0x5B); /* pop rbx */
+ pop_callee_regs(&prog, callee_regs_used);
+ if (tail_call_reachable)
+ EMIT1(0x59); /* pop rcx, get rid of tail_call_cnt */
EMIT1(0xC9); /* leave */
EMIT1(0xC3); /* ret */
break;
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 19403e88daa3..e86925134009 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -9,7 +9,7 @@
//
// Based on code originally by Andrey Volkov <[email protected]>
-#include <linux/netdevice.h>
+#include <linux/bitfield.h>
#include <linux/can.h>
#include <linux/can/dev.h>
#include <linux/can/error.h>
@@ -21,12 +21,14 @@
#include <linux/io.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
+#include <linux/netdevice.h>
#include <linux/of.h>
#include <linux/of_device.h>
+#include <linux/pinctrl/consumer.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
-#include <linux/regulator/consumer.h>
#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
#define DRV_NAME "flexcan"
@@ -52,6 +54,7 @@
#define FLEXCAN_MCR_IRMQ BIT(16)
#define FLEXCAN_MCR_LPRIO_EN BIT(13)
#define FLEXCAN_MCR_AEN BIT(12)
+#define FLEXCAN_MCR_FDEN BIT(11)
/* MCR_MAXMB: maximum used MBs is MAXMB + 1 */
#define FLEXCAN_MCR_MAXMB(x) ((x) & 0x7f)
#define FLEXCAN_MCR_IDAM_A (0x0 << 8)
@@ -91,6 +94,7 @@
#define FLEXCAN_CTRL2_MRP BIT(18)
#define FLEXCAN_CTRL2_RRS BIT(17)
#define FLEXCAN_CTRL2_EACEN BIT(16)
+#define FLEXCAN_CTRL2_ISOCANFDEN BIT(12)
/* FLEXCAN memory error control register (MECR) bits */
#define FLEXCAN_MECR_ECRWRDIS BIT(31)
@@ -134,8 +138,35 @@
(FLEXCAN_ESR_ERR_BUS | FLEXCAN_ESR_ERR_STATE)
#define FLEXCAN_ESR_ALL_INT \
(FLEXCAN_ESR_TWRN_INT | FLEXCAN_ESR_RWRN_INT | \
- FLEXCAN_ESR_BOFF_INT | FLEXCAN_ESR_ERR_INT | \
- FLEXCAN_ESR_WAK_INT)
+ FLEXCAN_ESR_BOFF_INT | FLEXCAN_ESR_ERR_INT)
+
+/* FLEXCAN Bit Timing register (CBT) bits */
+#define FLEXCAN_CBT_BTF BIT(31)
+#define FLEXCAN_CBT_EPRESDIV_MASK GENMASK(30, 21)
+#define FLEXCAN_CBT_ERJW_MASK GENMASK(20, 16)
+#define FLEXCAN_CBT_EPROPSEG_MASK GENMASK(15, 10)
+#define FLEXCAN_CBT_EPSEG1_MASK GENMASK(9, 5)
+#define FLEXCAN_CBT_EPSEG2_MASK GENMASK(4, 0)
+
+/* FLEXCAN FD control register (FDCTRL) bits */
+#define FLEXCAN_FDCTRL_FDRATE BIT(31)
+#define FLEXCAN_FDCTRL_MBDSR1 GENMASK(20, 19)
+#define FLEXCAN_FDCTRL_MBDSR0 GENMASK(17, 16)
+#define FLEXCAN_FDCTRL_MBDSR_8 0x0
+#define FLEXCAN_FDCTRL_MBDSR_12 0x1
+#define FLEXCAN_FDCTRL_MBDSR_32 0x2
+#define FLEXCAN_FDCTRL_MBDSR_64 0x3
+#define FLEXCAN_FDCTRL_TDCEN BIT(15)
+#define FLEXCAN_FDCTRL_TDCFAIL BIT(14)
+#define FLEXCAN_FDCTRL_TDCOFF GENMASK(12, 8)
+#define FLEXCAN_FDCTRL_TDCVAL GENMASK(5, 0)
+
+/* FLEXCAN FD Bit Timing register (FDCBT) bits */
+#define FLEXCAN_FDCBT_FPRESDIV_MASK GENMASK(29, 20)
+#define FLEXCAN_FDCBT_FRJW_MASK GENMASK(18, 16)
+#define FLEXCAN_FDCBT_FPROPSEG_MASK GENMASK(14, 10)
+#define FLEXCAN_FDCBT_FPSEG1_MASK GENMASK(7, 5)
+#define FLEXCAN_FDCBT_FPSEG2_MASK GENMASK(2, 0)
/* FLEXCAN interrupt flag register (IFLAG) bits */
/* Errata ERR005829 step7: Reserve first valid MB */
@@ -161,6 +192,9 @@
#define FLEXCAN_MB_CODE_TX_DATA (0xc << 24)
#define FLEXCAN_MB_CODE_TX_TANSWER (0xe << 24)
+#define FLEXCAN_MB_CNT_EDL BIT(31)
+#define FLEXCAN_MB_CNT_BRS BIT(30)
+#define FLEXCAN_MB_CNT_ESI BIT(29)
#define FLEXCAN_MB_CNT_SRR BIT(22)
#define FLEXCAN_MB_CNT_IDE BIT(21)
#define FLEXCAN_MB_CNT_RTR BIT(20)
@@ -172,26 +206,39 @@
/* FLEXCAN hardware feature flags
*
* Below is some version info we got:
- * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR re-
- * Filter? connected? Passive detection ception in MB
- * MX25 FlexCAN2 03.00.00.00 no no no no no
- * MX28 FlexCAN2 03.00.04.00 yes yes no no no
- * MX35 FlexCAN2 03.00.00.00 no no no no no
- * MX53 FlexCAN2 03.00.00.00 yes no no no no
- * MX6s FlexCAN3 10.00.12.00 yes yes no no yes
- * VF610 FlexCAN3 ? no yes no yes yes?
- * LS1021A FlexCAN2 03.00.04.00 no yes no no yes
+ * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR rece- FD Mode
+ * Filter? connected? Passive detection ption in MB Supported?
+ * MX25 FlexCAN2 03.00.00.00 no no no no no no
+ * MX28 FlexCAN2 03.00.04.00 yes yes no no no no
+ * MX35 FlexCAN2 03.00.00.00 no no no no no no
+ * MX53 FlexCAN2 03.00.00.00 yes no no no no no
+ * MX6s FlexCAN3 10.00.12.00 yes yes no no yes no
+ * MX8QM FlexCAN3 03.00.23.00 yes yes no no yes yes
+ * VF610 FlexCAN3 ? no yes no yes yes? no
+ * LS1021A FlexCAN2 03.00.04.00 no yes no no yes no
+ * LX2160A FlexCAN3 03.00.23.00 no yes no no yes yes
*
* Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected.
*/
-#define FLEXCAN_QUIRK_BROKEN_WERR_STATE BIT(1) /* [TR]WRN_INT not connected */
-#define FLEXCAN_QUIRK_DISABLE_RXFG BIT(2) /* Disable RX FIFO Global mask */
-#define FLEXCAN_QUIRK_ENABLE_EACEN_RRS BIT(3) /* Enable EACEN and RRS bit in ctrl2 */
-#define FLEXCAN_QUIRK_DISABLE_MECR BIT(4) /* Disable Memory error detection */
-#define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP BIT(5) /* Use timestamp based offloading */
-#define FLEXCAN_QUIRK_BROKEN_PERR_STATE BIT(6) /* No interrupt for error passive */
-#define FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN BIT(7) /* default to BE register access */
-#define FLEXCAN_QUIRK_SETUP_STOP_MODE BIT(8) /* Setup stop mode to support wakeup */
+
+/* [TR]WRN_INT not connected */
+#define FLEXCAN_QUIRK_BROKEN_WERR_STATE BIT(1)
+ /* Disable RX FIFO Global mask */
+#define FLEXCAN_QUIRK_DISABLE_RXFG BIT(2)
+/* Enable EACEN and RRS bit in ctrl2 */
+#define FLEXCAN_QUIRK_ENABLE_EACEN_RRS BIT(3)
+/* Disable non-correctable errors interrupt and freeze mode */
+#define FLEXCAN_QUIRK_DISABLE_MECR BIT(4)
+/* Use timestamp based offloading */
+#define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP BIT(5)
+/* No interrupt for error passive */
+#define FLEXCAN_QUIRK_BROKEN_PERR_STATE BIT(6)
+/* default to BE register access */
+#define FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN BIT(7)
+/* Setup stop mode to support wakeup */
+#define FLEXCAN_QUIRK_SETUP_STOP_MODE BIT(8)
+/* Support CAN-FD mode */
+#define FLEXCAN_QUIRK_SUPPORT_FD BIT(9)
/* Structure of the message buffer */
struct flexcan_mb {
@@ -203,12 +250,12 @@ struct flexcan_mb {
/* Structure of the hardware registers */
struct flexcan_regs {
u32 mcr; /* 0x00 */
- u32 ctrl; /* 0x04 */
+ u32 ctrl; /* 0x04 - Not affected by Soft Reset */
u32 timer; /* 0x08 */
- u32 _reserved1; /* 0x0c */
- u32 rxgmask; /* 0x10 */
- u32 rx14mask; /* 0x14 */
- u32 rx15mask; /* 0x18 */
+ u32 tcr; /* 0x0c */
+ u32 rxgmask; /* 0x10 - Not affected by Soft Reset */
+ u32 rx14mask; /* 0x14 - Not affected by Soft Reset */
+ u32 rx15mask; /* 0x18 - Not affected by Soft Reset */
u32 ecr; /* 0x1c */
u32 esr; /* 0x20 */
u32 imask2; /* 0x24 */
@@ -217,16 +264,20 @@ struct flexcan_regs {
u32 iflag1; /* 0x30 */
union { /* 0x34 */
u32 gfwr_mx28; /* MX28, MX53 */
- u32 ctrl2; /* MX6, VF610 */
+ u32 ctrl2; /* MX6, VF610 - Not affected by Soft Reset */
};
u32 esr2; /* 0x38 */
u32 imeur; /* 0x3c */
u32 lrfr; /* 0x40 */
u32 crcr; /* 0x44 */
u32 rxfgmask; /* 0x48 */
- u32 rxfir; /* 0x4c */
- u32 _reserved3[12]; /* 0x50 */
- u8 mb[2][512]; /* 0x80 */
+ u32 rxfir; /* 0x4c - Not affected by Soft Reset */
+ u32 cbt; /* 0x50 - Not affected by Soft Reset */
+ u32 _reserved2; /* 0x54 */
+ u32 dbg1; /* 0x58 */
+ u32 dbg2; /* 0x5c */
+ u32 _reserved3[8]; /* 0x60 */
+ u8 mb[2][512]; /* 0x80 - Not affected by Soft Reset */
/* FIFO-mode:
* MB
* 0x080...0x08f 0 RX message buffer
@@ -238,7 +289,7 @@ struct flexcan_regs {
* (mx6, vf610)
*/
u32 _reserved4[256]; /* 0x480 */
- u32 rximr[64]; /* 0x880 */
+ u32 rximr[64]; /* 0x880 - Not affected by Soft Reset */
u32 _reserved5[24]; /* 0x980 */
u32 gfwr_mx6; /* 0x9e0 - MX6 */
u32 _reserved6[63]; /* 0x9e4 */
@@ -250,8 +301,14 @@ struct flexcan_regs {
u32 rerrdr; /* 0xaf4 */
u32 rerrsynr; /* 0xaf8 */
u32 errsr; /* 0xafc */
+ u32 _reserved7[64]; /* 0xb00 */
+ u32 fdctrl; /* 0xc00 - Not affected by Soft Reset */
+ u32 fdcbt; /* 0xc04 - Not affected by Soft Reset */
+ u32 fdcrc; /* 0xc08 */
};
+static_assert(sizeof(struct flexcan_regs) == 0x4 + 0xc08);
+
struct flexcan_devtype_data {
u32 quirks; /* quirks needed for different IP cores */
};
@@ -313,6 +370,12 @@ static const struct flexcan_devtype_data fsl_imx6q_devtype_data = {
FLEXCAN_QUIRK_SETUP_STOP_MODE,
};
+static const struct flexcan_devtype_data fsl_imx8qm_devtype_data = {
+ .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
+ FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+ FLEXCAN_QUIRK_SUPPORT_FD,
+};
+
static const struct flexcan_devtype_data fsl_vf610_devtype_data = {
.quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP |
@@ -325,6 +388,12 @@ static const struct flexcan_devtype_data fsl_ls1021a_r2_devtype_data = {
FLEXCAN_QUIRK_USE_OFF_TIMESTAMP,
};
+static const struct flexcan_devtype_data fsl_lx2160a_r1_devtype_data = {
+ .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS |
+ FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+ FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_SUPPORT_FD,
+};
+
static const struct can_bittiming_const flexcan_bittiming_const = {
.name = DRV_NAME,
.tseg1_min = 4,
@@ -337,6 +406,30 @@ static const struct can_bittiming_const flexcan_bittiming_const = {
.brp_inc = 1,
};
+static const struct can_bittiming_const flexcan_fd_bittiming_const = {
+ .name = DRV_NAME,
+ .tseg1_min = 2,
+ .tseg1_max = 96,
+ .tseg2_min = 2,
+ .tseg2_max = 32,
+ .sjw_max = 16,
+ .brp_min = 1,
+ .brp_max = 1024,
+ .brp_inc = 1,
+};
+
+static const struct can_bittiming_const flexcan_fd_data_bittiming_const = {
+ .name = DRV_NAME,
+ .tseg1_min = 2,
+ .tseg1_max = 39,
+ .tseg2_min = 2,
+ .tseg2_max = 8,
+ .sjw_max = 4,
+ .brp_min = 1,
+ .brp_max = 1024,
+ .brp_inc = 1,
+};
+
/* FlexCAN module is essentially modelled as a little-endian IP in most
* SoCs, i.e the registers as well as the message buffer areas are
* implemented in a little-endian fashion.
@@ -457,7 +550,6 @@ static inline int flexcan_exit_stop_mode(struct flexcan_priv *priv)
regmap_update_bits(priv->stm.gpr, priv->stm.req_gpr,
1 << priv->stm.req_bit, 0);
-
reg_mcr = priv->read(&regs->mcr);
reg_mcr &= ~FLEXCAN_MCR_SLF_WAK;
priv->write(reg_mcr, &regs->mcr);
@@ -628,10 +720,10 @@ static int flexcan_get_berr_counter(const struct net_device *dev,
static netdev_tx_t flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
const struct flexcan_priv *priv = netdev_priv(dev);
- struct can_frame *cf = (struct can_frame *)skb->data;
+ struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
u32 can_id;
u32 data;
- u32 ctrl = FLEXCAN_MB_CODE_TX_DATA | (cf->can_dlc << 16);
+ u32 ctrl = FLEXCAN_MB_CODE_TX_DATA | ((can_len2dlc(cfd->len)) << 16);
int i;
if (can_dropped_invalid_skb(dev, skb))
@@ -639,18 +731,25 @@ static netdev_tx_t flexcan_start_xmit(struct sk_buff *skb, struct net_device *de
netif_stop_queue(dev);
- if (cf->can_id & CAN_EFF_FLAG) {
- can_id = cf->can_id & CAN_EFF_MASK;
+ if (cfd->can_id & CAN_EFF_FLAG) {
+ can_id = cfd->can_id & CAN_EFF_MASK;
ctrl |= FLEXCAN_MB_CNT_IDE | FLEXCAN_MB_CNT_SRR;
} else {
- can_id = (cf->can_id & CAN_SFF_MASK) << 18;
+ can_id = (cfd->can_id & CAN_SFF_MASK) << 18;
}
- if (cf->can_id & CAN_RTR_FLAG)
+ if (cfd->can_id & CAN_RTR_FLAG)
ctrl |= FLEXCAN_MB_CNT_RTR;
- for (i = 0; i < cf->can_dlc; i += sizeof(u32)) {
- data = be32_to_cpup((__be32 *)&cf->data[i]);
+ if (can_is_canfd_skb(skb)) {
+ ctrl |= FLEXCAN_MB_CNT_EDL;
+
+ if (cfd->flags & CANFD_BRS)
+ ctrl |= FLEXCAN_MB_CNT_BRS;
+ }
+
+ for (i = 0; i < cfd->len; i += sizeof(u32)) {
+ data = be32_to_cpup((__be32 *)&cfd->data[i]);
priv->write(data, &priv->tx_mb->data[i / sizeof(u32)]);
}
@@ -822,7 +921,7 @@ static struct sk_buff *flexcan_mailbox_read(struct can_rx_offload *offload,
struct flexcan_regs __iomem *regs = priv->regs;
struct flexcan_mb __iomem *mb;
struct sk_buff *skb;
- struct can_frame *cf;
+ struct canfd_frame *cfd;
u32 reg_ctrl, reg_id, reg_iflag1;
int i;
@@ -859,8 +958,11 @@ static struct sk_buff *flexcan_mailbox_read(struct can_rx_offload *offload,
reg_ctrl = priv->read(&mb->can_ctrl);
}
- skb = alloc_can_skb(offload->dev, &cf);
- if (!skb) {
+ if (reg_ctrl & FLEXCAN_MB_CNT_EDL)
+ skb = alloc_canfd_skb(offload->dev, &cfd);
+ else
+ skb = alloc_can_skb(offload->dev, (struct can_frame **)&cfd);
+ if (unlikely(!skb)) {
skb = ERR_PTR(-ENOMEM);
goto mark_as_read;
}
@@ -870,17 +972,28 @@ static struct sk_buff *flexcan_mailbox_read(struct can_rx_offload *offload,
reg_id = priv->read(&mb->can_id);
if (reg_ctrl & FLEXCAN_MB_CNT_IDE)
- cf->can_id = ((reg_id >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG;
+ cfd->can_id = ((reg_id >> 0) & CAN_EFF_MASK) | CAN_EFF_FLAG;
else
- cf->can_id = (reg_id >> 18) & CAN_SFF_MASK;
+ cfd->can_id = (reg_id >> 18) & CAN_SFF_MASK;
+
+ if (reg_ctrl & FLEXCAN_MB_CNT_EDL) {
+ cfd->len = can_dlc2len(get_canfd_dlc((reg_ctrl >> 16) & 0xf));
+
+ if (reg_ctrl & FLEXCAN_MB_CNT_BRS)
+ cfd->flags |= CANFD_BRS;
+ } else {
+ cfd->len = get_can_dlc((reg_ctrl >> 16) & 0xf);
+
+ if (reg_ctrl & FLEXCAN_MB_CNT_RTR)
+ cfd->can_id |= CAN_RTR_FLAG;
+ }
- if (reg_ctrl & FLEXCAN_MB_CNT_RTR)
- cf->can_id |= CAN_RTR_FLAG;
- cf->can_dlc = get_can_dlc((reg_ctrl >> 16) & 0xf);
+ if (reg_ctrl & FLEXCAN_MB_CNT_ESI)
+ cfd->flags |= CANFD_ESI;
- for (i = 0; i < cf->can_dlc; i += sizeof(u32)) {
+ for (i = 0; i < cfd->len; i += sizeof(u32)) {
__be32 data = cpu_to_be32(priv->read(&mb->data[i / sizeof(u32)]));
- *(__be32 *)(cf->data + i) = data;
+ *(__be32 *)(cfd->data + i) = data;
}
mark_as_read:
@@ -961,10 +1074,10 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
reg_esr = priv->read(&regs->esr);
- /* ACK all bus error and state change IRQ sources */
- if (reg_esr & FLEXCAN_ESR_ALL_INT) {
+ /* ACK all bus error, state change and wake IRQ sources */
+ if (reg_esr & (FLEXCAN_ESR_ALL_INT | FLEXCAN_ESR_WAK_INT)) {
handled = IRQ_HANDLED;
- priv->write(reg_esr & FLEXCAN_ESR_ALL_INT, &regs->esr);
+ priv->write(reg_esr & (FLEXCAN_ESR_ALL_INT | FLEXCAN_ESR_WAK_INT), &regs->esr);
}
/* state change interrupt or broken error state quirk fix is enabled */
@@ -1019,7 +1132,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
return handled;
}
-static void flexcan_set_bittiming(struct net_device *dev)
+static void flexcan_set_bittiming_ctrl(const struct net_device *dev)
{
const struct flexcan_priv *priv = netdev_priv(dev);
const struct can_bittiming *bt = &priv->can.bittiming;
@@ -1031,10 +1144,7 @@ static void flexcan_set_bittiming(struct net_device *dev)
FLEXCAN_CTRL_RJW(0x3) |
FLEXCAN_CTRL_PSEG1(0x7) |
FLEXCAN_CTRL_PSEG2(0x7) |
- FLEXCAN_CTRL_PROPSEG(0x7) |
- FLEXCAN_CTRL_LPB |
- FLEXCAN_CTRL_SMP |
- FLEXCAN_CTRL_LOM);
+ FLEXCAN_CTRL_PROPSEG(0x7));
reg |= FLEXCAN_CTRL_PRESDIV(bt->brp - 1) |
FLEXCAN_CTRL_PSEG1(bt->phase_seg1 - 1) |
@@ -1042,6 +1152,130 @@ static void flexcan_set_bittiming(struct net_device *dev)
FLEXCAN_CTRL_RJW(bt->sjw - 1) |
FLEXCAN_CTRL_PROPSEG(bt->prop_seg - 1);
+ netdev_dbg(dev, "writing ctrl=0x%08x\n", reg);
+ priv->write(reg, &regs->ctrl);
+
+ /* print chip status */
+ netdev_dbg(dev, "%s: mcr=0x%08x ctrl=0x%08x\n", __func__,
+ priv->read(&regs->mcr), priv->read(&regs->ctrl));
+}
+
+static void flexcan_set_bittiming_cbt(const struct net_device *dev)
+{
+ struct flexcan_priv *priv = netdev_priv(dev);
+ struct can_bittiming *bt = &priv->can.bittiming;
+ struct can_bittiming *dbt = &priv->can.data_bittiming;
+ struct flexcan_regs __iomem *regs = priv->regs;
+ u32 reg_cbt, reg_fdctrl;
+
+ /* CBT */
+ /* CBT[EPSEG1] is 5 bit long and CBT[EPROPSEG] is 6 bit
+ * long. The can_calc_bittiming() tries to divide the tseg1
+ * equally between phase_seg1 and prop_seg, which may not fit
+ * in CBT register. Therefore, if phase_seg1 is more than
+ * possible value, increase prop_seg and decrease phase_seg1.
+ */
+ if (bt->phase_seg1 > 0x20) {
+ bt->prop_seg += (bt->phase_seg1 - 0x20);
+ bt->phase_seg1 = 0x20;
+ }
+
+ reg_cbt = FLEXCAN_CBT_BTF |
+ FIELD_PREP(FLEXCAN_CBT_EPRESDIV_MASK, bt->brp - 1) |
+ FIELD_PREP(FLEXCAN_CBT_ERJW_MASK, bt->sjw - 1) |
+ FIELD_PREP(FLEXCAN_CBT_EPROPSEG_MASK, bt->prop_seg - 1) |
+ FIELD_PREP(FLEXCAN_CBT_EPSEG1_MASK, bt->phase_seg1 - 1) |
+ FIELD_PREP(FLEXCAN_CBT_EPSEG2_MASK, bt->phase_seg2 - 1);
+
+ netdev_dbg(dev, "writing cbt=0x%08x\n", reg_cbt);
+ priv->write(reg_cbt, &regs->cbt);
+
+ if (priv->can.ctrlmode & CAN_CTRLMODE_FD) {
+ u32 reg_fdcbt, reg_ctrl2;
+
+ if (bt->brp != dbt->brp)
+ netdev_warn(dev, "Data brp=%d and brp=%d don't match, this may result in a phase error. Consider using different bitrate and/or data bitrate.\n",
+ dbt->brp, bt->brp);
+
+ /* FDCBT */
+ /* FDCBT[FPSEG1] is 3 bit long and FDCBT[FPROPSEG] is
+ * 5 bit long. The can_calc_bittiming tries to divide
+ * the tseg1 equally between phase_seg1 and prop_seg,
+ * which may not fit in FDCBT register. Therefore, if
+ * phase_seg1 is more than possible value, increase
+ * prop_seg and decrease phase_seg1
+ */
+ if (dbt->phase_seg1 > 0x8) {
+ dbt->prop_seg += (dbt->phase_seg1 - 0x8);
+ dbt->phase_seg1 = 0x8;
+ }
+
+ reg_fdcbt = priv->read(&regs->fdcbt);
+ reg_fdcbt &= ~(FIELD_PREP(FLEXCAN_FDCBT_FPRESDIV_MASK, 0x3ff) |
+ FIELD_PREP(FLEXCAN_FDCBT_FRJW_MASK, 0x7) |
+ FIELD_PREP(FLEXCAN_FDCBT_FPROPSEG_MASK, 0x1f) |
+ FIELD_PREP(FLEXCAN_FDCBT_FPSEG1_MASK, 0x7) |
+ FIELD_PREP(FLEXCAN_FDCBT_FPSEG2_MASK, 0x7));
+
+ reg_fdcbt |= FIELD_PREP(FLEXCAN_FDCBT_FPRESDIV_MASK, dbt->brp - 1) |
+ FIELD_PREP(FLEXCAN_FDCBT_FRJW_MASK, dbt->sjw - 1) |
+ FIELD_PREP(FLEXCAN_FDCBT_FPROPSEG_MASK, dbt->prop_seg) |
+ FIELD_PREP(FLEXCAN_FDCBT_FPSEG1_MASK, dbt->phase_seg1 - 1) |
+ FIELD_PREP(FLEXCAN_FDCBT_FPSEG2_MASK, dbt->phase_seg2 - 1);
+
+ netdev_dbg(dev, "writing fdcbt=0x%08x\n", reg_fdcbt);
+ priv->write(reg_fdcbt, &regs->fdcbt);
+
+ /* CTRL2 */
+ reg_ctrl2 = priv->read(&regs->ctrl2);
+ reg_ctrl2 &= ~FLEXCAN_CTRL2_ISOCANFDEN;
+ if (!(priv->can.ctrlmode & CAN_CTRLMODE_FD_NON_ISO))
+ reg_ctrl2 |= FLEXCAN_CTRL2_ISOCANFDEN;
+
+ netdev_dbg(dev, "writing ctrl2=0x%08x\n", reg_ctrl2);
+ priv->write(reg_ctrl2, &regs->ctrl2);
+ }
+
+ /* FDCTRL */
+ reg_fdctrl = priv->read(&regs->fdctrl);
+ reg_fdctrl &= ~(FLEXCAN_FDCTRL_FDRATE |
+ FIELD_PREP(FLEXCAN_FDCTRL_TDCOFF, 0x1f));
+
+ if (priv->can.ctrlmode & CAN_CTRLMODE_FD) {
+ reg_fdctrl |= FLEXCAN_FDCTRL_FDRATE;
+
+ if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) {
+ /* TDC must be disabled for Loop Back mode */
+ reg_fdctrl &= ~FLEXCAN_FDCTRL_TDCEN;
+ } else {
+ reg_fdctrl |= FLEXCAN_FDCTRL_TDCEN |
+ FIELD_PREP(FLEXCAN_FDCTRL_TDCOFF,
+ ((dbt->phase_seg1 - 1) +
+ dbt->prop_seg + 2) *
+ ((dbt->brp - 1 ) + 1));
+ }
+ }
+
+ netdev_dbg(dev, "writing fdctrl=0x%08x\n", reg_fdctrl);
+ priv->write(reg_fdctrl, &regs->fdctrl);
+
+ netdev_dbg(dev, "%s: mcr=0x%08x ctrl=0x%08x ctrl2=0x%08x fdctrl=0x%08x cbt=0x%08x fdcbt=0x%08x\n",
+ __func__,
+ priv->read(&regs->mcr), priv->read(&regs->ctrl),
+ priv->read(&regs->ctrl2), priv->read(&regs->fdctrl),
+ priv->read(&regs->cbt), priv->read(&regs->fdcbt));
+}
+
+static void flexcan_set_bittiming(struct net_device *dev)
+{
+ const struct flexcan_priv *priv = netdev_priv(dev);
+ struct flexcan_regs __iomem *regs = priv->regs;
+ u32 reg;
+
+ reg = priv->read(&regs->ctrl);
+ reg &= ~(FLEXCAN_CTRL_LPB | FLEXCAN_CTRL_SMP |
+ FLEXCAN_CTRL_LOM);
+
if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK)
reg |= FLEXCAN_CTRL_LPB;
if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)
@@ -1052,9 +1286,10 @@ static void flexcan_set_bittiming(struct net_device *dev)
netdev_dbg(dev, "writing ctrl=0x%08x\n", reg);
priv->write(reg, &regs->ctrl);
- /* print chip status */
- netdev_dbg(dev, "%s: mcr=0x%08x ctrl=0x%08x\n", __func__,
- priv->read(&regs->mcr), priv->read(&regs->ctrl));
+ if (priv->can.ctrlmode_supported & CAN_CTRLMODE_FD)
+ return flexcan_set_bittiming_cbt(dev);
+ else
+ return flexcan_set_bittiming_ctrl(dev);
}
/* flexcan_chip_start
@@ -1127,6 +1362,12 @@ static int flexcan_chip_start(struct net_device *dev)
else
reg_mcr |= FLEXCAN_MCR_SRX_DIS;
+ /* MCR - CAN-FD */
+ if (priv->can.ctrlmode & CAN_CTRLMODE_FD)
+ reg_mcr |= FLEXCAN_MCR_FDEN;
+ else
+ reg_mcr &= ~FLEXCAN_MCR_FDEN;
+
netdev_dbg(dev, "%s: writing mcr=0x%08x", __func__, reg_mcr);
priv->write(reg_mcr, &regs->mcr);
@@ -1169,6 +1410,32 @@ static int flexcan_chip_start(struct net_device *dev)
priv->write(reg_ctrl2, &regs->ctrl2);
}
+ if (priv->can.ctrlmode_supported & CAN_CTRLMODE_FD) {
+ u32 reg_fdctrl;
+
+ reg_fdctrl = priv->read(&regs->fdctrl);
+ reg_fdctrl &= ~(FIELD_PREP(FLEXCAN_FDCTRL_MBDSR1, 0x3) |
+ FIELD_PREP(FLEXCAN_FDCTRL_MBDSR0, 0x3));
+
+ if (priv->can.ctrlmode & CAN_CTRLMODE_FD) {
+ reg_fdctrl |=
+ FIELD_PREP(FLEXCAN_FDCTRL_MBDSR1,
+ FLEXCAN_FDCTRL_MBDSR_64) |
+ FIELD_PREP(FLEXCAN_FDCTRL_MBDSR0,
+ FLEXCAN_FDCTRL_MBDSR_64);
+ } else {
+ reg_fdctrl |=
+ FIELD_PREP(FLEXCAN_FDCTRL_MBDSR1,
+ FLEXCAN_FDCTRL_MBDSR_8) |
+ FIELD_PREP(FLEXCAN_FDCTRL_MBDSR0,
+ FLEXCAN_FDCTRL_MBDSR_8);
+ }
+
+ netdev_dbg(dev, "%s: writing fdctrl=0x%08x",
+ __func__, reg_fdctrl);
+ priv->write(reg_fdctrl, &regs->fdctrl);
+ }
+
if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
for (i = priv->offload.mb_first; i <= priv->offload.mb_last; i++) {
mb = flexcan_get_mb(priv, i);
@@ -1204,28 +1471,43 @@ static int flexcan_chip_start(struct net_device *dev)
for (i = 0; i < priv->mb_count; i++)
priv->write(0, &regs->rximr[i]);
- /* On Vybrid, disable memory error detection interrupts
- * and freeze mode.
- * This also works around errata e5295 which generates
- * false positive memory errors and put the device in
- * freeze mode.
+ /* On Vybrid, disable non-correctable errors interrupt and
+ * freeze mode. It still can correct the correctable errors
+ * when HW supports ECC.
+ *
+ * This also works around errata e5295 which generates false
+ * positive memory errors and put the device in freeze mode.
*/
if (priv->devtype_data->quirks & FLEXCAN_QUIRK_DISABLE_MECR) {
/* Follow the protocol as described in "Detection
* and Correction of Memory Errors" to write to
- * MECR register
+ * MECR register (step 1 - 5)
+ *
+ * 1. By default, CTRL2[ECRWRE] = 0, MECR[ECRWRDIS] = 1
+ * 2. set CTRL2[ECRWRE]
*/
reg_ctrl2 = priv->read(&regs->ctrl2);
reg_ctrl2 |= FLEXCAN_CTRL2_ECRWRE;
priv->write(reg_ctrl2, &regs->ctrl2);
+ /* 3. clear MECR[ECRWRDIS] */
reg_mecr = priv->read(&regs->mecr);
reg_mecr &= ~FLEXCAN_MECR_ECRWRDIS;
priv->write(reg_mecr, &regs->mecr);
- reg_mecr |= FLEXCAN_MECR_ECCDIS;
+
+ /* 4. all writes to MECR must keep MECR[ECRWRDIS] cleared */
reg_mecr &= ~(FLEXCAN_MECR_NCEFAFRZ | FLEXCAN_MECR_HANCEI_MSK |
FLEXCAN_MECR_FANCEI_MSK);
priv->write(reg_mecr, &regs->mecr);
+
+ /* 5. after configuration done, lock MECR by either
+ * setting MECR[ECRWRDIS] or clearing CTRL2[ECRWRE]
+ */
+ reg_mecr |= FLEXCAN_MECR_ECRWRDIS;
+ priv->write(reg_mecr, &regs->mecr);
+
+ reg_ctrl2 &= ~FLEXCAN_CTRL2_ECRWRE;
+ priv->write(reg_ctrl2, &regs->ctrl2);
}
err = flexcan_transceiver_enable(priv);
@@ -1260,18 +1542,23 @@ static int flexcan_chip_start(struct net_device *dev)
return err;
}
-/* flexcan_chip_stop
+/* __flexcan_chip_stop
*
- * this functions is entered with clocks enabled
+ * this function is entered with clocks enabled
*/
-static void flexcan_chip_stop(struct net_device *dev)
+static int __flexcan_chip_stop(struct net_device *dev, bool disable_on_error)
{
struct flexcan_priv *priv = netdev_priv(dev);
struct flexcan_regs __iomem *regs = priv->regs;
+ int err;
/* freeze + disable module */
- flexcan_chip_freeze(priv);
- flexcan_chip_disable(priv);
+ err = flexcan_chip_freeze(priv);
+ if (err && !disable_on_error)
+ return err;
+ err = flexcan_chip_disable(priv);
+ if (err && !disable_on_error)
+ goto out_chip_unfreeze;
/* Disable all interrupts */
priv->write(0, &regs->imask2);
@@ -1281,6 +1568,23 @@ static void flexcan_chip_stop(struct net_device *dev)
flexcan_transceiver_disable(priv);
priv->can.state = CAN_STATE_STOPPED;
+
+ return 0;
+
+ out_chip_unfreeze:
+ flexcan_chip_unfreeze(priv);
+
+ return err;
+}
+
+static inline int flexcan_chip_stop_disable_on_error(struct net_device *dev)
+{
+ return __flexcan_chip_stop(dev, true);
+}
+
+static inline int flexcan_chip_stop(struct net_device *dev)
+{
+ return __flexcan_chip_stop(dev, false);
}
static int flexcan_open(struct net_device *dev)
@@ -1288,6 +1592,12 @@ static int flexcan_open(struct net_device *dev)
struct flexcan_priv *priv = netdev_priv(dev);
int err;
+ if ((priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) &&
+ (priv->can.ctrlmode & CAN_CTRLMODE_FD)) {
+ netdev_err(dev, "Three Samples mode and CAN-FD mode can't be used together\n");
+ return -EINVAL;
+ }
+
err = pm_runtime_get_sync(priv->dev);
if (err < 0)
return err;
@@ -1300,7 +1610,10 @@ static int flexcan_open(struct net_device *dev)
if (err)
goto out_close;
- priv->mb_size = sizeof(struct flexcan_mb) + CAN_MAX_DLEN;
+ if (priv->can.ctrlmode & CAN_CTRLMODE_FD)
+ priv->mb_size = sizeof(struct flexcan_mb) + CANFD_MAX_DLEN;
+ else
+ priv->mb_size = sizeof(struct flexcan_mb) + CAN_MAX_DLEN;
priv->mb_count = (sizeof(priv->regs->mb[0]) / priv->mb_size) +
(sizeof(priv->regs->mb[1]) / priv->mb_size);
@@ -1362,7 +1675,7 @@ static int flexcan_close(struct net_device *dev)
netif_stop_queue(dev);
can_rx_offload_disable(&priv->offload);
- flexcan_chip_stop(dev);
+ flexcan_chip_stop_disable_on_error(dev);
can_rx_offload_del(&priv->offload);
free_irq(dev->irq, dev);
@@ -1531,6 +1844,7 @@ out_put_node:
}
static const struct of_device_id flexcan_of_match[] = {
+ { .compatible = "fsl,imx8qm-flexcan", .data = &fsl_imx8qm_devtype_data, },
{ .compatible = "fsl,imx6q-flexcan", .data = &fsl_imx6q_devtype_data, },
{ .compatible = "fsl,imx28-flexcan", .data = &fsl_imx28_devtype_data, },
{ .compatible = "fsl,imx53-flexcan", .data = &fsl_imx25_devtype_data, },
@@ -1539,6 +1853,7 @@ static const struct of_device_id flexcan_of_match[] = {
{ .compatible = "fsl,p1010-flexcan", .data = &fsl_p1010_devtype_data, },
{ .compatible = "fsl,vf610-flexcan", .data = &fsl_vf610_devtype_data, },
{ .compatible = "fsl,ls1021ar2-flexcan", .data = &fsl_ls1021a_r2_devtype_data, },
+ { .compatible = "fsl,lx2160ar1-flexcan", .data = &fsl_lx2160a_r1_devtype_data, },
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, flexcan_of_match);
@@ -1562,11 +1877,13 @@ static int flexcan_probe(struct platform_device *pdev)
u8 clk_src = 1;
u32 clock_freq = 0;
- reg_xceiver = devm_regulator_get(&pdev->dev, "xceiver");
+ reg_xceiver = devm_regulator_get_optional(&pdev->dev, "xceiver");
if (PTR_ERR(reg_xceiver) == -EPROBE_DEFER)
return -EPROBE_DEFER;
- else if (IS_ERR(reg_xceiver))
+ else if (PTR_ERR(reg_xceiver) == -ENODEV)
reg_xceiver = NULL;
+ else if (IS_ERR(reg_xceiver))
+ return PTR_ERR(reg_xceiver);
if (pdev->dev.of_node) {
of_property_read_u32(pdev->dev.of_node,
@@ -1608,6 +1925,12 @@ static int flexcan_probe(struct platform_device *pdev)
return -ENODEV;
}
+ if ((devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) &&
+ !(devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)) {
+ dev_err(&pdev->dev, "CAN-FD mode doesn't work with FIFO mode!\n");
+ return -EINVAL;
+ }
+
dev = alloc_candev(sizeof(struct flexcan_priv), 1);
if (!dev)
return -ENOMEM;
@@ -1632,7 +1955,6 @@ static int flexcan_probe(struct platform_device *pdev)
priv->dev = &pdev->dev;
priv->can.clock.freq = clock_freq;
- priv->can.bittiming_const = &flexcan_bittiming_const;
priv->can.do_set_mode = flexcan_set_mode;
priv->can.do_get_berr_counter = flexcan_get_berr_counter;
priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK |
@@ -1645,6 +1967,16 @@ static int flexcan_probe(struct platform_device *pdev)
priv->devtype_data = devtype_data;
priv->reg_xceiver = reg_xceiver;
+ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) {
+ priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD |
+ CAN_CTRLMODE_FD_NON_ISO;
+ priv->can.bittiming_const = &flexcan_fd_bittiming_const;
+ priv->can.data_bittiming_const =
+ &flexcan_fd_data_bittiming_const;
+ } else {
+ priv->can.bittiming_const = &flexcan_bittiming_const;
+ }
+
pm_runtime_get_noresume(&pdev->dev);
pm_runtime_set_active(&pdev->dev);
pm_runtime_enable(&pdev->dev);
@@ -1655,6 +1987,7 @@ static int flexcan_probe(struct platform_device *pdev)
goto failed_register;
}
+ of_can_transceiver(dev);
devm_can_led_init(dev);
if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE) {
@@ -1685,7 +2018,7 @@ static int __maybe_unused flexcan_suspend(struct device *device)
{
struct net_device *dev = dev_get_drvdata(device);
struct flexcan_priv *priv = netdev_priv(dev);
- int err = 0;
+ int err;
if (netif_running(dev)) {
/* if wakeup is enabled, enter stop mode
@@ -1697,25 +2030,27 @@ static int __maybe_unused flexcan_suspend(struct device *device)
if (err)
return err;
} else {
- err = flexcan_chip_disable(priv);
+ err = flexcan_chip_stop(dev);
if (err)
return err;
- err = pm_runtime_force_suspend(device);
+ err = pinctrl_pm_select_sleep_state(device);
+ if (err)
+ return err;
}
netif_stop_queue(dev);
netif_device_detach(dev);
}
priv->can.state = CAN_STATE_SLEEPING;
- return err;
+ return 0;
}
static int __maybe_unused flexcan_resume(struct device *device)
{
struct net_device *dev = dev_get_drvdata(device);
struct flexcan_priv *priv = netdev_priv(dev);
- int err = 0;
+ int err;
priv->can.state = CAN_STATE_ERROR_ACTIVE;
if (netif_running(dev)) {
@@ -1727,15 +2062,17 @@ static int __maybe_unused flexcan_resume(struct device *device)
if (err)
return err;
} else {
- err = pm_runtime_force_resume(device);
+ err = pinctrl_pm_select_default_state(device);
if (err)
return err;
- err = flexcan_chip_enable(priv);
+ err = flexcan_chip_start(dev);
+ if (err)
+ return err;
}
}
- return err;
+ return 0;
}
static int __maybe_unused flexcan_runtime_suspend(struct device *device)
@@ -1761,8 +2098,16 @@ static int __maybe_unused flexcan_noirq_suspend(struct device *device)
struct net_device *dev = dev_get_drvdata(device);
struct flexcan_priv *priv = netdev_priv(dev);
- if (netif_running(dev) && device_may_wakeup(device))
- flexcan_enable_wakeup_irq(priv, true);
+ if (netif_running(dev)) {
+ int err;
+
+ if (device_may_wakeup(device))
+ flexcan_enable_wakeup_irq(priv, true);
+
+ err = pm_runtime_force_suspend(device);
+ if (err)
+ return err;
+ }
return 0;
}
@@ -1772,8 +2117,16 @@ static int __maybe_unused flexcan_noirq_resume(struct device *device)
struct net_device *dev = dev_get_drvdata(device);
struct flexcan_priv *priv = netdev_priv(dev);
- if (netif_running(dev) && device_may_wakeup(device))
- flexcan_enable_wakeup_irq(priv, false);
+ if (netif_running(dev)) {
+ int err;
+
+ err = pm_runtime_force_resume(device);
+ if (err)
+ return err;
+
+ if (device_may_wakeup(device))
+ flexcan_enable_wakeup_irq(priv, false);
+ }
return 0;
}
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 6a5796c32721..73507cff3bc4 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1377,23 +1377,6 @@ EXPORT_SYMBOL(b53_phylink_mac_link_up);
int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering)
{
struct b53_device *dev = ds->priv;
- u16 pvid, new_pvid;
-
- b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid);
- if (!vlan_filtering) {
- /* Filtering is currently enabled, use the default PVID since
- * the bridge does not expect tagging anymore
- */
- dev->ports[port].pvid = pvid;
- new_pvid = b53_default_pvid(dev);
- } else {
- /* Filtering is currently disabled, restore the previous PVID */
- new_pvid = dev->ports[port].pvid;
- }
-
- if (pvid != new_pvid)
- b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port),
- new_pvid);
b53_enable_vlan(dev, dev->vlan_enabled, vlan_filtering);
@@ -2619,6 +2602,8 @@ struct b53_device *b53_switch_alloc(struct device *base,
dev->priv = priv;
dev->ops = ops;
ds->ops = &b53_switch_ops;
+ ds->configure_vlan_while_not_filtering = true;
+ dev->vlan_enabled = ds->configure_vlan_while_not_filtering;
mutex_init(&dev->reg_mutex);
mutex_init(&dev->stats_mutex);
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index c55c0a9f1b47..24893b592216 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -91,7 +91,6 @@ enum {
struct b53_port {
u16 vlan_ctl_mask;
struct ethtool_eee eee;
- u16 pvid;
};
struct b53_vlan {
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 723820603107..0b5b2b33b3b6 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -457,6 +457,7 @@ static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv,
{
struct device_node *port;
unsigned int port_num;
+ struct property *prop;
phy_interface_t mode;
int err;
@@ -483,6 +484,16 @@ static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv,
if (of_property_read_bool(port, "brcm,use-bcm-hdr"))
priv->brcm_tag_mask |= 1 << port_num;
+
+ /* Ensure that port 5 is not picked up as a DSA CPU port
+ * flavour but a regular port instead. We should be using
+ * devlink to be able to set the port flavour.
+ */
+ if (port_num == 5 && priv->type == BCM7278_DEVICE_ID) {
+ prop = of_find_property(port, "ethernet", NULL);
+ if (prop)
+ of_remove_property(port, prop);
+ }
}
}
@@ -527,7 +538,7 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds)
* driver.
*/
if (of_machine_is_compatible("brcm,bcm7445d0"))
- priv->indir_phy_mask |= (1 << BRCM_PSEUDO_PHY_ADDR);
+ priv->indir_phy_mask |= (1 << BRCM_PSEUDO_PHY_ADDR) | (1 << 0);
else
priv->indir_phy_mask = 0;
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 5f395d4119ac..a56fc50f5be4 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -669,8 +669,11 @@ static bool felix_txtstamp(struct dsa_switch *ds, int port,
struct ocelot *ocelot = ds->priv;
struct ocelot_port *ocelot_port = ocelot->ports[port];
- if (!ocelot_port_add_txtstamp_skb(ocelot_port, clone))
+ if (ocelot->ptp && (skb_shinfo(clone)->tx_flags & SKBTX_HW_TSTAMP) &&
+ ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
+ ocelot_port_add_txtstamp_skb(ocelot, port, clone);
return true;
+ }
return false;
}
diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c
index b3b8a8010142..862ea44beea7 100644
--- a/drivers/net/ethernet/allwinner/sun4i-emac.c
+++ b/drivers/net/ethernet/allwinner/sun4i-emac.c
@@ -640,13 +640,11 @@ static irqreturn_t emac_interrupt(int irq, void *dev_id)
struct net_device *dev = dev_id;
struct emac_board_info *db = netdev_priv(dev);
int int_status;
- unsigned long flags;
unsigned int reg_val;
/* A real interrupt coming */
- /* holders of db->lock must always block IRQs */
- spin_lock_irqsave(&db->lock, flags);
+ spin_lock(&db->lock);
/* Disable all interrupts */
writel(0, db->membase + EMAC_INT_CTL_REG);
@@ -680,7 +678,7 @@ static irqreturn_t emac_interrupt(int irq, void *dev_id)
reg_val |= (0xf << 0) | (0x01 << 8);
writel(reg_val, db->membase + EMAC_INT_CTL_REG);
}
- spin_unlock_irqrestore(&db->lock, flags);
+ spin_unlock(&db->lock);
return IRQ_HANDLED;
}
diff --git a/drivers/net/ethernet/freescale/dpaa2/Kconfig b/drivers/net/ethernet/freescale/dpaa2/Kconfig
index feea797cde02..cfd369cf4c8c 100644
--- a/drivers/net/ethernet/freescale/dpaa2/Kconfig
+++ b/drivers/net/ethernet/freescale/dpaa2/Kconfig
@@ -3,6 +3,7 @@ config FSL_DPAA2_ETH
tristate "Freescale DPAA2 Ethernet"
depends on FSL_MC_BUS && FSL_MC_DPIO
select PHYLINK
+ select PCS_LYNX
help
This is the DPAA2 Ethernet driver supporting Freescale SoCs
with DPAA2 (DataPath Acceleration Architecture v2).
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
index 3ee236c5fc37..6ff64dd1cf27 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
@@ -15,6 +15,18 @@ static int phy_mode(enum dpmac_eth_if eth_if, phy_interface_t *if_mode)
case DPMAC_ETH_IF_RGMII:
*if_mode = PHY_INTERFACE_MODE_RGMII;
break;
+ case DPMAC_ETH_IF_USXGMII:
+ *if_mode = PHY_INTERFACE_MODE_USXGMII;
+ break;
+ case DPMAC_ETH_IF_QSGMII:
+ *if_mode = PHY_INTERFACE_MODE_QSGMII;
+ break;
+ case DPMAC_ETH_IF_SGMII:
+ *if_mode = PHY_INTERFACE_MODE_SGMII;
+ break;
+ case DPMAC_ETH_IF_XFI:
+ *if_mode = PHY_INTERFACE_MODE_10GBASER;
+ break;
default:
return -EINVAL;
}
@@ -67,6 +79,10 @@ static bool dpaa2_mac_phy_mode_mismatch(struct dpaa2_mac *mac,
phy_interface_t interface)
{
switch (interface) {
+ case PHY_INTERFACE_MODE_10GBASER:
+ case PHY_INTERFACE_MODE_USXGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
+ case PHY_INTERFACE_MODE_SGMII:
case PHY_INTERFACE_MODE_RGMII:
case PHY_INTERFACE_MODE_RGMII_ID:
case PHY_INTERFACE_MODE_RGMII_RXID:
@@ -95,6 +111,17 @@ static void dpaa2_mac_validate(struct phylink_config *config,
phylink_set(mask, Asym_Pause);
switch (state->interface) {
+ case PHY_INTERFACE_MODE_NA:
+ case PHY_INTERFACE_MODE_10GBASER:
+ case PHY_INTERFACE_MODE_USXGMII:
+ phylink_set(mask, 10000baseT_Full);
+ if (state->interface == PHY_INTERFACE_MODE_10GBASER)
+ break;
+ phylink_set(mask, 5000baseT_Full);
+ phylink_set(mask, 2500baseT_Full);
+ fallthrough;
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
case PHY_INTERFACE_MODE_RGMII:
case PHY_INTERFACE_MODE_RGMII_ID:
case PHY_INTERFACE_MODE_RGMII_RXID:
@@ -227,6 +254,52 @@ out:
return fixed;
}
+static int dpaa2_pcs_create(struct dpaa2_mac *mac,
+ struct device_node *dpmac_node, int id)
+{
+ struct mdio_device *mdiodev;
+ struct device_node *node;
+
+ node = of_parse_phandle(dpmac_node, "pcs-handle", 0);
+ if (!node) {
+ /* do not error out on old DTS files */
+ netdev_warn(mac->net_dev, "pcs-handle node not found\n");
+ return 0;
+ }
+
+ if (!of_device_is_available(node) ||
+ !of_device_is_available(node->parent)) {
+ netdev_err(mac->net_dev, "pcs-handle node not available\n");
+ return -ENODEV;
+ }
+
+ mdiodev = of_mdio_find_device(node);
+ of_node_put(node);
+ if (!mdiodev)
+ return -EPROBE_DEFER;
+
+ mac->pcs = lynx_pcs_create(mdiodev);
+ if (!mac->pcs) {
+ netdev_err(mac->net_dev, "lynx_pcs_create() failed\n");
+ put_device(&mdiodev->dev);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void dpaa2_pcs_destroy(struct dpaa2_mac *mac)
+{
+ struct lynx_pcs *pcs = mac->pcs;
+ struct device *dev = &pcs->mdio->dev;
+
+ if (pcs) {
+ lynx_pcs_destroy(pcs);
+ put_device(dev);
+ mac->pcs = NULL;
+ }
+}
+
int dpaa2_mac_connect(struct dpaa2_mac *mac)
{
struct fsl_mc_device *dpmac_dev = mac->mc_dev;
@@ -278,6 +351,13 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
goto err_put_node;
}
+ if (attr.link_type == DPMAC_LINK_TYPE_PHY &&
+ attr.eth_if != DPMAC_ETH_IF_RGMII) {
+ err = dpaa2_pcs_create(mac, dpmac_node, attr.id);
+ if (err)
+ goto err_put_node;
+ }
+
mac->phylink_config.dev = &net_dev->dev;
mac->phylink_config.type = PHYLINK_NETDEV;
@@ -286,10 +366,13 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
&dpaa2_mac_phylink_ops);
if (IS_ERR(phylink)) {
err = PTR_ERR(phylink);
- goto err_put_node;
+ goto err_pcs_destroy;
}
mac->phylink = phylink;
+ if (mac->pcs)
+ phylink_set_pcs(mac->phylink, &mac->pcs->pcs);
+
err = phylink_of_phy_connect(mac->phylink, dpmac_node, 0);
if (err) {
netdev_err(net_dev, "phylink_of_phy_connect() = %d\n", err);
@@ -302,6 +385,8 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
err_phylink_destroy:
phylink_destroy(mac->phylink);
+err_pcs_destroy:
+ dpaa2_pcs_destroy(mac);
err_put_node:
of_node_put(dpmac_node);
err_close_dpmac:
@@ -316,6 +401,8 @@ void dpaa2_mac_disconnect(struct dpaa2_mac *mac)
phylink_disconnect_phy(mac->phylink);
phylink_destroy(mac->phylink);
+ dpaa2_pcs_destroy(mac);
+
dpmac_close(mac->mc_io, 0, mac->mc_dev->mc_handle);
}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h
index 2130d9c7d40e..955a52856210 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.h
@@ -7,6 +7,7 @@
#include <linux/of_mdio.h>
#include <linux/of_net.h>
#include <linux/phylink.h>
+#include <linux/pcs-lynx.h>
#include "dpmac.h"
#include "dpmac-cmd.h"
@@ -21,6 +22,7 @@ struct dpaa2_mac {
struct phylink *phylink;
phy_interface_t if_mode;
enum dpmac_link_type if_link_type;
+ struct lynx_pcs *pcs;
};
bool dpaa2_mac_is_type_fixed(struct fsl_mc_device *dpmac_dev,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 26f6f068b01d..c643c5ab60df 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -8,7 +8,7 @@
#include "hclge_tm.h"
#include "hnae3.h"
-static struct hclge_dbg_reg_type_info hclge_dbg_reg_info[] = {
+static const struct hclge_dbg_reg_type_info hclge_dbg_reg_info[] = {
{ .reg_type = "bios common",
.dfx_msg = &hclge_dbg_bios_common_reg[0],
.reg_msg = { .msg_num = ARRAY_SIZE(hclge_dbg_bios_common_reg),
@@ -115,14 +115,14 @@ static int hclge_dbg_cmd_send(struct hclge_dev *hdev,
}
static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev,
- struct hclge_dbg_reg_type_info *reg_info,
+ const struct hclge_dbg_reg_type_info *reg_info,
const char *cmd_buf)
{
#define IDX_OFFSET 1
const char *s = &cmd_buf[strlen(reg_info->reg_type) + IDX_OFFSET];
- struct hclge_dbg_dfx_message *dfx_message = reg_info->dfx_msg;
- struct hclge_dbg_reg_common_msg *reg_msg = &reg_info->reg_msg;
+ const struct hclge_dbg_dfx_message *dfx_message = reg_info->dfx_msg;
+ const struct hclge_dbg_reg_common_msg *reg_msg = &reg_info->reg_msg;
struct hclge_desc *desc_src;
struct hclge_desc *desc;
int entries_per_desc;
@@ -399,7 +399,7 @@ err_dcb_cmd_send:
static void hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev, const char *cmd_buf)
{
- struct hclge_dbg_reg_type_info *reg_info;
+ const struct hclge_dbg_reg_type_info *reg_info;
bool has_dump = false;
int i;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
index 38b79321c4c4..a9066e6ff697 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
@@ -81,13 +81,13 @@ struct hclge_dbg_dfx_message {
#define HCLGE_DBG_MAC_REG_TYPE_LEN 32
struct hclge_dbg_reg_type_info {
const char *reg_type;
- struct hclge_dbg_dfx_message *dfx_msg;
+ const struct hclge_dbg_dfx_message *dfx_msg;
struct hclge_dbg_reg_common_msg reg_msg;
};
#pragma pack()
-static struct hclge_dbg_dfx_message hclge_dbg_bios_common_reg[] = {
+static const struct hclge_dbg_dfx_message hclge_dbg_bios_common_reg[] = {
{false, "Reserved"},
{true, "BP_CPU_STATE"},
{true, "DFX_MSIX_INFO_NIC_0"},
@@ -103,7 +103,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_bios_common_reg[] = {
{false, "Reserved"},
};
-static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_0[] = {
+static const struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_0[] = {
{false, "Reserved"},
{true, "SSU_ETS_PORT_STATUS"},
{true, "SSU_ETS_TCG_STATUS"},
@@ -175,7 +175,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_0[] = {
{false, "Reserved"},
};
-static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_1[] = {
+static const struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_1[] = {
{true, "prt_id"},
{true, "PACKET_TC_CURR_BUFFER_CNT_0"},
{true, "PACKET_TC_CURR_BUFFER_CNT_1"},
@@ -282,7 +282,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_1[] = {
{false, "Reserved"},
};
-static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_2[] = {
+static const struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_2[] = {
{true, "OQ_INDEX"},
{true, "QUEUE_CNT"},
{false, "Reserved"},
@@ -291,7 +291,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_ssu_reg_2[] = {
{false, "Reserved"},
};
-static struct hclge_dbg_dfx_message hclge_dbg_igu_egu_reg[] = {
+static const struct hclge_dbg_dfx_message hclge_dbg_igu_egu_reg[] = {
{true, "prt_id"},
{true, "IGU_RX_ERR_PKT"},
{true, "IGU_RX_NO_SOF_PKT"},
@@ -356,7 +356,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_igu_egu_reg[] = {
{false, "Reserved"},
};
-static struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_0[] = {
+static const struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_0[] = {
{true, "tc_queue_num"},
{true, "FSM_DFX_ST0"},
{true, "FSM_DFX_ST1"},
@@ -365,7 +365,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_0[] = {
{true, "BUF_WAIT_TIMEOUT_QID"},
};
-static struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_1[] = {
+static const struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_1[] = {
{false, "Reserved"},
{true, "FIFO_DFX_ST0"},
{true, "FIFO_DFX_ST1"},
@@ -381,7 +381,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_rpu_reg_1[] = {
{false, "Reserved"},
};
-static struct hclge_dbg_dfx_message hclge_dbg_ncsi_reg[] = {
+static const struct hclge_dbg_dfx_message hclge_dbg_ncsi_reg[] = {
{false, "Reserved"},
{true, "NCSI_EGU_TX_FIFO_STS"},
{true, "NCSI_PAUSE_STATUS"},
@@ -453,7 +453,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_ncsi_reg[] = {
{true, "NCSI_MAC_RX_PAUSE_FRAMES"},
};
-static struct hclge_dbg_dfx_message hclge_dbg_rtc_reg[] = {
+static const struct hclge_dbg_dfx_message hclge_dbg_rtc_reg[] = {
{false, "Reserved"},
{true, "LGE_IGU_AFIFO_DFX_0"},
{true, "LGE_IGU_AFIFO_DFX_1"},
@@ -483,7 +483,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_rtc_reg[] = {
{false, "Reserved"},
};
-static struct hclge_dbg_dfx_message hclge_dbg_ppp_reg[] = {
+static const struct hclge_dbg_dfx_message hclge_dbg_ppp_reg[] = {
{false, "Reserved"},
{true, "DROP_FROM_PRT_PKT_CNT"},
{true, "DROP_FROM_HOST_PKT_CNT"},
@@ -639,7 +639,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_ppp_reg[] = {
{false, "Reserved"},
};
-static struct hclge_dbg_dfx_message hclge_dbg_rcb_reg[] = {
+static const struct hclge_dbg_dfx_message hclge_dbg_rcb_reg[] = {
{false, "Reserved"},
{true, "FSM_DFX_ST0"},
{true, "FSM_DFX_ST1"},
@@ -711,7 +711,7 @@ static struct hclge_dbg_dfx_message hclge_dbg_rcb_reg[] = {
{false, "Reserved"},
};
-static struct hclge_dbg_dfx_message hclge_dbg_tqp_reg[] = {
+static const struct hclge_dbg_dfx_message hclge_dbg_tqp_reg[] = {
{true, "q_num"},
{true, "RCB_CFG_RX_RING_TAIL"},
{true, "RCB_CFG_RX_RING_HEAD"},
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
index e0eb294779ec..5a6bbee819cd 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
@@ -784,7 +784,7 @@ static void free_cmdq(struct hinic_cmdq *cmdq)
* init_cmdqs_ctxt - write the cmdq ctxt to HW after init all cmdq
* @hwdev: the NIC HW device
* @cmdqs: cmdqs to write the ctxts for
- * &db_area: db_area for all the cmdqs
+ * @db_area: db_area for all the cmdqs
*
* Return 0 - Success, negative - Failure
**/
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index 239685152f6e..0c74f6674634 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -437,6 +437,8 @@ static int get_base_qpn(struct hinic_hwdev *hwdev, u16 *base_qpn)
/**
* hinic_hwdev_ifup - Preparing the HW for passing IO
* @hwdev: the NIC HW device
+ * @sq_depth: the send queue depth
+ * @rq_depth: the receive queue depth
*
* Return 0 - Success, negative - Failure
**/
@@ -582,6 +584,7 @@ void hinic_hwdev_cb_unregister(struct hinic_hwdev *hwdev,
/**
* nic_mgmt_msg_handler - nic mgmt event handler
* @handle: private data for the handler
+ * @cmd: message command
* @buf_in: input buffer
* @in_size: input size
* @buf_out: output buffer
@@ -909,6 +912,7 @@ int hinic_set_interrupt_cfg(struct hinic_hwdev *hwdev,
/**
* hinic_init_hwdev - Initialize the NIC HW
* @pdev: the NIC pci device
+ * @devlink: the poniter of hinic devlink
*
* Return initialized NIC HW device
*
@@ -1121,7 +1125,7 @@ int hinic_hwdev_msix_cnt_set(struct hinic_hwdev *hwdev, u16 msix_index)
* @msix_index: msix_index
* @pending_limit: the maximum pending interrupt events (unit 8)
* @coalesc_timer: coalesc period for interrupt (unit 8 us)
- * @lli_timer: replenishing period for low latency credit (unit 8 us)
+ * @lli_timer_cfg: replenishing period for low latency credit (unit 8 us)
* @lli_credit_limit: maximum credits for low latency msix messages (unit 8)
* @resend_timer: maximum wait for resending msix (unit coalesc period)
*
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
index f108b0c9228e..19942fef99d9 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
@@ -188,6 +188,7 @@ static u8 eq_cons_idx_checksum_set(u32 val)
/**
* eq_update_ci - update the HW cons idx of event queue
* @eq: the event queue to update the cons idx for
+ * @arm_state: the arm bit value of eq's interrupt
**/
static void eq_update_ci(struct hinic_eq *eq, u32 arm_state)
{
@@ -368,7 +369,7 @@ static void eq_irq_work(struct work_struct *work)
/**
* ceq_tasklet - the tasklet of the EQ that received the event
- * @ceq_data: the eq
+ * @t: the tasklet struct pointer
**/
static void ceq_tasklet(struct tasklet_struct *t)
{
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
index bc8925c0c982..efbaed389440 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
@@ -230,6 +230,7 @@ static int wait_hwif_ready(struct hinic_hwif *hwif)
* @hwif: the HW interface of a pci function device
* @attr0: the first attribute that was read from the hw
* @attr1: the second attribute that was read from the hw
+ * @attr2: the third attribute that was read from the hw
**/
static void set_hwif_attr(struct hinic_hwif *hwif, u32 attr0, u32 attr1,
u32 attr2)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
index 2ebae6cb5db5..819fa13034c0 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
@@ -238,6 +238,7 @@ static int send_msg_to_mgmt(struct hinic_pf_to_mgmt *pf_to_mgmt,
* @out_size: response length
* @direction: the direction of the original message
* @resp_msg_id: msg id to response for
+ * @timeout: time-out period of waiting for response
*
* Return 0 - Success, negative - Failure
**/
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 19d01def891f..350225bbe0be 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -990,7 +990,7 @@ static void hinic_refresh_nic_cfg(struct hinic_dev *nic_dev)
* @handle: nic device for the handler
* @buf_in: input buffer
* @in_size: input size
- * @buf_in: output buffer
+ * @buf_out: output buffer
* @out_size: returned output size
*
* Return 0 - Success, negative - Failure
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
index 0bc2410c8949..2f7a861d0c7b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile
@@ -3,9 +3,10 @@
# Makefile for Marvell's OcteonTX2 RVU Admin Function driver
#
+ccflags-y += -I$(src)
obj-$(CONFIG_OCTEONTX2_MBOX) += octeontx2_mbox.o
obj-$(CONFIG_OCTEONTX2_AF) += octeontx2_af.o
-octeontx2_mbox-y := mbox.o
+octeontx2_mbox-y := mbox.o rvu_trace.o
octeontx2_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \
rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
index 387e33fa417a..4b4cf7dac77f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
@@ -14,6 +14,7 @@
#include "rvu_reg.h"
#include "mbox.h"
+#include "rvu_trace.h"
static const u16 msgs_offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
@@ -199,6 +200,9 @@ void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid)
*/
tx_hdr->num_msgs = mdev->num_msgs;
rx_hdr->num_msgs = 0;
+
+ trace_otx2_msg_send(mbox->pdev, tx_hdr->num_msgs, tx_hdr->msg_size);
+
spin_unlock(&mdev->mbox_lock);
/* The interrupt should be fired after num_msgs is written
@@ -295,10 +299,15 @@ int otx2_mbox_check_rsp_msgs(struct otx2_mbox *mbox, int devid)
struct mbox_msghdr *preq = mdev->mbase + ireq;
struct mbox_msghdr *prsp = mdev->mbase + irsp;
- if (preq->id != prsp->id)
+ if (preq->id != prsp->id) {
+ trace_otx2_msg_check(mbox->pdev, preq->id,
+ prsp->id, prsp->rc);
goto exit;
+ }
if (prsp->rc) {
rc = prsp->rc;
+ trace_otx2_msg_check(mbox->pdev, preq->id,
+ prsp->id, prsp->rc);
goto exit;
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 4aaef0a2b51c..aa3bda3f34be 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -625,6 +625,7 @@ struct nix_rss_flowkey_cfg {
#define NIX_FLOW_KEY_TYPE_INNR_UDP BIT(15)
#define NIX_FLOW_KEY_TYPE_INNR_SCTP BIT(16)
#define NIX_FLOW_KEY_TYPE_INNR_ETH_DMAC BIT(17)
+#define NIX_FLOW_KEY_TYPE_VLAN BIT(20)
u32 flowkey_cfg; /* Flowkey types selected */
u8 group; /* RSS context or group */
};
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index c3ef73ae782c..e1f918960730 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -20,6 +20,8 @@
#include "rvu_reg.h"
#include "ptp.h"
+#include "rvu_trace.h"
+
#define DRV_NAME "octeontx2-af"
#define DRV_STRING "Marvell OcteonTX2 RVU Admin Function Driver"
@@ -1549,6 +1551,7 @@ static int rvu_process_mbox_msg(struct otx2_mbox *mbox, int devid,
if (rsp && err) \
rsp->hdr.rc = err; \
\
+ trace_otx2_msg_process(mbox->pdev, _id, err); \
return rsp ? err : -ENOMEM; \
}
MBOX_MESSAGES
@@ -1881,6 +1884,8 @@ static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq)
intr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT);
/* Clear interrupts */
rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFAF_MBOX_INT, intr);
+ if (intr)
+ trace_otx2_msg_interrupt(rvu->pdev, "PF(s) to AF", intr);
/* Sync with mbox memory region */
rmb();
@@ -1898,6 +1903,8 @@ static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq)
intr = rvupf_read64(rvu, RVU_PF_VFPF_MBOX_INTX(0));
rvupf_write64(rvu, RVU_PF_VFPF_MBOX_INTX(0), intr);
+ if (intr)
+ trace_otx2_msg_interrupt(rvu->pdev, "VF(s) to AF", intr);
rvu_queue_work(&rvu->afvf_wq_info, 0, vfs, intr);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index fe3389c144b5..fa9152ff5e2a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -15,6 +15,7 @@
#include "rvu.h"
#include "cgx.h"
#include "rvu_reg.h"
+#include "rvu_trace.h"
struct cgx_evq_entry {
struct list_head evq_node;
@@ -34,6 +35,7 @@ static struct _req_type __maybe_unused \
return NULL; \
req->hdr.sig = OTX2_MBOX_REQ_SIG; \
req->hdr.id = _id; \
+ trace_otx2_msg_alloc(rvu->pdev, _id, sizeof(*req)); \
return req; \
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 08181fc5f5d4..4bdc4baa3c59 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -2509,6 +2509,14 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg)
field->ltype_match = NPC_LT_LE_GTPU;
field->ltype_mask = 0xF;
break;
+ case NIX_FLOW_KEY_TYPE_VLAN:
+ field->lid = NPC_LID_LB;
+ field->hdr_offset = 2; /* Skip TPID (2-bytes) */
+ field->bytesm1 = 1; /* 2 Bytes (Actually 12 bits) */
+ field->ltype_match = NPC_LT_LB_CTAG;
+ field->ltype_mask = 0xF;
+ field->fn_mask = 1; /* Mask out the first nibble */
+ break;
}
field->ena = 1;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c
new file mode 100644
index 000000000000..56f90cf9c4c0
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 RVU Admin Function driver tracepoints
+ *
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#define CREATE_TRACE_POINTS
+#include "rvu_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL(otx2_msg_alloc);
+EXPORT_TRACEPOINT_SYMBOL(otx2_msg_interrupt);
+EXPORT_TRACEPOINT_SYMBOL(otx2_msg_process);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h
new file mode 100644
index 000000000000..e6609068e81b
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 RVU Admin Function driver tracepoints
+ *
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvu
+
+#if !defined(__RVU_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVU_TRACE_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+#include <linux/pci.h>
+
+TRACE_EVENT(otx2_msg_alloc,
+ TP_PROTO(const struct pci_dev *pdev, u16 id, u64 size),
+ TP_ARGS(pdev, id, size),
+ TP_STRUCT__entry(__string(dev, pci_name(pdev))
+ __field(u16, id)
+ __field(u64, size)
+ ),
+ TP_fast_assign(__assign_str(dev, pci_name(pdev))
+ __entry->id = id;
+ __entry->size = size;
+ ),
+ TP_printk("[%s] msg:(0x%x) size:%lld\n", __get_str(dev),
+ __entry->id, __entry->size)
+);
+
+TRACE_EVENT(otx2_msg_send,
+ TP_PROTO(const struct pci_dev *pdev, u16 num_msgs, u64 msg_size),
+ TP_ARGS(pdev, num_msgs, msg_size),
+ TP_STRUCT__entry(__string(dev, pci_name(pdev))
+ __field(u16, num_msgs)
+ __field(u64, msg_size)
+ ),
+ TP_fast_assign(__assign_str(dev, pci_name(pdev))
+ __entry->num_msgs = num_msgs;
+ __entry->msg_size = msg_size;
+ ),
+ TP_printk("[%s] sent %d msg(s) of size:%lld\n", __get_str(dev),
+ __entry->num_msgs, __entry->msg_size)
+);
+
+TRACE_EVENT(otx2_msg_check,
+ TP_PROTO(const struct pci_dev *pdev, u16 reqid, u16 rspid, int rc),
+ TP_ARGS(pdev, reqid, rspid, rc),
+ TP_STRUCT__entry(__string(dev, pci_name(pdev))
+ __field(u16, reqid)
+ __field(u16, rspid)
+ __field(int, rc)
+ ),
+ TP_fast_assign(__assign_str(dev, pci_name(pdev))
+ __entry->reqid = reqid;
+ __entry->rspid = rspid;
+ __entry->rc = rc;
+ ),
+ TP_printk("[%s] req->id:0x%x rsp->id:0x%x resp_code:%d\n",
+ __get_str(dev), __entry->reqid,
+ __entry->rspid, __entry->rc)
+);
+
+TRACE_EVENT(otx2_msg_interrupt,
+ TP_PROTO(const struct pci_dev *pdev, const char *msg, u64 intr),
+ TP_ARGS(pdev, msg, intr),
+ TP_STRUCT__entry(__string(dev, pci_name(pdev))
+ __string(str, msg)
+ __field(u64, intr)
+ ),
+ TP_fast_assign(__assign_str(dev, pci_name(pdev))
+ __assign_str(str, msg)
+ __entry->intr = intr;
+ ),
+ TP_printk("[%s] mbox interrupt %s (0x%llx)\n", __get_str(dev),
+ __get_str(str), __entry->intr)
+);
+
+TRACE_EVENT(otx2_msg_process,
+ TP_PROTO(const struct pci_dev *pdev, u16 id, int err),
+ TP_ARGS(pdev, id, err),
+ TP_STRUCT__entry(__string(dev, pci_name(pdev))
+ __field(u16, id)
+ __field(int, err)
+ ),
+ TP_fast_assign(__assign_str(dev, pci_name(pdev))
+ __entry->id = id;
+ __entry->err = err;
+ ),
+ TP_printk("[%s] msg:(0x%x) error:%d\n", __get_str(dev),
+ __entry->id, __entry->err)
+);
+
+#endif /* __RVU_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE rvu_trace
+
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 820fc660de66..d2581090f9a4 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -355,7 +355,7 @@ int otx2_rss_init(struct otx2_nic *pfvf)
rss->flowkey_cfg = rss->enable ? rss->flowkey_cfg :
NIX_FLOW_KEY_TYPE_IPV4 | NIX_FLOW_KEY_TYPE_IPV6 |
NIX_FLOW_KEY_TYPE_TCP | NIX_FLOW_KEY_TYPE_UDP |
- NIX_FLOW_KEY_TYPE_SCTP;
+ NIX_FLOW_KEY_TYPE_SCTP | NIX_FLOW_KEY_TYPE_VLAN;
ret = otx2_set_flowkey_cfg(pfvf);
if (ret)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index ac47762cce9b..d6253f2a414d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -20,6 +20,7 @@
#include <mbox.h>
#include "otx2_reg.h"
#include "otx2_txrx.h"
+#include <rvu_trace.h>
/* PCI device IDs */
#define PCI_DEVID_OCTEONTX2_RVU_PF 0xA063
@@ -523,6 +524,7 @@ static struct _req_type __maybe_unused \
return NULL; \
req->hdr.sig = OTX2_MBOX_REQ_SIG; \
req->hdr.id = _id; \
+ trace_otx2_msg_alloc(mbox->mbox.pdev, _id, sizeof(*req)); \
return req; \
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index 0341d9694e8b..662fb80dbb9d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -428,6 +428,8 @@ static int otx2_get_rss_hash_opts(struct otx2_nic *pfvf,
/* Mimimum is IPv4 and IPv6, SIP/DIP */
nfc->data = RXH_IP_SRC | RXH_IP_DST;
+ if (rss->flowkey_cfg & NIX_FLOW_KEY_TYPE_VLAN)
+ nfc->data |= RXH_VLAN;
switch (nfc->flow_type) {
case TCP_V4_FLOW:
@@ -477,6 +479,11 @@ static int otx2_set_rss_hash_opts(struct otx2_nic *pfvf,
if (!(nfc->data & RXH_IP_SRC) || !(nfc->data & RXH_IP_DST))
return -EINVAL;
+ if (nfc->data & RXH_VLAN)
+ rss_cfg |= NIX_FLOW_KEY_TYPE_VLAN;
+ else
+ rss_cfg &= ~NIX_FLOW_KEY_TYPE_VLAN;
+
switch (nfc->flow_type) {
case TCP_V4_FLOW:
case TCP_V6_FLOW:
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index aac2845c1fb1..265e4d1b4e64 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -22,6 +22,7 @@
#include "otx2_txrx.h"
#include "otx2_struct.h"
#include "otx2_ptp.h"
+#include <rvu_trace.h>
#define DRV_NAME "octeontx2-nicpf"
#define DRV_STRING "Marvell OcteonTX2 NIC Physical Function Driver"
@@ -558,6 +559,8 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq)
otx2_queue_work(mbox, pf->mbox_pfvf_wq, 0, vfs, intr, TYPE_PFVF);
+ trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr);
+
return IRQ_HANDLED;
}
@@ -940,6 +943,9 @@ static irqreturn_t otx2_pfaf_mbox_intr_handler(int irq, void *pf_irq)
otx2_write64(pf, RVU_PF_INT, BIT_ULL(0));
mbox = &pf->mbox;
+
+ trace_otx2_msg_interrupt(mbox->mbox.pdev, "AF to PF", BIT_ULL(0));
+
otx2_queue_work(mbox, pf->mbox_wq, 0, 1, 1, TYPE_PFAF);
return IRQ_HANDLED;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
index 70e0d4ca6688..32daa3e0f296 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
@@ -187,6 +187,8 @@ static irqreturn_t otx2vf_vfaf_mbox_intr_handler(int irq, void *vf_irq)
mdev = &mbox->dev[0];
otx2_sync_mbox_bbuf(mbox, 0);
+ trace_otx2_msg_interrupt(mbox->pdev, "PF to VF", BIT_ULL(0));
+
hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start);
if (hdr->num_msgs) {
vf->mbox.num_msgs = hdr->num_msgs;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index e9010ceb5ea9..5368e06cd71c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -221,6 +221,7 @@ enum mlx5e_priv_flag {
MLX5E_PFLAG_RX_STRIDING_RQ,
MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
MLX5E_PFLAG_XDP_TX_MPWQE,
+ MLX5E_PFLAG_SKB_TX_MPWQE,
MLX5E_NUM_PFLAGS, /* Keep last */
};
@@ -305,6 +306,7 @@ struct mlx5e_sq_dma {
enum {
MLX5E_SQ_STATE_ENABLED,
+ MLX5E_SQ_STATE_MPWQE,
MLX5E_SQ_STATE_RECOVERING,
MLX5E_SQ_STATE_IPSEC,
MLX5E_SQ_STATE_AM,
@@ -313,26 +315,40 @@ enum {
MLX5E_SQ_STATE_PENDING_XSK_TX,
};
+struct mlx5e_tx_mpwqe {
+ /* Current MPWQE session */
+ struct mlx5e_tx_wqe *wqe;
+ u32 bytes_count;
+ u8 ds_count;
+ u8 pkt_count;
+ u8 inline_on;
+};
+
struct mlx5e_txqsq {
/* data path */
/* dirtied @completion */
u16 cc;
+ u16 skb_fifo_cc;
u32 dma_fifo_cc;
struct dim dim; /* Adaptive Moderation */
/* dirtied @xmit */
u16 pc ____cacheline_aligned_in_smp;
+ u16 skb_fifo_pc;
u32 dma_fifo_pc;
+ struct mlx5e_tx_mpwqe mpwqe;
struct mlx5e_cq cq;
/* read only */
struct mlx5_wq_cyc wq;
u32 dma_fifo_mask;
+ u16 skb_fifo_mask;
struct mlx5e_sq_stats *stats;
struct {
struct mlx5e_sq_dma *dma_fifo;
+ struct sk_buff **skb_fifo;
struct mlx5e_tx_wqe_info *wqe_info;
} db;
void __iomem *uar_map;
@@ -399,7 +415,7 @@ struct mlx5e_xdp_info {
};
};
-struct mlx5e_xdp_xmit_data {
+struct mlx5e_xmit_data {
dma_addr_t dma_addr;
void *data;
u32 len;
@@ -412,18 +428,10 @@ struct mlx5e_xdp_info_fifo {
u32 mask;
};
-struct mlx5e_xdp_mpwqe {
- /* Current MPWQE session */
- struct mlx5e_tx_wqe *wqe;
- u8 ds_count;
- u8 pkt_count;
- u8 inline_on;
-};
-
struct mlx5e_xdpsq;
typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
- struct mlx5e_xdp_xmit_data *,
+ struct mlx5e_xmit_data *,
struct mlx5e_xdp_info *,
int);
@@ -438,7 +446,7 @@ struct mlx5e_xdpsq {
u32 xdpi_fifo_pc ____cacheline_aligned_in_smp;
u16 pc;
struct mlx5_wqe_ctrl_seg *doorbell_cseg;
- struct mlx5e_xdp_mpwqe mpwqe;
+ struct mlx5e_tx_mpwqe mpwqe;
struct mlx5e_cq cq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 24336c60123a..07ee1d236ab3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -7,6 +7,21 @@
#include "en.h"
#include <linux/indirect_call_wrapper.h>
+#define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
+
+/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
+ * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
+ * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a
+ * full-session WQE be cache-aligned.
+ */
+#if L1_CACHE_BYTES < 128
+#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
+#else
+#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
+#endif
+
+#define MLX5E_TX_MPW_MAX_NUM_DS (MLX5E_TX_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS)
+
#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
enum mlx5e_icosq_wqe_type {
@@ -46,8 +61,6 @@ void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev);
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
-void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
- struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
@@ -110,6 +123,7 @@ struct mlx5e_tx_wqe_info {
u32 num_bytes;
u8 num_wqebbs;
u8 num_dma;
+ u8 num_fifo_pkts;
#ifdef CONFIG_MLX5_EN_TLS
struct page *resync_dump_frag_page;
#endif
@@ -194,23 +208,6 @@ static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size)
}
static inline void
-mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
- u16 pi, u16 nnops)
-{
- struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-
- edge_wi = wi + nnops;
-
- /* fill sq frag edge with nops to avoid wqe wrapping two pages */
- for (; wi < edge_wi; wi++) {
- memset(wi, 0, sizeof(*wi));
- wi->num_wqebbs = 1;
- mlx5e_post_nop(wq, sq->sqn, &sq->pc);
- }
- sq->stats->nop += nnops;
-}
-
-static inline void
mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
struct mlx5_wqe_ctrl_seg *ctrl)
{
@@ -228,29 +225,6 @@ mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
mlx5_write64((__be32 *)ctrl, uar_map);
}
-static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg)
-{
- return cseg && !!cseg->tis_tir_num;
-}
-
-static inline u8
-mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg,
- struct sk_buff *skb)
-{
- u8 mode;
-
- if (mlx5e_transport_inline_tx_wqe(cseg))
- return MLX5_INLINE_MODE_TCP_UDP;
-
- mode = sq->min_inline_mode;
-
- if (skb_vlan_tag_present(skb) &&
- test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
- mode = max_t(u8, MLX5_INLINE_MODE_L2, mode);
-
- return mode;
-}
-
static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
{
struct mlx5_core_cq *mcq;
@@ -276,6 +250,23 @@ mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
dma->type = map_type;
}
+static inline struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_txqsq *sq, u16 i)
+{
+ return &sq->db.skb_fifo[i & sq->skb_fifo_mask];
+}
+
+static inline void mlx5e_skb_fifo_push(struct mlx5e_txqsq *sq, struct sk_buff *skb)
+{
+ struct sk_buff **skb_item = mlx5e_skb_fifo_get(sq, sq->skb_fifo_pc++);
+
+ *skb_item = skb;
+}
+
+static inline struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_txqsq *sq)
+{
+ return *mlx5e_skb_fifo_get(sq, sq->skb_fifo_cc++);
+}
+
static inline void
mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
{
@@ -291,6 +282,14 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
}
}
+void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more);
+void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq);
+
+static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session)
+{
+ return session->ds_count == MLX5E_TX_MPW_MAX_NUM_DS;
+}
+
static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
{
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 562bc465f82b..ae90d533a350 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -59,7 +59,7 @@ static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
struct mlx5e_dma_info *di, struct xdp_buff *xdp)
{
- struct mlx5e_xdp_xmit_data xdptxd;
+ struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
struct xdp_frame *xdpf;
dma_addr_t dma_addr;
@@ -194,18 +194,22 @@ static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
+ struct mlx5e_tx_wqe *wqe;
u16 pi;
- pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS);
- session->wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+ pi = mlx5e_xdpsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
+ wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+ net_prefetchw(wqe->data);
- net_prefetchw(session->wqe->data);
- session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
- session->pkt_count = 0;
-
- mlx5e_xdp_update_inline_state(sq);
+ *session = (struct mlx5e_tx_mpwqe) {
+ .wqe = wqe,
+ .bytes_count = 0,
+ .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
+ .pkt_count = 0,
+ .inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on),
+ };
stats->mpwqe++;
}
@@ -213,7 +217,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
struct mlx5_wq_cyc *wq = &sq->wq;
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
u16 ds_count = session->ds_count;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
@@ -258,10 +262,10 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq
}
INDIRECT_CALLABLE_SCOPE bool
-mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd,
+mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdp_info *xdpi, int check_result)
{
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
if (unlikely(xdptxd->len > sq->hw_mtu)) {
@@ -284,8 +288,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *x
mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
- if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) ||
- session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS))
+ if (unlikely(mlx5e_xdp_mpqwe_is_full(session)))
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
@@ -306,7 +309,7 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
}
INDIRECT_CALLABLE_SCOPE bool
-mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd,
+mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdp_info *xdpi, int check_result)
{
struct mlx5_wq_cyc *wq = &sq->wq;
@@ -503,7 +506,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
- struct mlx5e_xdp_xmit_data xdptxd;
+ struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
bool ret;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index e806c13d491f..d487e5e37162 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -38,27 +38,12 @@
#include "en/txrx.h"
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
- (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
-#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
-
-#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg))
-#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \
- DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS)
-
-/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
- * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
- * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a
- * full-session WQE be cache-aligned.
- */
-#if L1_CACHE_BYTES < 128
-#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
-#else
-#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
-#endif
+#define MLX5E_XDP_TX_DS_COUNT (MLX5E_TX_WQE_EMPTY_DS_COUNT + 1 /* SG DS */)
-#define MLX5E_XDP_MPW_MAX_NUM_DS \
- (MLX5E_XDP_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS)
+#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT 16
+#define MLX5E_XDP_INLINE_WQE_SZ_THRSD \
+ (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \
+ sizeof(struct mlx5_wqe_inline_seg))
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
@@ -73,11 +58,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
- struct mlx5e_xdp_xmit_data *xdptxd,
+ struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdp_info *xdpi,
int check_result));
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
- struct mlx5e_xdp_xmit_data *xdptxd,
+ struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdp_info *xdpi,
int check_result));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
@@ -122,30 +107,28 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
/* Enable inline WQEs to shift some load from a congested HCA (HW) to
* a less congested cpu (SW).
*/
-static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq)
+static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur)
{
u16 outstanding = sq->xdpi_fifo_pc - sq->xdpi_fifo_cc;
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
#define MLX5E_XDP_INLINE_WATERMARK_LOW 10
#define MLX5E_XDP_INLINE_WATERMARK_HIGH 128
- if (session->inline_on) {
- if (outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW)
- session->inline_on = 0;
- return;
- }
+ if (cur && outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW)
+ return false;
+
+ if (!cur && outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH)
+ return true;
- /* inline is false */
- if (outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH)
- session->inline_on = 1;
+ return cur;
}
-static inline bool
-mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session)
+static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session)
{
- return session->inline_on &&
- session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS;
+ if (session->inline_on)
+ return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT >
+ MLX5E_TX_MPW_MAX_NUM_DS;
+ return mlx5e_tx_mpwqe_is_full(session);
}
struct mlx5e_xdp_wqe_info {
@@ -155,15 +138,16 @@ struct mlx5e_xdp_wqe_info {
static inline void
mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
- struct mlx5e_xdp_xmit_data *xdptxd,
+ struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdpsq_stats *stats)
{
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5_wqe_data_seg *dseg =
(struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
u32 dma_len = xdptxd->len;
session->pkt_count++;
+ session->bytes_count += dma_len;
if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) {
struct mlx5_wqe_inline_seg *inline_dseg =
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index aa91cbdfe969..fb671a457129 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -67,8 +67,8 @@ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
{
struct xsk_buff_pool *pool = sq->xsk_pool;
+ struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
- struct mlx5e_xdp_xmit_data xdptxd;
bool work_done = true;
bool flush = false;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index 110476bdeffb..2ea1cdc1ca54 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -128,26 +128,38 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
return true;
}
-static inline bool mlx5e_accel_tx_finish(struct mlx5e_priv *priv,
- struct mlx5e_txqsq *sq,
- struct sk_buff *skb,
- struct mlx5e_tx_wqe *wqe,
- struct mlx5e_accel_tx_state *state)
-{
-#ifdef CONFIG_MLX5_EN_TLS
- mlx5e_tls_handle_tx_wqe(sq, &wqe->ctrl, &state->tls);
-#endif
+/* Part of the eseg touched by TX offloads */
+#define MLX5E_ACCEL_ESEG_LEN offsetof(struct mlx5_wqe_eth_seg, mss)
+static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
#ifdef CONFIG_MLX5_EN_IPSEC
if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) {
- if (unlikely(!mlx5e_ipsec_handle_tx_skb(priv, &wqe->eth, skb)))
+ if (unlikely(!mlx5e_ipsec_handle_tx_skb(priv, eseg, skb)))
return false;
}
#endif
+#if IS_ENABLED(CONFIG_GENEVE)
+ if (skb->encapsulation)
+ mlx5e_tx_tunnel_accel(skb, eseg);
+#endif
+
return true;
}
+static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe *wqe,
+ struct mlx5e_accel_tx_state *state)
+{
+#ifdef CONFIG_MLX5_EN_TLS
+ mlx5e_tls_handle_tx_wqe(sq, &wqe->ctrl, &state->tls);
+#endif
+}
+
static inline int mlx5e_accel_init_rx(struct mlx5e_priv *priv)
{
return mlx5e_ktls_init_rx(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index f4861545b236..b140e13fdcc8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -345,9 +345,6 @@ void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_sq_stats *stats;
struct mlx5e_sq_dma *dma;
- if (!wi->resync_dump_frag_page)
- return;
-
dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);
stats = sq->stats;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
index ff4c740af10b..7521c9be735b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
@@ -29,12 +29,24 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc);
+static inline bool
+mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc)
+{
+ if (unlikely(wi->resync_dump_frag_page)) {
+ mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc);
+ return true;
+ }
+ return false;
+}
#else
-static inline void
-mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
- struct mlx5e_tx_wqe_info *wi,
- u32 *dma_fifo_cc)
+static inline bool
+mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc)
{
+ return false;
}
#endif /* CONFIG_MLX5_EN_TLS */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index b0c31d49ff8d..6982b193ee8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -189,12 +189,10 @@ static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
struct mlx5e_tls *tls)
{
u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
- struct mlx5e_tx_wqe *wqe;
struct sync_info info;
struct sk_buff *nskb;
int linear_len = 0;
int headln;
- u16 pi;
int i;
sq->stats->tls_ooo++;
@@ -246,9 +244,7 @@ static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
sq->stats->tls_resync_bytes += nskb->len;
mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
cpu_to_be64(info.rcd_sn));
- pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
- wqe = MLX5E_TX_FETCH_WQE(sq, pi);
- mlx5e_sq_xmit(sq, nskb, wqe, pi, true);
+ mlx5e_sq_xmit_simple(sq, nskb, true);
return true;
@@ -274,6 +270,8 @@ bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
if (!datalen)
return true;
+ mlx5e_tx_mpwqe_ensure_complete(sq);
+
tls_ctx = tls_get_ctx(skb->sk);
if (WARN_ON_ONCE(tls_ctx->netdev != netdev))
goto err_out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 0dda80d8bdca..d25a56ec6876 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1908,7 +1908,7 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
return 0;
}
-static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
+static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool enable)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
@@ -1920,7 +1920,7 @@ static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
new_channels.params = priv->channels.params;
- MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_XDP_TX_MPWQE, enable);
+ MLX5E_SET_PFLAG(&new_channels.params, flag, enable);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
priv->channels.params = new_channels.params;
@@ -1931,6 +1931,16 @@ static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
return err;
}
+static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
+{
+ return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_XDP_TX_MPWQE, enable);
+}
+
+static int set_pflag_skb_tx_mpwqe(struct net_device *netdev, bool enable)
+{
+ return set_pflag_tx_mpwqe_common(netdev, MLX5E_PFLAG_SKB_TX_MPWQE, enable);
+}
+
static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = {
{ "rx_cqe_moder", set_pflag_rx_cqe_based_moder },
{ "tx_cqe_moder", set_pflag_tx_cqe_based_moder },
@@ -1938,6 +1948,7 @@ static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = {
{ "rx_striding_rq", set_pflag_rx_striding_rq },
{ "rx_no_csum_complete", set_pflag_rx_no_csum_complete },
{ "xdp_tx_mpwqe", set_pflag_xdp_tx_mpwqe },
+ { "skb_tx_mpwqe", set_pflag_skb_tx_mpwqe },
};
static int mlx5e_handle_pflag(struct net_device *netdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 4d30a81cbadc..961cdce37cc4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1043,6 +1043,7 @@ static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
{
kvfree(sq->db.wqe_info);
+ kvfree(sq->db.skb_fifo);
kvfree(sq->db.dma_fifo);
}
@@ -1054,15 +1055,19 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
sizeof(*sq->db.dma_fifo)),
GFP_KERNEL, numa);
+ sq->db.skb_fifo = kvzalloc_node(array_size(df_sz,
+ sizeof(*sq->db.skb_fifo)),
+ GFP_KERNEL, numa);
sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
sizeof(*sq->db.wqe_info)),
GFP_KERNEL, numa);
- if (!sq->db.dma_fifo || !sq->db.wqe_info) {
+ if (!sq->db.dma_fifo || !sq->db.skb_fifo || !sq->db.wqe_info) {
mlx5e_free_txqsq_db(sq);
return -ENOMEM;
}
sq->dma_fifo_mask = df_sz - 1;
+ sq->skb_fifo_mask = df_sz - 1;
return 0;
}
@@ -1073,6 +1078,12 @@ static int mlx5e_calc_sq_stop_room(struct mlx5e_txqsq *sq, u8 log_sq_size)
sq->stop_room = mlx5e_tls_get_stop_room(sq);
sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+ if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state))
+ /* A MPWQE can take up to the maximum-sized WQE + all the normal
+ * stop room can be taken if a new packet breaks the active
+ * MPWQE session and allocates its WQEs right away.
+ */
+ sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
if (WARN_ON(sq->stop_room >= sq_size)) {
netdev_err(sq->channel->netdev, "Stop room %hu is bigger than the SQ size %d\n",
@@ -1114,6 +1125,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
if (mlx5_accel_is_tls_device(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
+ if (param->is_mpw)
+ set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state);
err = mlx5e_calc_sq_stop_room(sq, params->log_sq_size);
if (err)
return err;
@@ -2162,6 +2175,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
mlx5e_build_sq_param_common(priv, param);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
MLX5_SET(sqc, sqc, allow_swp, allow_swp);
+ param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
mlx5e_build_tx_cq_param(priv, params, &param->cqp);
}
@@ -4703,6 +4717,8 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv,
params->log_sq_size = is_kdump_kernel() ?
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE,
+ MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe));
/* XDP SQ */
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 0d34befc5761..78f6a6f0a7e0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -110,6 +110,8 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_blks) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_pkts) },
#ifdef CONFIG_MLX5_EN_TLS
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) },
@@ -365,6 +367,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw)
s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes;
s->tx_added_vlan_packets += sq_stats->added_vlan_packets;
s->tx_nop += sq_stats->nop;
+ s->tx_mpwqe_blks += sq_stats->mpwqe_blks;
+ s->tx_mpwqe_pkts += sq_stats->mpwqe_pkts;
s->tx_queue_stopped += sq_stats->stopped;
s->tx_queue_wake += sq_stats->wake;
s->tx_queue_dropped += sq_stats->dropped;
@@ -1568,6 +1572,8 @@ static const struct counter_desc sq_stats_desc[] = {
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) },
#ifdef CONFIG_MLX5_EN_TLS
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 771f30cb0700..162daaadb0d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -121,6 +121,8 @@ struct mlx5e_sw_stats {
u64 tx_tso_inner_bytes;
u64 tx_added_vlan_packets;
u64 tx_nop;
+ u64 tx_mpwqe_blks;
+ u64 tx_mpwqe_pkts;
u64 rx_lro_packets;
u64 rx_lro_bytes;
u64 rx_mcast_packets;
@@ -351,6 +353,8 @@ struct mlx5e_sq_stats {
u64 csum_partial_inner;
u64 added_vlan_packets;
u64 nop;
+ u64 mpwqe_blks;
+ u64 mpwqe_pkts;
#ifdef CONFIG_MLX5_EN_TLS
u64 tls_encrypted_packets;
u64 tls_encrypted_bytes;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index da596de3abba..13bd4f254ed7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -232,131 +232,180 @@ dma_unmap_wqe_err:
return -ENOMEM;
}
+struct mlx5e_tx_attr {
+ u32 num_bytes;
+ u16 headlen;
+ u16 ihs;
+ __be16 mss;
+ u8 opcode;
+};
+
+struct mlx5e_tx_wqe_attr {
+ u16 ds_cnt;
+ u16 ds_cnt_inl;
+ u8 num_wqebbs;
+};
+
+static u8
+mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_accel_tx_state *accel)
+{
+ u8 mode;
+
+#ifdef CONFIG_MLX5_EN_TLS
+ if (accel && accel->tls.tls_tisn)
+ return MLX5_INLINE_MODE_TCP_UDP;
+#endif
+
+ mode = sq->min_inline_mode;
+
+ if (skb_vlan_tag_present(skb) &&
+ test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
+ mode = max_t(u8, MLX5_INLINE_MODE_L2, mode);
+
+ return mode;
+}
+
+static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_accel_tx_state *accel,
+ struct mlx5e_tx_attr *attr)
+{
+ struct mlx5e_sq_stats *stats = sq->stats;
+
+ if (skb_is_gso(skb)) {
+ u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb);
+
+ *attr = (struct mlx5e_tx_attr) {
+ .opcode = MLX5_OPCODE_LSO,
+ .mss = cpu_to_be16(skb_shinfo(skb)->gso_size),
+ .ihs = ihs,
+ .num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs,
+ .headlen = skb_headlen(skb) - ihs,
+ };
+
+ stats->packets += skb_shinfo(skb)->gso_segs;
+ } else {
+ u8 mode = mlx5e_tx_wqe_inline_mode(sq, skb, accel);
+ u16 ihs = mlx5e_calc_min_inline(mode, skb);
+
+ *attr = (struct mlx5e_tx_attr) {
+ .opcode = MLX5_OPCODE_SEND,
+ .mss = cpu_to_be16(0),
+ .ihs = ihs,
+ .num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN),
+ .headlen = skb_headlen(skb) - ihs,
+ };
+
+ stats->packets++;
+ }
+
+ stats->bytes += attr->num_bytes;
+}
+
+static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_attr *attr,
+ struct mlx5e_tx_wqe_attr *wqe_attr)
+{
+ u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT;
+ u16 ds_cnt_inl = 0;
+
+ ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags;
+
+ if (attr->ihs) {
+ u16 inl = attr->ihs - INL_HDR_START_SZ;
+
+ if (skb_vlan_tag_present(skb))
+ inl += VLAN_HLEN;
+
+ ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
+ ds_cnt += ds_cnt_inl;
+ }
+
+ *wqe_attr = (struct mlx5e_tx_wqe_attr) {
+ .ds_cnt = ds_cnt,
+ .ds_cnt_inl = ds_cnt_inl,
+ .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
+ };
+}
+
+static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb)
+{
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+}
+
+static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq)
+{
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room))) {
+ netif_tx_stop_queue(sq->txq);
+ sq->stats->stopped++;
+ }
+}
+
static inline void
mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
- u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma,
+ const struct mlx5e_tx_attr *attr,
+ const struct mlx5e_tx_wqe_attr *wqe_attr, u8 num_dma,
struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg,
bool xmit_more)
{
struct mlx5_wq_cyc *wq = &sq->wq;
bool send_doorbell;
- wi->num_bytes = num_bytes;
- wi->num_dma = num_dma;
- wi->num_wqebbs = num_wqebbs;
- wi->skb = skb;
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .skb = skb,
+ .num_bytes = attr->num_bytes,
+ .num_dma = num_dma,
+ .num_wqebbs = wqe_attr->num_wqebbs,
+ .num_fifo_pkts = 0,
+ };
- cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
- cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt);
- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
- skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+ mlx5e_tx_skb_update_hwts_flags(skb);
sq->pc += wi->num_wqebbs;
- if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, sq->stop_room))) {
- netif_tx_stop_queue(sq->txq);
- sq->stats->stopped++;
- }
- send_doorbell = __netdev_tx_sent_queue(sq->txq, num_bytes,
- xmit_more);
+ mlx5e_tx_check_stop(sq);
+
+ send_doorbell = __netdev_tx_sent_queue(sq->txq, attr->num_bytes, xmit_more);
if (send_doorbell)
mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
}
-void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
- struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
+static void
+mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ const struct mlx5e_tx_attr *attr, const struct mlx5e_tx_wqe_attr *wqe_attr,
+ struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
{
- struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5_wqe_ctrl_seg *cseg;
struct mlx5_wqe_eth_seg *eseg;
struct mlx5_wqe_data_seg *dseg;
struct mlx5e_tx_wqe_info *wi;
struct mlx5e_sq_stats *stats = sq->stats;
- u16 headlen, ihs, contig_wqebbs_room;
- u16 ds_cnt, ds_cnt_inl = 0;
- u8 num_wqebbs, opcode;
- u32 num_bytes;
int num_dma;
- __be16 mss;
-
- /* Calc ihs and ds cnt, no writes to wqe yet */
- ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
- if (skb_is_gso(skb)) {
- opcode = MLX5_OPCODE_LSO;
- mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
- ihs = mlx5e_tx_get_gso_ihs(sq, skb);
- num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
- stats->packets += skb_shinfo(skb)->gso_segs;
- } else {
- u8 mode = mlx5e_tx_wqe_inline_mode(sq, &wqe->ctrl, skb);
-
- opcode = MLX5_OPCODE_SEND;
- mss = 0;
- ihs = mlx5e_calc_min_inline(mode, skb);
- num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
- stats->packets++;
- }
- stats->bytes += num_bytes;
stats->xmit_more += xmit_more;
- headlen = skb->len - ihs - skb->data_len;
- ds_cnt += !!headlen;
- ds_cnt += skb_shinfo(skb)->nr_frags;
-
- if (ihs) {
- ihs += !!skb_vlan_tag_present(skb) * VLAN_HLEN;
-
- ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS);
- ds_cnt += ds_cnt_inl;
- }
-
- num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
- contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
- if (unlikely(contig_wqebbs_room < num_wqebbs)) {
-#ifdef CONFIG_MLX5_EN_IPSEC
- struct mlx5_wqe_eth_seg cur_eth = wqe->eth;
-#endif
-#ifdef CONFIG_MLX5_EN_TLS
- struct mlx5_wqe_ctrl_seg cur_ctrl = wqe->ctrl;
-#endif
- mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
- pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- wqe = MLX5E_TX_FETCH_WQE(sq, pi);
-#ifdef CONFIG_MLX5_EN_IPSEC
- wqe->eth = cur_eth;
-#endif
-#ifdef CONFIG_MLX5_EN_TLS
- wqe->ctrl = cur_ctrl;
-#endif
- }
-
/* fill wqe */
wi = &sq->db.wqe_info[pi];
cseg = &wqe->ctrl;
eseg = &wqe->eth;
dseg = wqe->data;
-#if IS_ENABLED(CONFIG_GENEVE)
- if (skb->encapsulation)
- mlx5e_tx_tunnel_accel(skb, eseg);
-#endif
- mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
-
- eseg->mss = mss;
+ eseg->mss = attr->mss;
- if (ihs) {
- eseg->inline_hdr.sz = cpu_to_be16(ihs);
+ if (attr->ihs) {
if (skb_vlan_tag_present(skb)) {
- ihs -= VLAN_HLEN;
- mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs);
+ eseg->inline_hdr.sz = cpu_to_be16(attr->ihs + VLAN_HLEN);
+ mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs);
stats->added_vlan_packets++;
} else {
- memcpy(eseg->inline_hdr.start, skb->data, ihs);
+ eseg->inline_hdr.sz = cpu_to_be16(attr->ihs);
+ memcpy(eseg->inline_hdr.start, skb->data, attr->ihs);
}
- dseg += ds_cnt_inl;
+ dseg += wqe_attr->ds_cnt_inl;
} else if (skb_vlan_tag_present(skb)) {
eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN);
if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD))
@@ -365,12 +414,12 @@ void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
stats->added_vlan_packets++;
}
- num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg);
+ num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs,
+ attr->headlen, dseg);
if (unlikely(num_dma < 0))
goto err_drop;
- mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
- num_dma, wi, cseg, xmit_more);
+ mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, xmit_more);
return;
@@ -379,10 +428,172 @@ err_drop:
dev_kfree_skb_any(skb);
}
+static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
+{
+ return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs;
+}
+
+static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+
+ /* Assumes the session is already running and has at least one packet. */
+ return !memcmp(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);
+}
+
+static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ pi = mlx5e_txqsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
+ wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+ prefetchw(wqe->data);
+
+ *session = (struct mlx5e_tx_mpwqe) {
+ .wqe = wqe,
+ .bytes_count = 0,
+ .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
+ .pkt_count = 0,
+ .inline_on = 0,
+ };
+
+ memcpy(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);
+
+ sq->stats->mpwqe_blks++;
+}
+
+static bool mlx5e_tx_mpwqe_session_is_active(struct mlx5e_txqsq *sq)
+{
+ return sq->mpwqe.wqe;
+}
+
+static void mlx5e_tx_mpwqe_add_dseg(struct mlx5e_txqsq *sq, struct mlx5e_xmit_data *txd)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wqe_data_seg *dseg;
+
+ dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
+
+ session->pkt_count++;
+ session->bytes_count += txd->len;
+
+ dseg->addr = cpu_to_be64(txd->dma_addr);
+ dseg->byte_count = cpu_to_be32(txd->len);
+ dseg->lkey = sq->mkey_be;
+ session->ds_count++;
+
+ sq->stats->mpwqe_pkts++;
+}
+
+static struct mlx5_wqe_ctrl_seg *mlx5e_tx_mpwqe_session_complete(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
+ u8 ds_count = session->ds_count;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5e_tx_wqe_info *wi;
+ u16 pi;
+
+ cseg = &session->wqe->ctrl;
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
+
+ pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ wi = &sq->db.wqe_info[pi];
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .skb = NULL,
+ .num_bytes = session->bytes_count,
+ .num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS),
+ .num_dma = session->pkt_count,
+ .num_fifo_pkts = session->pkt_count,
+ };
+
+ sq->pc += wi->num_wqebbs;
+
+ session->wqe = NULL;
+
+ mlx5e_tx_check_stop(sq);
+
+ return cseg;
+}
+
+static void
+mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg, bool xmit_more)
+{
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5e_xmit_data txd;
+
+ if (!mlx5e_tx_mpwqe_session_is_active(sq)) {
+ mlx5e_tx_mpwqe_session_start(sq, eseg);
+ } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) {
+ mlx5e_tx_mpwqe_session_complete(sq);
+ mlx5e_tx_mpwqe_session_start(sq, eseg);
+ }
+
+ sq->stats->xmit_more += xmit_more;
+
+ txd.data = skb->data;
+ txd.len = skb->len;
+
+ txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
+ goto err_unmap;
+ mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE);
+
+ mlx5e_skb_fifo_push(sq, skb);
+
+ mlx5e_tx_mpwqe_add_dseg(sq, &txd);
+
+ mlx5e_tx_skb_update_hwts_flags(skb);
+
+ if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) {
+ /* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */
+ cseg = mlx5e_tx_mpwqe_session_complete(sq);
+
+ if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more))
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
+ } else if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) {
+ /* Might stop the queue, but we were asked to ring the doorbell anyway. */
+ cseg = mlx5e_tx_mpwqe_session_complete(sq);
+
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
+ }
+
+ return;
+
+err_unmap:
+ mlx5e_dma_unmap_wqe_err(sq, 1);
+ sq->stats->dropped++;
+ dev_kfree_skb_any(skb);
+}
+
+void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
+{
+ /* Unlikely in non-MPWQE workloads; not important in MPWQE workloads. */
+ if (unlikely(mlx5e_tx_mpwqe_session_is_active(sq)))
+ mlx5e_tx_mpwqe_session_complete(sq);
+}
+
+static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
+ struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
+{
+ if (unlikely(!mlx5e_accel_tx_eseg(priv, sq, skb, eseg)))
+ return false;
+
+ mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
+
+ return true;
+}
+
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_accel_tx_state accel = {};
+ struct mlx5e_tx_wqe_attr wqe_attr;
+ struct mlx5e_tx_attr attr;
struct mlx5e_tx_wqe *wqe;
struct mlx5e_txqsq *sq;
u16 pi;
@@ -391,21 +602,91 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
/* May send SKBs and WQEs. */
if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel)))
- goto out;
+ return NETDEV_TX_OK;
- pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr);
+
+ if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) {
+ if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) {
+ struct mlx5_wqe_eth_seg eseg = {};
+
+ if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &eseg)))
+ return NETDEV_TX_OK;
+
+ mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more());
+ return NETDEV_TX_OK;
+ }
+
+ mlx5e_tx_mpwqe_ensure_complete(sq);
+ }
+
+ mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
+ pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
/* May update the WQE, but may not post other WQEs. */
- if (unlikely(!mlx5e_accel_tx_finish(priv, sq, skb, wqe, &accel)))
- goto out;
+ mlx5e_accel_tx_finish(sq, wqe, &accel);
+ if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &wqe->eth)))
+ return NETDEV_TX_OK;
- mlx5e_sq_xmit(sq, skb, wqe, pi, netdev_xmit_more());
+ mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more());
-out:
return NETDEV_TX_OK;
}
+void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more)
+{
+ struct mlx5e_tx_wqe_attr wqe_attr;
+ struct mlx5e_tx_attr attr;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
+ mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
+ pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
+ wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+ mlx5e_txwqe_build_eseg_csum(sq, skb, &wqe->eth);
+ mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, xmit_more);
+}
+
+static void mlx5e_tx_wi_dma_unmap(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc)
+{
+ int i;
+
+ for (i = 0; i < wi->num_dma; i++) {
+ struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);
+
+ mlx5e_tx_dma_unmap(sq->pdev, dma);
+ }
+}
+
+static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_cqe64 *cqe, int napi_budget)
+{
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+ struct skb_shared_hwtstamps hwts = {};
+ u64 ts = get_cqe_ts(cqe);
+
+ hwts.hwtstamp = mlx5_timecounter_cyc2time(sq->clock, ts);
+ skb_tstamp_tx(skb, &hwts);
+ }
+
+ napi_consume_skb(skb, napi_budget);
+}
+
+static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
+ struct mlx5_cqe64 *cqe, int napi_budget)
+{
+ int i;
+
+ for (i = 0; i < wi->num_fifo_pkts; i++) {
+ struct sk_buff *skb = mlx5e_skb_fifo_pop(sq);
+
+ mlx5e_consume_skb(sq, skb, cqe, napi_budget);
+ }
+}
+
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
struct mlx5e_sq_stats *stats;
@@ -451,42 +732,33 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
wqe_counter = be16_to_cpu(cqe->wqe_counter);
do {
- struct sk_buff *skb;
- int j;
-
last_wqe = (sqcc == wqe_counter);
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
- skb = wi->skb;
- if (unlikely(!skb)) {
- mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
- sqcc += wi->num_wqebbs;
- continue;
- }
+ sqcc += wi->num_wqebbs;
- if (unlikely(skb_shinfo(skb)->tx_flags &
- SKBTX_HW_TSTAMP)) {
- struct skb_shared_hwtstamps hwts = {};
+ if (likely(wi->skb)) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);
- hwts.hwtstamp =
- mlx5_timecounter_cyc2time(sq->clock,
- get_cqe_ts(cqe));
- skb_tstamp_tx(skb, &hwts);
+ npkts++;
+ nbytes += wi->num_bytes;
+ continue;
}
- for (j = 0; j < wi->num_dma; j++) {
- struct mlx5e_sq_dma *dma =
- mlx5e_dma_get(sq, dma_fifo_cc++);
+ if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi,
+ &dma_fifo_cc)))
+ continue;
- mlx5e_tx_dma_unmap(sq->pdev, dma);
- }
+ if (wi->num_fifo_pkts) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget);
- npkts++;
- nbytes += wi->num_bytes;
- sqcc += wi->num_wqebbs;
- napi_consume_skb(skb, napi_budget);
+ npkts += wi->num_fifo_pkts;
+ nbytes += wi->num_bytes;
+ }
} while (!last_wqe);
if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
@@ -525,13 +797,19 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
+static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi)
+{
+ int i;
+
+ for (i = 0; i < wi->num_fifo_pkts; i++)
+ dev_kfree_skb_any(mlx5e_skb_fifo_pop(sq));
+}
+
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
{
struct mlx5e_tx_wqe_info *wi;
u32 dma_fifo_cc, nbytes = 0;
u16 ci, sqcc, npkts = 0;
- struct sk_buff *skb;
- int i;
sqcc = sq->cc;
dma_fifo_cc = sq->dma_fifo_cc;
@@ -539,25 +817,28 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
while (sqcc != sq->pc) {
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
- skb = wi->skb;
- if (!skb) {
- mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
- sqcc += wi->num_wqebbs;
+ sqcc += wi->num_wqebbs;
+
+ if (likely(wi->skb)) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ dev_kfree_skb_any(wi->skb);
+
+ npkts++;
+ nbytes += wi->num_bytes;
continue;
}
- for (i = 0; i < wi->num_dma; i++) {
- struct mlx5e_sq_dma *dma =
- mlx5e_dma_get(sq, dma_fifo_cc++);
+ if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc)))
+ continue;
- mlx5e_tx_dma_unmap(sq->pdev, dma);
- }
+ if (wi->num_fifo_pkts) {
+ mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
+ mlx5e_tx_wi_kfree_fifo_skbs(sq, wi);
- dev_kfree_skb_any(skb);
- npkts++;
- nbytes += wi->num_bytes;
- sqcc += wi->num_wqebbs;
+ npkts += wi->num_fifo_pkts;
+ nbytes += wi->num_bytes;
+ }
}
sq->dma_fifo_cc = dma_fifo_cc;
@@ -576,9 +857,34 @@ mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey,
dseg->av.key.qkey.qkey = cpu_to_be32(dqkey);
}
+static void mlx5i_sq_calc_wqe_attr(struct sk_buff *skb,
+ const struct mlx5e_tx_attr *attr,
+ struct mlx5e_tx_wqe_attr *wqe_attr)
+{
+ u16 ds_cnt = sizeof(struct mlx5i_tx_wqe) / MLX5_SEND_WQE_DS;
+ u16 ds_cnt_inl = 0;
+
+ ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags;
+
+ if (attr->ihs) {
+ u16 inl = attr->ihs - INL_HDR_START_SZ;
+
+ ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
+ ds_cnt += ds_cnt_inl;
+ }
+
+ *wqe_attr = (struct mlx5e_tx_wqe_attr) {
+ .ds_cnt = ds_cnt,
+ .ds_cnt_inl = ds_cnt_inl,
+ .num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
+ };
+}
+
void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more)
{
+ struct mlx5e_tx_wqe_attr wqe_attr;
+ struct mlx5e_tx_attr attr;
struct mlx5i_tx_wqe *wqe;
struct mlx5_wqe_datagram_seg *datagram;
@@ -588,47 +894,17 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe_info *wi;
struct mlx5e_sq_stats *stats = sq->stats;
- u16 ds_cnt, ds_cnt_inl = 0;
- u8 num_wqebbs, opcode;
- u16 headlen, ihs, pi;
- u32 num_bytes;
int num_dma;
- __be16 mss;
+ u16 pi;
- /* Calc ihs and ds cnt, no writes to wqe yet */
- ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
- if (skb_is_gso(skb)) {
- opcode = MLX5_OPCODE_LSO;
- mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
- ihs = mlx5e_tx_get_gso_ihs(sq, skb);
- num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
- stats->packets += skb_shinfo(skb)->gso_segs;
- } else {
- u8 mode = mlx5e_tx_wqe_inline_mode(sq, NULL, skb);
+ mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
+ mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
- opcode = MLX5_OPCODE_SEND;
- mss = 0;
- ihs = mlx5e_calc_min_inline(mode, skb);
- num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
- stats->packets++;
- }
+ pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
+ wqe = MLX5I_SQ_FETCH_WQE(sq, pi);
- stats->bytes += num_bytes;
stats->xmit_more += xmit_more;
- headlen = skb->len - ihs - skb->data_len;
- ds_cnt += !!headlen;
- ds_cnt += skb_shinfo(skb)->nr_frags;
-
- if (ihs) {
- ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS);
- ds_cnt += ds_cnt_inl;
- }
-
- num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
- pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
- wqe = MLX5I_SQ_FETCH_WQE(sq, pi);
-
/* fill wqe */
wi = &sq->db.wqe_info[pi];
cseg = &wqe->ctrl;
@@ -640,20 +916,20 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
- eseg->mss = mss;
+ eseg->mss = attr.mss;
- if (ihs) {
- memcpy(eseg->inline_hdr.start, skb->data, ihs);
- eseg->inline_hdr.sz = cpu_to_be16(ihs);
- dseg += ds_cnt_inl;
+ if (attr.ihs) {
+ memcpy(eseg->inline_hdr.start, skb->data, attr.ihs);
+ eseg->inline_hdr.sz = cpu_to_be16(attr.ihs);
+ dseg += wqe_attr.ds_cnt_inl;
}
- num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg);
+ num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs,
+ attr.headlen, dseg);
if (unlikely(num_dma < 0))
goto err_drop;
- mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
- num_dma, wi, cseg, xmit_more);
+ mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, xmit_more);
return;
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index de93cc6ebc1a..7e236c9ee4b1 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -3038,7 +3038,6 @@ static int lan743x_pm_suspend(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct net_device *netdev = pci_get_drvdata(pdev);
struct lan743x_adapter *adapter = netdev_priv(netdev);
- int ret;
lan743x_pcidev_shutdown(pdev);
@@ -3051,9 +3050,7 @@ static int lan743x_pm_suspend(struct device *dev)
lan743x_pm_set_wol(adapter);
/* Host sets PME_En, put D3hot */
- ret = pci_prepare_to_sleep(pdev);
-
- return 0;
+ return pci_prepare_to_sleep(pdev);;
}
static int lan743x_pm_resume(struct device *dev)
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 8518e1d60da4..0445c5ee5551 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -413,26 +413,20 @@ void ocelot_port_disable(struct ocelot *ocelot, int port)
}
EXPORT_SYMBOL(ocelot_port_disable);
-int ocelot_port_add_txtstamp_skb(struct ocelot_port *ocelot_port,
- struct sk_buff *skb)
+void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,
+ struct sk_buff *clone)
{
- struct skb_shared_info *shinfo = skb_shinfo(skb);
- struct ocelot *ocelot = ocelot_port->ocelot;
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
- if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP &&
- ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
- spin_lock(&ocelot_port->ts_id_lock);
+ spin_lock(&ocelot_port->ts_id_lock);
- shinfo->tx_flags |= SKBTX_IN_PROGRESS;
- /* Store timestamp ID in cb[0] of sk_buff */
- skb->cb[0] = ocelot_port->ts_id;
- ocelot_port->ts_id = (ocelot_port->ts_id + 1) % 4;
- skb_queue_tail(&ocelot_port->tx_skbs, skb);
+ skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS;
+ /* Store timestamp ID in cb[0] of sk_buff */
+ clone->cb[0] = ocelot_port->ts_id;
+ ocelot_port->ts_id = (ocelot_port->ts_id + 1) % 4;
+ skb_queue_tail(&ocelot_port->tx_skbs, clone);
- spin_unlock(&ocelot_port->ts_id_lock);
- return 0;
- }
- return -ENODATA;
+ spin_unlock(&ocelot_port->ts_id_lock);
}
EXPORT_SYMBOL(ocelot_port_add_txtstamp_skb);
@@ -511,9 +505,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot)
/* Set the timestamp into the skb */
memset(&shhwtstamps, 0, sizeof(shhwtstamps));
shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
- skb_tstamp_tx(skb_match, &shhwtstamps);
-
- dev_kfree_skb_any(skb_match);
+ skb_complete_tx_timestamp(skb_match, &shhwtstamps);
/* Next ts */
ocelot_write(ocelot, SYS_PTP_NXT_PTP_NXT, SYS_PTP_NXT);
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 8490e42e9e2d..028a0150f97d 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -330,7 +330,6 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
u8 grp = 0; /* Send everything on CPU group 0 */
unsigned int i, count, last;
int port = priv->chip_port;
- bool do_tstamp;
val = ocelot_read(ocelot, QS_INJ_STATUS);
if (!(val & QS_INJ_STATUS_FIFO_RDY(BIT(grp))) ||
@@ -345,7 +344,23 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
info.vid = skb_vlan_tag_get(skb);
/* Check if timestamping is needed */
- do_tstamp = (ocelot_port_add_txtstamp_skb(ocelot_port, skb) == 0);
+ if (ocelot->ptp && (shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
+ info.rew_op = ocelot_port->ptp_cmd;
+
+ if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
+ struct sk_buff *clone;
+
+ clone = skb_clone_sk(skb);
+ if (!clone) {
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+
+ ocelot_port_add_txtstamp_skb(ocelot, port, clone);
+
+ info.rew_op |= clone->cb[0] << 3;
+ }
+ }
if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP) {
info.rew_op = ocelot_port->ptp_cmd;
@@ -383,8 +398,7 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
- if (!do_tstamp)
- dev_kfree_skb_any(skb);
+ kfree_skb(skb);
return NETDEV_TX_OK;
}
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index ac02369174a9..53851853562c 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -111,7 +111,9 @@ static int
nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
struct bpf_prog *prog)
{
- int i, cnt, err;
+ int i, cnt, err = 0;
+
+ mutex_lock(&prog->aux->used_maps_mutex);
/* Quickly count the maps we will have to remember */
cnt = 0;
@@ -119,13 +121,15 @@ nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
if (bpf_map_offload_neutral(prog->aux->used_maps[i]))
cnt++;
if (!cnt)
- return 0;
+ goto out;
nfp_prog->map_records = kmalloc_array(cnt,
sizeof(nfp_prog->map_records[0]),
GFP_KERNEL);
- if (!nfp_prog->map_records)
- return -ENOMEM;
+ if (!nfp_prog->map_records) {
+ err = -ENOMEM;
+ goto out;
+ }
for (i = 0; i < prog->aux->used_map_cnt; i++)
if (bpf_map_offload_neutral(prog->aux->used_maps[i])) {
@@ -133,12 +137,14 @@ nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
prog->aux->used_maps[i]);
if (err) {
nfp_map_ptrs_forget(bpf, nfp_prog);
- return err;
+ goto out;
}
}
WARN_ON(cnt != nfp_prog->map_records_cnt);
- return 0;
+out:
+ mutex_unlock(&prog->aux->used_maps_mutex);
+ return err;
}
static int
diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index e291e6ac40cb..4e44313b7651 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -1239,7 +1239,7 @@ static void cp_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct cp_private *cp = netdev_priv(dev);
unsigned long flags;
- int rc, i;
+ int i;
netdev_warn(dev, "Transmit timeout, status %2x %4x %4x %4x\n",
cpr8(Cmd), cpr16(CpCmd),
@@ -1260,7 +1260,7 @@ static void cp_tx_timeout(struct net_device *dev, unsigned int txqueue)
cp_stop_hw(cp);
cp_clean_rings(cp);
- rc = cp_init_rings(cp);
+ cp_init_rings(cp);
cp_start_hw(cp);
__cp_set_rx_mode(dev);
cpw16_f(IntrMask, cp_norx_intr_mask);
diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c
index 1ce81ff2f41d..25c25ea6da66 100644
--- a/drivers/net/mdio/mdio-ipq4019.c
+++ b/drivers/net/mdio/mdio-ipq4019.c
@@ -12,6 +12,7 @@
#include <linux/phy.h>
#include <linux/platform_device.h>
+#define MDIO_MODE_REG 0x40
#define MDIO_ADDR_REG 0x44
#define MDIO_DATA_WRITE_REG 0x48
#define MDIO_DATA_READ_REG 0x4c
@@ -20,9 +21,15 @@
#define MDIO_CMD_ACCESS_START BIT(8)
#define MDIO_CMD_ACCESS_CODE_READ 0
#define MDIO_CMD_ACCESS_CODE_WRITE 1
+#define MDIO_CMD_ACCESS_CODE_C45_ADDR 0
+#define MDIO_CMD_ACCESS_CODE_C45_WRITE 1
+#define MDIO_CMD_ACCESS_CODE_C45_READ 2
-#define ipq4019_MDIO_TIMEOUT 10000
-#define ipq4019_MDIO_SLEEP 10
+/* 0 = Clause 22, 1 = Clause 45 */
+#define MDIO_MODE_C45 BIT(8)
+
+#define IPQ4019_MDIO_TIMEOUT 10000
+#define IPQ4019_MDIO_SLEEP 10
struct ipq4019_mdio_data {
void __iomem *membase;
@@ -35,25 +42,50 @@ static int ipq4019_mdio_wait_busy(struct mii_bus *bus)
return readl_poll_timeout(priv->membase + MDIO_CMD_REG, busy,
(busy & MDIO_CMD_ACCESS_BUSY) == 0,
- ipq4019_MDIO_SLEEP, ipq4019_MDIO_TIMEOUT);
+ IPQ4019_MDIO_SLEEP, IPQ4019_MDIO_TIMEOUT);
}
static int ipq4019_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
{
struct ipq4019_mdio_data *priv = bus->priv;
+ unsigned int data;
unsigned int cmd;
- /* Reject clause 45 */
- if (regnum & MII_ADDR_C45)
- return -EOPNOTSUPP;
-
if (ipq4019_mdio_wait_busy(bus))
return -ETIMEDOUT;
- /* issue the phy address and reg */
- writel((mii_id << 8) | regnum, priv->membase + MDIO_ADDR_REG);
+ /* Clause 45 support */
+ if (regnum & MII_ADDR_C45) {
+ unsigned int mmd = (regnum >> 16) & 0x1F;
+ unsigned int reg = regnum & 0xFFFF;
+
+ /* Enter Clause 45 mode */
+ data = readl(priv->membase + MDIO_MODE_REG);
+
+ data |= MDIO_MODE_C45;
+
+ writel(data, priv->membase + MDIO_MODE_REG);
+
+ /* issue the phy address and mmd */
+ writel((mii_id << 8) | mmd, priv->membase + MDIO_ADDR_REG);
+
+ /* issue reg */
+ writel(reg, priv->membase + MDIO_DATA_WRITE_REG);
+
+ cmd = MDIO_CMD_ACCESS_START | MDIO_CMD_ACCESS_CODE_C45_ADDR;
+ } else {
+ /* Enter Clause 22 mode */
+ data = readl(priv->membase + MDIO_MODE_REG);
- cmd = MDIO_CMD_ACCESS_START | MDIO_CMD_ACCESS_CODE_READ;
+ data &= ~MDIO_MODE_C45;
+
+ writel(data, priv->membase + MDIO_MODE_REG);
+
+ /* issue the phy address and reg */
+ writel((mii_id << 8) | regnum, priv->membase + MDIO_ADDR_REG);
+
+ cmd = MDIO_CMD_ACCESS_START | MDIO_CMD_ACCESS_CODE_READ;
+ }
/* issue read command */
writel(cmd, priv->membase + MDIO_CMD_REG);
@@ -62,6 +94,15 @@ static int ipq4019_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
if (ipq4019_mdio_wait_busy(bus))
return -ETIMEDOUT;
+ if (regnum & MII_ADDR_C45) {
+ cmd = MDIO_CMD_ACCESS_START | MDIO_CMD_ACCESS_CODE_C45_READ;
+
+ writel(cmd, priv->membase + MDIO_CMD_REG);
+
+ if (ipq4019_mdio_wait_busy(bus))
+ return -ETIMEDOUT;
+ }
+
/* Read and return data */
return readl(priv->membase + MDIO_DATA_READ_REG);
}
@@ -70,23 +111,57 @@ static int ipq4019_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
u16 value)
{
struct ipq4019_mdio_data *priv = bus->priv;
+ unsigned int data;
unsigned int cmd;
- /* Reject clause 45 */
- if (regnum & MII_ADDR_C45)
- return -EOPNOTSUPP;
-
if (ipq4019_mdio_wait_busy(bus))
return -ETIMEDOUT;
- /* issue the phy address and reg */
- writel((mii_id << 8) | regnum, priv->membase + MDIO_ADDR_REG);
+ /* Clause 45 support */
+ if (regnum & MII_ADDR_C45) {
+ unsigned int mmd = (regnum >> 16) & 0x1F;
+ unsigned int reg = regnum & 0xFFFF;
+
+ /* Enter Clause 45 mode */
+ data = readl(priv->membase + MDIO_MODE_REG);
+
+ data |= MDIO_MODE_C45;
+
+ writel(data, priv->membase + MDIO_MODE_REG);
+
+ /* issue the phy address and mmd */
+ writel((mii_id << 8) | mmd, priv->membase + MDIO_ADDR_REG);
+
+ /* issue reg */
+ writel(reg, priv->membase + MDIO_DATA_WRITE_REG);
+
+ cmd = MDIO_CMD_ACCESS_START | MDIO_CMD_ACCESS_CODE_C45_ADDR;
+
+ writel(cmd, priv->membase + MDIO_CMD_REG);
+
+ if (ipq4019_mdio_wait_busy(bus))
+ return -ETIMEDOUT;
+ } else {
+ /* Enter Clause 22 mode */
+ data = readl(priv->membase + MDIO_MODE_REG);
+
+ data &= ~MDIO_MODE_C45;
+
+ writel(data, priv->membase + MDIO_MODE_REG);
+
+ /* issue the phy address and reg */
+ writel((mii_id << 8) | regnum, priv->membase + MDIO_ADDR_REG);
+ }
/* issue write data */
writel(value, priv->membase + MDIO_DATA_WRITE_REG);
- cmd = MDIO_CMD_ACCESS_START | MDIO_CMD_ACCESS_CODE_WRITE;
/* issue write command */
+ if (regnum & MII_ADDR_C45)
+ cmd = MDIO_CMD_ACCESS_START | MDIO_CMD_ACCESS_CODE_C45_WRITE;
+ else
+ cmd = MDIO_CMD_ACCESS_START | MDIO_CMD_ACCESS_CODE_WRITE;
+
writel(cmd, priv->membase + MDIO_CMD_REG);
/* Wait write complete */
diff --git a/drivers/net/pcs/pcs-lynx.c b/drivers/net/pcs/pcs-lynx.c
index c43d97682083..62bb9272dcb2 100644
--- a/drivers/net/pcs/pcs-lynx.c
+++ b/drivers/net/pcs/pcs-lynx.c
@@ -93,6 +93,9 @@ static void lynx_pcs_get_state(struct phylink_pcs *pcs,
case PHY_INTERFACE_MODE_USXGMII:
lynx_pcs_get_state_usxgmii(lynx->mdio, state);
break;
+ case PHY_INTERFACE_MODE_10GBASER:
+ phylink_mii_c45_pcs_get_state(lynx->mdio, state);
+ break;
default:
break;
}
@@ -172,6 +175,9 @@ static int lynx_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
break;
case PHY_INTERFACE_MODE_USXGMII:
return lynx_pcs_config_usxgmii(lynx->mdio, mode, advertising);
+ case PHY_INTERFACE_MODE_10GBASER:
+ /* Nothing to do here for 10GBASER */
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index de5b869139d7..8d333d3084ed 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -6,6 +6,11 @@
#include <linux/phy.h>
#include <linux/of.h>
+/**
+ * phy_speed_to_str - Return a string representing the PHY link speed
+ *
+ * @speed: Speed of the link
+ */
const char *phy_speed_to_str(int speed)
{
BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 92,
@@ -52,6 +57,11 @@ const char *phy_speed_to_str(int speed)
}
EXPORT_SYMBOL_GPL(phy_speed_to_str);
+/**
+ * phy_duplex_to_str - Return string describing the duplex
+ *
+ * @duplex: Duplex setting to describe
+ */
const char *phy_duplex_to_str(unsigned int duplex)
{
if (duplex == DUPLEX_HALF)
@@ -252,6 +262,16 @@ static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
return __set_linkmode_max_speed(max_speed, phydev->supported);
}
+/**
+ * phy_set_max_speed - Set the maximum speed the PHY should support
+ *
+ * @phydev: The phy_device struct
+ * @max_speed: Maximum speed
+ *
+ * The PHY might be more capable than the MAC. For example a Fast Ethernet
+ * is connected to a 1G PHY. This function allows the MAC to indicate its
+ * maximum speed, and so limit what the PHY will advertise.
+ */
int phy_set_max_speed(struct phy_device *phydev, u32 max_speed)
{
int err;
@@ -308,6 +328,16 @@ void of_set_phy_eee_broken(struct phy_device *phydev)
phydev->eee_broken_modes = broken;
}
+/**
+ * phy_resolve_aneg_pause - Determine pause autoneg results
+ *
+ * @phydev: The phy_device struct
+ *
+ * Once autoneg has completed the local pause settings can be
+ * resolved. Determine if pause and asymmetric pause should be used
+ * by the MAC.
+ */
+
void phy_resolve_aneg_pause(struct phy_device *phydev)
{
if (phydev->duplex == DUPLEX_FULL) {
@@ -321,7 +351,7 @@ void phy_resolve_aneg_pause(struct phy_device *phydev)
EXPORT_SYMBOL_GPL(phy_resolve_aneg_pause);
/**
- * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings
+ * phy_resolve_aneg_linkmode - resolve the advertisements into PHY settings
* @phydev: The phy_device struct
*
* Resolve our and the link partner advertisements into their corresponding
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 8947d58f2a25..35525a671400 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -456,7 +456,16 @@ int phy_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
EXPORT_SYMBOL(phy_do_ioctl);
-/* same as phy_do_ioctl, but ensures that net_device is running */
+/**
+ * phy_do_ioctl_running - generic ndo_do_ioctl implementation but test first
+ *
+ * @dev: the net_device struct
+ * @ifr: &struct ifreq for socket ioctl's
+ * @cmd: ioctl cmd to execute
+ *
+ * Same as phy_do_ioctl, but ensures that net_device is running before
+ * handling the ioctl.
+ */
int phy_do_ioctl_running(struct net_device *dev, struct ifreq *ifr, int cmd)
{
if (!netif_running(dev))
@@ -466,6 +475,12 @@ int phy_do_ioctl_running(struct net_device *dev, struct ifreq *ifr, int cmd)
}
EXPORT_SYMBOL(phy_do_ioctl_running);
+/**
+ * phy_queue_state_machine - Trigger the state machine to run soon
+ *
+ * @phydev: the phy_device struct
+ * @jiffies: Run the state machine after these jiffies
+ */
void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies)
{
mod_delayed_work(system_power_efficient_wq, &phydev->state_queue,
@@ -473,6 +488,11 @@ void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies)
}
EXPORT_SYMBOL(phy_queue_state_machine);
+/**
+ * phy_queue_state_machine - Trigger the state machine to run now
+ *
+ * @phydev: the phy_device struct
+ */
static void phy_trigger_machine(struct phy_device *phydev)
{
phy_queue_state_machine(phydev, 0);
@@ -489,6 +509,12 @@ static void phy_abort_cable_test(struct phy_device *phydev)
phydev_err(phydev, "Error while aborting cable test");
}
+/**
+ * phy_ethtool_get_strings - Get the statistic counter names
+ *
+ * @phydev: the phy_device struct
+ * @data: Where to put the strings
+ */
int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data)
{
if (!phydev->drv)
@@ -502,6 +528,11 @@ int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data)
}
EXPORT_SYMBOL(phy_ethtool_get_strings);
+/**
+ * phy_ethtool_get_sset_count - Get the number of statistic counters
+ *
+ * @phydev: the phy_device struct
+ */
int phy_ethtool_get_sset_count(struct phy_device *phydev)
{
int ret;
@@ -523,6 +554,13 @@ int phy_ethtool_get_sset_count(struct phy_device *phydev)
}
EXPORT_SYMBOL(phy_ethtool_get_sset_count);
+/**
+ * phy_ethtool_get_stats - Get the statistic counters
+ *
+ * @phydev: the phy_device struct
+ * @stats: What counters to get
+ * @data: Where to store the counters
+ */
int phy_ethtool_get_stats(struct phy_device *phydev,
struct ethtool_stats *stats, u64 *data)
{
@@ -537,6 +575,12 @@ int phy_ethtool_get_stats(struct phy_device *phydev,
}
EXPORT_SYMBOL(phy_ethtool_get_stats);
+/**
+ * phy_start_cable_test - Start a cable test
+ *
+ * @phydev: the phy_device struct
+ * @extack: extack for reporting useful error messages
+ */
int phy_start_cable_test(struct phy_device *phydev,
struct netlink_ext_ack *extack)
{
@@ -600,6 +644,13 @@ out:
}
EXPORT_SYMBOL(phy_start_cable_test);
+/**
+ * phy_start_cable_test_tdr - Start a raw TDR cable test
+ *
+ * @phydev: the phy_device struct
+ * @extack: extack for reporting useful error messages
+ * @config: Configuration of the test to run
+ */
int phy_start_cable_test_tdr(struct phy_device *phydev,
struct netlink_ext_ack *extack,
const struct phy_tdr_config *config)
@@ -1363,6 +1414,12 @@ int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data)
}
EXPORT_SYMBOL(phy_ethtool_set_eee);
+/**
+ * phy_ethtool_set_wol - Configure Wake On LAN
+ *
+ * @phydev: target phy_device struct
+ * @wol: Configuration requested
+ */
int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol)
{
if (phydev->drv && phydev->drv->set_wol)
@@ -1372,6 +1429,12 @@ int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol)
}
EXPORT_SYMBOL(phy_ethtool_set_wol);
+/**
+ * phy_ethtool_get_wol - Get the current Wake On LAN configuration
+ *
+ * @phydev: target phy_device struct
+ * @wol: Store the current configuration here
+ */
void phy_ethtool_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol)
{
if (phydev->drv && phydev->drv->get_wol)
@@ -1405,6 +1468,10 @@ int phy_ethtool_set_link_ksettings(struct net_device *ndev,
}
EXPORT_SYMBOL(phy_ethtool_set_link_ksettings);
+/**
+ * phy_ethtool_nway_reset - Restart auto negotiation
+ * @ndev: Network device to restart autoneg for
+ */
int phy_ethtool_nway_reset(struct net_device *ndev)
{
struct phy_device *phydev = ndev->phydev;
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index cb32d7ef4938..4daf94bb56a5 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -338,6 +338,29 @@ unregister:
EXPORT_SYMBOL(of_mdiobus_register);
/**
+ * of_mdio_find_device - Given a device tree node, find the mdio_device
+ * @np: pointer to the mdio_device's device tree node
+ *
+ * If successful, returns a pointer to the mdio_device with the embedded
+ * struct device refcount incremented by one, or NULL on failure.
+ * The caller should call put_device() on the mdio_device after its use
+ */
+struct mdio_device *of_mdio_find_device(struct device_node *np)
+{
+ struct device *d;
+
+ if (!np)
+ return NULL;
+
+ d = bus_find_device_by_of_node(&mdio_bus_type, np);
+ if (!d)
+ return NULL;
+
+ return to_mdio_device(d);
+}
+EXPORT_SYMBOL(of_mdio_find_device);
+
+/**
* of_phy_find_device - Give a PHY node, find the phy_device
* @phy_np: Pointer to the phy's device tree node
*
@@ -346,19 +369,16 @@ EXPORT_SYMBOL(of_mdiobus_register);
*/
struct phy_device *of_phy_find_device(struct device_node *phy_np)
{
- struct device *d;
struct mdio_device *mdiodev;
- if (!phy_np)
+ mdiodev = of_mdio_find_device(phy_np);
+ if (!mdiodev)
return NULL;
- d = bus_find_device_by_of_node(&mdio_bus_type, phy_np);
- if (d) {
- mdiodev = to_mdio_device(d);
- if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY)
- return to_phy_device(d);
- put_device(d);
- }
+ if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY)
+ return to_phy_device(&mdiodev->dev);
+
+ put_device(&mdiodev->dev);
return NULL;
}
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 2c14012ca35d..f321eabefbe4 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -195,8 +195,8 @@ struct qeth_vnicc_info {
#define QETH_IN_BUF_SIZE_DEFAULT 65536
#define QETH_IN_BUF_COUNT_DEFAULT 64
#define QETH_IN_BUF_COUNT_HSDEFAULT 128
-#define QETH_IN_BUF_COUNT_MIN 8
-#define QETH_IN_BUF_COUNT_MAX 128
+#define QETH_IN_BUF_COUNT_MIN 8U
+#define QETH_IN_BUF_COUNT_MAX 128U
#define QETH_MAX_BUFFER_ELEMENTS(card) ((card)->qdio.in_buf_size >> 12)
#define QETH_IN_BUF_REQUEUE_THRESHOLD(card) \
((card)->qdio.in_buf_pool.buf_count / 2)
@@ -753,7 +753,7 @@ struct qeth_discipline {
const struct device_type *devtype;
int (*setup) (struct ccwgroup_device *);
void (*remove) (struct ccwgroup_device *);
- int (*set_online)(struct qeth_card *card);
+ int (*set_online)(struct qeth_card *card, bool carrier_ok);
void (*set_offline)(struct qeth_card *card);
int (*do_ioctl)(struct net_device *dev, struct ifreq *rq, int cmd);
int (*control_event_handler)(struct qeth_card *card,
@@ -814,12 +814,16 @@ struct qeth_card {
struct workqueue_struct *event_wq;
struct workqueue_struct *cmd_wq;
wait_queue_head_t wait_q;
+
+ struct mutex ip_lock;
+ /* protected by ip_lock: */
DECLARE_HASHTABLE(ip_htable, 4);
+ struct qeth_ipato ipato;
+
DECLARE_HASHTABLE(local_addrs4, 4);
DECLARE_HASHTABLE(local_addrs6, 4);
spinlock_t local_addrs4_lock;
spinlock_t local_addrs6_lock;
- struct mutex ip_lock;
DECLARE_HASHTABLE(rx_mode_addrs, 4);
struct work_struct rx_mode_work;
struct work_struct kernel_thread_starter;
@@ -827,7 +831,6 @@ struct qeth_card {
unsigned long thread_start_mask;
unsigned long thread_allowed_mask;
unsigned long thread_running_mask;
- struct qeth_ipato ipato;
struct list_head cmd_waiter_list;
/* QDIO buffer handling */
struct qeth_qdio_info qdio;
@@ -1034,11 +1037,8 @@ struct net_device *qeth_clone_netdev(struct net_device *orig);
struct qeth_card *qeth_get_card_by_busid(char *bus_id);
void qeth_set_allowed_threads(struct qeth_card *, unsigned long , int);
int qeth_threads_running(struct qeth_card *, unsigned long);
-int qeth_core_hardsetup_card(struct qeth_card *card, bool *carrier_ok);
-int qeth_stop_channel(struct qeth_channel *channel);
int qeth_set_offline(struct qeth_card *card, bool resetting);
-void qeth_print_status_message(struct qeth_card *);
int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *,
int (*reply_cb)
(struct qeth_card *, struct qeth_reply *, unsigned long),
@@ -1062,12 +1062,7 @@ void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason);
void qeth_put_cmd(struct qeth_cmd_buffer *iob);
int qeth_schedule_recovery(struct qeth_card *card);
-void qeth_flush_local_addrs(struct qeth_card *card);
int qeth_poll(struct napi_struct *napi, int budget);
-void qeth_clear_ipacmd_list(struct qeth_card *);
-int qeth_qdio_clear_card(struct qeth_card *, int);
-void qeth_clear_working_pool_list(struct qeth_card *);
-void qeth_drain_output_queues(struct qeth_card *card);
void qeth_setadp_promisc_mode(struct qeth_card *card, bool enable);
int qeth_setadpparms_change_macaddr(struct qeth_card *);
void qeth_tx_timeout(struct net_device *, unsigned int txqueue);
@@ -1091,7 +1086,6 @@ int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
void qeth_dbf_longtext(debug_info_t *id, int level, char *text, ...);
int qeth_configure_cq(struct qeth_card *, enum qeth_cq);
int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
-void qeth_trace_features(struct qeth_card *);
int qeth_setassparms_cb(struct qeth_card *, struct qeth_reply *, unsigned long);
int qeth_setup_netdev(struct qeth_card *card);
int qeth_set_features(struct net_device *, netdev_features_t);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 7cd0cbf8a4f0..fc2c3db9259f 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -201,7 +201,7 @@ int qeth_threads_running(struct qeth_card *card, unsigned long threads)
}
EXPORT_SYMBOL_GPL(qeth_threads_running);
-void qeth_clear_working_pool_list(struct qeth_card *card)
+static void qeth_clear_working_pool_list(struct qeth_card *card)
{
struct qeth_buffer_pool_entry *pool_entry, *tmp;
struct qeth_qdio_q *queue = card->qdio.in_q;
@@ -216,7 +216,6 @@ void qeth_clear_working_pool_list(struct qeth_card *card)
for (i = 0; i < ARRAY_SIZE(queue->bufs); i++)
queue->bufs[i].pool_entry = NULL;
}
-EXPORT_SYMBOL_GPL(qeth_clear_working_pool_list);
static void qeth_free_pool_entry(struct qeth_buffer_pool_entry *entry)
{
@@ -658,12 +657,11 @@ static void qeth_flush_local_addrs6(struct qeth_card *card)
spin_unlock_irq(&card->local_addrs6_lock);
}
-void qeth_flush_local_addrs(struct qeth_card *card)
+static void qeth_flush_local_addrs(struct qeth_card *card)
{
qeth_flush_local_addrs4(card);
qeth_flush_local_addrs6(card);
}
-EXPORT_SYMBOL_GPL(qeth_flush_local_addrs);
static void qeth_add_local_addrs4(struct qeth_card *card,
struct qeth_ipacmd_local_addrs4 *cmd)
@@ -965,7 +963,7 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card,
}
}
-void qeth_clear_ipacmd_list(struct qeth_card *card)
+static void qeth_clear_ipacmd_list(struct qeth_card *card)
{
struct qeth_cmd_buffer *iob;
unsigned long flags;
@@ -977,7 +975,6 @@ void qeth_clear_ipacmd_list(struct qeth_card *card)
qeth_notify_cmd(iob, -ECANCELED);
spin_unlock_irqrestore(&card->lock, flags);
}
-EXPORT_SYMBOL_GPL(qeth_clear_ipacmd_list);
static int qeth_check_idx_response(struct qeth_card *card,
unsigned char *buffer)
@@ -1502,7 +1499,7 @@ static void qeth_drain_output_queue(struct qeth_qdio_out_q *q, bool free)
}
}
-void qeth_drain_output_queues(struct qeth_card *card)
+static void qeth_drain_output_queues(struct qeth_card *card)
{
int i;
@@ -1513,7 +1510,6 @@ void qeth_drain_output_queues(struct qeth_card *card)
qeth_drain_output_queue(card->qdio.out_qs[i], false);
}
}
-EXPORT_SYMBOL_GPL(qeth_drain_output_queues);
static int qeth_osa_set_output_queues(struct qeth_card *card, bool single)
{
@@ -1754,7 +1750,7 @@ static int qeth_halt_channel(struct qeth_card *card,
return 0;
}
-int qeth_stop_channel(struct qeth_channel *channel)
+static int qeth_stop_channel(struct qeth_channel *channel)
{
struct ccw_device *cdev = channel->ccwdev;
int rc;
@@ -1772,7 +1768,6 @@ int qeth_stop_channel(struct qeth_channel *channel)
return rc;
}
-EXPORT_SYMBOL_GPL(qeth_stop_channel);
static int qeth_start_channel(struct qeth_channel *channel)
{
@@ -1842,7 +1837,7 @@ static int qeth_clear_halt_card(struct qeth_card *card, int halt)
return qeth_clear_channels(card);
}
-int qeth_qdio_clear_card(struct qeth_card *card, int use_halt)
+static int qeth_qdio_clear_card(struct qeth_card *card, int use_halt)
{
int rc = 0;
@@ -1870,7 +1865,6 @@ int qeth_qdio_clear_card(struct qeth_card *card, int use_halt)
QETH_CARD_TEXT_(card, 3, "2err%d", rc);
return rc;
}
-EXPORT_SYMBOL_GPL(qeth_qdio_clear_card);
static enum qeth_discipline_id qeth_vm_detect_layer(struct qeth_card *card)
{
@@ -2867,7 +2861,7 @@ static int qeth_mpc_initialize(struct qeth_card *card)
return 0;
}
-void qeth_print_status_message(struct qeth_card *card)
+static void qeth_print_status_message(struct qeth_card *card)
{
switch (card->info.type) {
case QETH_CARD_TYPE_OSD:
@@ -2908,7 +2902,6 @@ void qeth_print_status_message(struct qeth_card *card)
(card->info.mcl_level[0]) ? ")" : "",
qeth_get_cardname_short(card));
}
-EXPORT_SYMBOL_GPL(qeth_print_status_message);
static void qeth_initialize_working_pool_list(struct qeth_card *card)
{
@@ -5124,7 +5117,7 @@ static void qeth_core_free_card(struct qeth_card *card)
kfree(card);
}
-void qeth_trace_features(struct qeth_card *card)
+static void qeth_trace_features(struct qeth_card *card)
{
QETH_CARD_TEXT(card, 2, "features");
QETH_CARD_HEX(card, 2, &card->options.ipa4, sizeof(card->options.ipa4));
@@ -5133,7 +5126,6 @@ void qeth_trace_features(struct qeth_card *card)
QETH_CARD_HEX(card, 2, &card->info.diagass_support,
sizeof(card->info.diagass_support));
}
-EXPORT_SYMBOL_GPL(qeth_trace_features);
static struct ccw_device_id qeth_ids[] = {
{CCW_DEVICE_DEVTYPE(0x1731, 0x01, 0x1732, 0x01),
@@ -5164,7 +5156,7 @@ static struct ccw_driver qeth_ccw_driver = {
.remove = ccwgroup_remove_ccwdev,
};
-int qeth_core_hardsetup_card(struct qeth_card *card, bool *carrier_ok)
+static int qeth_hardsetup_card(struct qeth_card *card, bool *carrier_ok)
{
int retries = 3;
int rc;
@@ -5278,6 +5270,8 @@ retriable:
QETH_CARD_TEXT_(card, 2, "8err%d", rc);
}
+ qeth_trace_features(card);
+
if (!qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP) ||
(card->info.hwtrap && qeth_hw_trap(card, QETH_DIAGS_TRAP_ARM)))
card->info.hwtrap = 0;
@@ -5303,21 +5297,49 @@ out:
CARD_DEVID(card), rc);
return rc;
}
-EXPORT_SYMBOL_GPL(qeth_core_hardsetup_card);
static int qeth_set_online(struct qeth_card *card)
{
+ bool carrier_ok;
int rc;
mutex_lock(&card->discipline_mutex);
mutex_lock(&card->conf_mutex);
QETH_CARD_TEXT(card, 2, "setonlin");
- rc = card->discipline->set_online(card);
+ rc = qeth_hardsetup_card(card, &carrier_ok);
+ if (rc) {
+ QETH_CARD_TEXT_(card, 2, "2err%04x", rc);
+ rc = -ENODEV;
+ goto err_hardsetup;
+ }
+
+ qeth_print_status_message(card);
+
+ rc = card->discipline->set_online(card, carrier_ok);
+ if (rc)
+ goto err_online;
+
+ /* let user_space know that device is online */
+ kobject_uevent(&card->gdev->dev.kobj, KOBJ_CHANGE);
mutex_unlock(&card->conf_mutex);
mutex_unlock(&card->discipline_mutex);
+ return 0;
+err_online:
+err_hardsetup:
+ qeth_qdio_clear_card(card, 0);
+ qeth_clear_working_pool_list(card);
+ qeth_flush_local_addrs(card);
+
+ qeth_stop_channel(&card->data);
+ qeth_stop_channel(&card->write);
+ qeth_stop_channel(&card->read);
+ qdio_free(CARD_DDEV(card));
+
+ mutex_unlock(&card->conf_mutex);
+ mutex_unlock(&card->discipline_mutex);
return rc;
}
@@ -5334,6 +5356,9 @@ int qeth_set_offline(struct qeth_card *card, bool resetting)
card->info.hwtrap = 1;
}
+ /* cancel any stalled cmd that might block the rtnl: */
+ qeth_clear_ipacmd_list(card);
+
rtnl_lock();
card->info.open_when_online = card->dev->flags & IFF_UP;
dev_close(card->dev);
@@ -5341,8 +5366,16 @@ int qeth_set_offline(struct qeth_card *card, bool resetting)
netif_carrier_off(card->dev);
rtnl_unlock();
+ cancel_work_sync(&card->rx_mode_work);
+
card->discipline->set_offline(card);
+ qeth_qdio_clear_card(card, 0);
+ qeth_drain_output_queues(card);
+ qeth_clear_working_pool_list(card);
+ qeth_flush_local_addrs(card);
+ card->info.promisc_mode = 0;
+
rc = qeth_stop_channel(&card->data);
rc2 = qeth_stop_channel(&card->write);
rc3 = qeth_stop_channel(&card->read);
diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index 8def82336f53..74c70364edc1 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c
@@ -103,21 +103,21 @@ static ssize_t qeth_dev_portno_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct qeth_card *card = dev_get_drvdata(dev);
- char *tmp;
unsigned int portno, limit;
int rc = 0;
+ rc = kstrtouint(buf, 16, &portno);
+ if (rc)
+ return rc;
+ if (portno > QETH_MAX_PORTNO)
+ return -EINVAL;
+
mutex_lock(&card->conf_mutex);
if (card->state != CARD_STATE_DOWN) {
rc = -EPERM;
goto out;
}
- portno = simple_strtoul(buf, &tmp, 16);
- if (portno > QETH_MAX_PORTNO) {
- rc = -EINVAL;
- goto out;
- }
limit = (card->ssqd.pcnt ? card->ssqd.pcnt - 1 : card->ssqd.pcnt);
if (portno > limit) {
rc = -EINVAL;
@@ -248,19 +248,19 @@ static ssize_t qeth_dev_bufcnt_store(struct device *dev,
{
struct qeth_card *card = dev_get_drvdata(dev);
unsigned int cnt;
- char *tmp;
int rc = 0;
+ rc = kstrtouint(buf, 10, &cnt);
+ if (rc)
+ return rc;
+
mutex_lock(&card->conf_mutex);
if (card->state != CARD_STATE_DOWN) {
rc = -EPERM;
goto out;
}
- cnt = simple_strtoul(buf, &tmp, 10);
- cnt = (cnt < QETH_IN_BUF_COUNT_MIN) ? QETH_IN_BUF_COUNT_MIN :
- ((cnt > QETH_IN_BUF_COUNT_MAX) ? QETH_IN_BUF_COUNT_MAX : cnt);
-
+ cnt = clamp(cnt, QETH_IN_BUF_COUNT_MIN, QETH_IN_BUF_COUNT_MAX);
rc = qeth_resize_buffer_pool(card, cnt);
out:
@@ -341,18 +341,15 @@ static ssize_t qeth_dev_layer2_store(struct device *dev,
{
struct qeth_card *card = dev_get_drvdata(dev);
struct net_device *ndev;
- char *tmp;
- int i, rc = 0;
enum qeth_discipline_id newdis;
+ unsigned int input;
+ int rc;
- mutex_lock(&card->discipline_mutex);
- if (card->state != CARD_STATE_DOWN) {
- rc = -EPERM;
- goto out;
- }
+ rc = kstrtouint(buf, 16, &input);
+ if (rc)
+ return rc;
- i = simple_strtoul(buf, &tmp, 16);
- switch (i) {
+ switch (input) {
case 0:
newdis = QETH_DISCIPLINE_LAYER3;
break;
@@ -360,7 +357,12 @@ static ssize_t qeth_dev_layer2_store(struct device *dev,
newdis = QETH_DISCIPLINE_LAYER2;
break;
default:
- rc = -EINVAL;
+ return -EINVAL;
+ }
+
+ mutex_lock(&card->discipline_mutex);
+ if (card->state != CARD_STATE_DOWN) {
+ rc = -EPERM;
goto out;
}
@@ -551,20 +553,21 @@ static DEVICE_ATTR(hw_trap, 0644, qeth_hw_trap_show,
static ssize_t qeth_dev_blkt_store(struct qeth_card *card,
const char *buf, size_t count, int *value, int max_value)
{
- char *tmp;
- int i, rc = 0;
+ unsigned int input;
+ int rc;
+
+ rc = kstrtouint(buf, 10, &input);
+ if (rc)
+ return rc;
+
+ if (input > max_value)
+ return -EINVAL;
mutex_lock(&card->conf_mutex);
- if (card->state != CARD_STATE_DOWN) {
+ if (card->state != CARD_STATE_DOWN)
rc = -EPERM;
- goto out;
- }
- i = simple_strtoul(buf, &tmp, 10);
- if (i <= max_value)
- *value = i;
else
- rc = -EINVAL;
-out:
+ *value = input;
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
diff --git a/drivers/s390/net/qeth_l2.h b/drivers/s390/net/qeth_l2.h
index cc95675c8bc4..296d73d84326 100644
--- a/drivers/s390/net/qeth_l2.h
+++ b/drivers/s390/net/qeth_l2.h
@@ -31,4 +31,11 @@ struct qeth_mac {
struct hlist_node hnode;
};
+static inline bool qeth_bridgeport_is_in_use(struct qeth_card *card)
+{
+ return card->options.sbp.role ||
+ card->options.sbp.reflect_promisc ||
+ card->options.sbp.hostnotification;
+}
+
#endif /* __QETH_L2_H__ */
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index e12ac32b8b47..1852d0a3c10a 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -28,17 +28,6 @@
#include "qeth_core.h"
#include "qeth_l2.h"
-static void qeth_bridgeport_query_support(struct qeth_card *card);
-static void qeth_bridge_state_change(struct qeth_card *card,
- struct qeth_ipa_cmd *cmd);
-static void qeth_addr_change_event(struct qeth_card *card,
- struct qeth_ipa_cmd *cmd);
-static bool qeth_bridgeport_is_in_use(struct qeth_card *card);
-static void qeth_l2_vnicc_set_defaults(struct qeth_card *card);
-static void qeth_l2_vnicc_init(struct qeth_card *card);
-static bool qeth_l2_vnicc_recover_timeout(struct qeth_card *card, u32 vnicc,
- u32 *timeout);
-
static int qeth_l2_setdelmac_makerc(struct qeth_card *card, u16 retcode)
{
int rc;
@@ -304,36 +293,6 @@ static void qeth_l2_dev2br_fdb_flush(struct qeth_card *card)
card->dev, &info.info, NULL);
}
-static void qeth_l2_stop_card(struct qeth_card *card)
-{
- struct qeth_priv *priv = netdev_priv(card->dev);
-
- QETH_CARD_TEXT(card, 2, "stopcard");
-
- qeth_set_allowed_threads(card, 0, 1);
-
- cancel_work_sync(&card->rx_mode_work);
- qeth_l2_drain_rx_mode_cache(card);
-
- if (card->state == CARD_STATE_SOFTSETUP) {
- qeth_clear_ipacmd_list(card);
- card->state = CARD_STATE_DOWN;
- }
-
- qeth_qdio_clear_card(card, 0);
- qeth_drain_output_queues(card);
- qeth_clear_working_pool_list(card);
- qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE);
- qeth_flush_local_addrs(card);
- card->info.promisc_mode = 0;
-
- if (priv->brport_features & BR_LEARNING_SYNC) {
- rtnl_lock();
- qeth_l2_dev2br_fdb_flush(card);
- rtnl_unlock();
- }
-}
-
static int qeth_l2_request_initial_mac(struct qeth_card *card)
{
int rc = 0;
@@ -617,49 +576,6 @@ static u16 qeth_l2_select_queue(struct net_device *dev, struct sk_buff *skb,
qeth_get_priority_queue(card, skb);
}
-static const struct device_type qeth_l2_devtype = {
- .name = "qeth_layer2",
- .groups = qeth_l2_attr_groups,
-};
-
-static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
-{
- struct qeth_card *card = dev_get_drvdata(&gdev->dev);
- int rc;
-
- if (IS_OSN(card))
- dev_notice(&gdev->dev, "OSN support will be dropped in 2021\n");
-
- qeth_l2_vnicc_set_defaults(card);
- mutex_init(&card->sbp_lock);
-
- if (gdev->dev.type == &qeth_generic_devtype) {
- rc = qeth_l2_create_device_attributes(&gdev->dev);
- if (rc)
- return rc;
- }
-
- INIT_WORK(&card->rx_mode_work, qeth_l2_rx_mode_work);
- return 0;
-}
-
-static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
-{
- struct qeth_card *card = dev_get_drvdata(&cgdev->dev);
-
- if (cgdev->dev.type == &qeth_generic_devtype)
- qeth_l2_remove_device_attributes(&cgdev->dev);
- qeth_set_allowed_threads(card, 0, 1);
- wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
-
- if (cgdev->state == CCWGROUP_ONLINE)
- qeth_set_offline(card, false);
-
- cancel_work_sync(&card->close_dev_work);
- if (card->dev->reg_state == NETREG_REGISTERED)
- unregister_netdev(card->dev);
-}
-
static void qeth_l2_set_rx_mode(struct net_device *dev)
{
struct qeth_card *card = dev->ml_priv;
@@ -1140,134 +1056,6 @@ static void qeth_l2_enable_brport_features(struct qeth_card *card)
}
}
-static int qeth_l2_set_online(struct qeth_card *card)
-{
- struct ccwgroup_device *gdev = card->gdev;
- struct net_device *dev = card->dev;
- int rc = 0;
- bool carrier_ok;
-
- rc = qeth_core_hardsetup_card(card, &carrier_ok);
- if (rc) {
- QETH_CARD_TEXT_(card, 2, "2err%04x", rc);
- rc = -ENODEV;
- goto out_remove;
- }
-
- /* query before bridgeport_notification may be enabled */
- qeth_l2_detect_dev2br_support(card);
-
- mutex_lock(&card->sbp_lock);
- qeth_bridgeport_query_support(card);
- if (card->options.sbp.supported_funcs) {
- qeth_l2_setup_bridgeport_attrs(card);
- dev_info(&card->gdev->dev,
- "The device represents a Bridge Capable Port\n");
- }
- mutex_unlock(&card->sbp_lock);
-
- qeth_l2_register_dev_addr(card);
-
- /* for the rx_bcast characteristic, init VNICC after setmac */
- qeth_l2_vnicc_init(card);
-
- qeth_trace_features(card);
- qeth_l2_trace_features(card);
-
- qeth_print_status_message(card);
-
- /* softsetup */
- QETH_CARD_TEXT(card, 2, "softsetp");
-
- card->state = CARD_STATE_SOFTSETUP;
-
- qeth_set_allowed_threads(card, 0xffffffff, 0);
-
- if (dev->reg_state != NETREG_REGISTERED) {
- rc = qeth_l2_setup_netdev(card);
- if (rc)
- goto out_remove;
-
- if (carrier_ok)
- netif_carrier_on(dev);
- } else {
- rtnl_lock();
- if (carrier_ok)
- netif_carrier_on(dev);
- else
- netif_carrier_off(dev);
-
- netif_device_attach(dev);
- qeth_enable_hw_features(dev);
- qeth_l2_enable_brport_features(card);
-
- if (card->info.open_when_online) {
- card->info.open_when_online = 0;
- dev_open(dev, NULL);
- }
- rtnl_unlock();
- }
- /* let user_space know that device is online */
- kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE);
- return 0;
-
-out_remove:
- qeth_l2_stop_card(card);
- qeth_stop_channel(&card->data);
- qeth_stop_channel(&card->write);
- qeth_stop_channel(&card->read);
- qdio_free(CARD_DDEV(card));
- return rc;
-}
-
-static void qeth_l2_set_offline(struct qeth_card *card)
-{
- qeth_l2_stop_card(card);
-}
-
-static int __init qeth_l2_init(void)
-{
- pr_info("register layer 2 discipline\n");
- return 0;
-}
-
-static void __exit qeth_l2_exit(void)
-{
- pr_info("unregister layer 2 discipline\n");
-}
-
-/* Returns zero if the command is successfully "consumed" */
-static int qeth_l2_control_event(struct qeth_card *card,
- struct qeth_ipa_cmd *cmd)
-{
- switch (cmd->hdr.command) {
- case IPA_CMD_SETBRIDGEPORT_OSA:
- case IPA_CMD_SETBRIDGEPORT_IQD:
- if (cmd->data.sbp.hdr.command_code ==
- IPA_SBP_BRIDGE_PORT_STATE_CHANGE) {
- qeth_bridge_state_change(card, cmd);
- return 0;
- } else
- return 1;
- case IPA_CMD_ADDRESS_CHANGE_NOTIF:
- qeth_addr_change_event(card, cmd);
- return 0;
- default:
- return 1;
- }
-}
-
-struct qeth_discipline qeth_l2_discipline = {
- .devtype = &qeth_l2_devtype,
- .setup = qeth_l2_probe_device,
- .remove = qeth_l2_remove_device,
- .set_online = qeth_l2_set_online,
- .set_offline = qeth_l2_set_offline,
- .do_ioctl = NULL,
- .control_event_handler = qeth_l2_control_event,
-};
-EXPORT_SYMBOL_GPL(qeth_l2_discipline);
-
#ifdef CONFIG_QETH_OSN
static void qeth_osn_assist_cb(struct qeth_card *card,
struct qeth_cmd_buffer *iob,
@@ -1987,12 +1775,6 @@ int qeth_bridgeport_an_set(struct qeth_card *card, int enable)
return rc;
}
-static bool qeth_bridgeport_is_in_use(struct qeth_card *card)
-{
- return (card->options.sbp.role || card->options.sbp.reflect_promisc ||
- card->options.sbp.hostnotification);
-}
-
/* VNIC Characteristics support */
/* handle VNICC IPA command return codes; convert to error codes */
@@ -2138,6 +1920,19 @@ static int qeth_l2_vnicc_getset_timeout(struct qeth_card *card, u32 vnicc,
return qeth_send_ipa_cmd(card, iob, qeth_l2_vnicc_request_cb, timeout);
}
+/* recover user timeout setting */
+static bool qeth_l2_vnicc_recover_timeout(struct qeth_card *card, u32 vnicc,
+ u32 *timeout)
+{
+ if (card->options.vnicc.sup_chars & vnicc &&
+ card->options.vnicc.getset_timeout_sup & vnicc &&
+ !qeth_l2_vnicc_getset_timeout(card, vnicc, IPA_VNICC_SET_TIMEOUT,
+ timeout))
+ return false;
+ *timeout = QETH_VNICC_DEFAULT_TIMEOUT;
+ return true;
+}
+
/* set current VNICC flag state; called from sysfs store function */
int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state)
{
@@ -2308,19 +2103,6 @@ bool qeth_bridgeport_allowed(struct qeth_card *card)
!(priv->brport_features & BR_LEARNING_SYNC));
}
-/* recover user timeout setting */
-static bool qeth_l2_vnicc_recover_timeout(struct qeth_card *card, u32 vnicc,
- u32 *timeout)
-{
- if (card->options.vnicc.sup_chars & vnicc &&
- card->options.vnicc.getset_timeout_sup & vnicc &&
- !qeth_l2_vnicc_getset_timeout(card, vnicc, IPA_VNICC_SET_TIMEOUT,
- timeout))
- return false;
- *timeout = QETH_VNICC_DEFAULT_TIMEOUT;
- return true;
-}
-
/* recover user characteristic setting */
static bool qeth_l2_vnicc_recover_char(struct qeth_card *card, u32 vnicc,
bool enable)
@@ -2409,6 +2191,174 @@ static void qeth_l2_vnicc_set_defaults(struct qeth_card *card)
card->options.vnicc.wanted_chars = QETH_VNICC_DEFAULT;
}
+static const struct device_type qeth_l2_devtype = {
+ .name = "qeth_layer2",
+ .groups = qeth_l2_attr_groups,
+};
+
+static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
+{
+ struct qeth_card *card = dev_get_drvdata(&gdev->dev);
+ int rc;
+
+ if (IS_OSN(card))
+ dev_notice(&gdev->dev, "OSN support will be dropped in 2021\n");
+
+ qeth_l2_vnicc_set_defaults(card);
+ mutex_init(&card->sbp_lock);
+
+ if (gdev->dev.type == &qeth_generic_devtype) {
+ rc = qeth_l2_create_device_attributes(&gdev->dev);
+ if (rc)
+ return rc;
+ }
+
+ INIT_WORK(&card->rx_mode_work, qeth_l2_rx_mode_work);
+ return 0;
+}
+
+static void qeth_l2_remove_device(struct ccwgroup_device *gdev)
+{
+ struct qeth_card *card = dev_get_drvdata(&gdev->dev);
+
+ if (gdev->dev.type == &qeth_generic_devtype)
+ qeth_l2_remove_device_attributes(&gdev->dev);
+ qeth_set_allowed_threads(card, 0, 1);
+ wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
+
+ if (gdev->state == CCWGROUP_ONLINE)
+ qeth_set_offline(card, false);
+
+ cancel_work_sync(&card->close_dev_work);
+ if (card->dev->reg_state == NETREG_REGISTERED)
+ unregister_netdev(card->dev);
+}
+
+static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok)
+{
+ struct net_device *dev = card->dev;
+ int rc = 0;
+
+ /* query before bridgeport_notification may be enabled */
+ qeth_l2_detect_dev2br_support(card);
+
+ mutex_lock(&card->sbp_lock);
+ qeth_bridgeport_query_support(card);
+ if (card->options.sbp.supported_funcs) {
+ qeth_l2_setup_bridgeport_attrs(card);
+ dev_info(&card->gdev->dev,
+ "The device represents a Bridge Capable Port\n");
+ }
+ mutex_unlock(&card->sbp_lock);
+
+ qeth_l2_register_dev_addr(card);
+
+ /* for the rx_bcast characteristic, init VNICC after setmac */
+ qeth_l2_vnicc_init(card);
+
+ qeth_l2_trace_features(card);
+
+ /* softsetup */
+ QETH_CARD_TEXT(card, 2, "softsetp");
+
+ card->state = CARD_STATE_SOFTSETUP;
+
+ qeth_set_allowed_threads(card, 0xffffffff, 0);
+
+ if (dev->reg_state != NETREG_REGISTERED) {
+ rc = qeth_l2_setup_netdev(card);
+ if (rc)
+ goto err_setup;
+
+ if (carrier_ok)
+ netif_carrier_on(dev);
+ } else {
+ rtnl_lock();
+ if (carrier_ok)
+ netif_carrier_on(dev);
+ else
+ netif_carrier_off(dev);
+
+ netif_device_attach(dev);
+ qeth_enable_hw_features(dev);
+ qeth_l2_enable_brport_features(card);
+
+ if (card->info.open_when_online) {
+ card->info.open_when_online = 0;
+ dev_open(dev, NULL);
+ }
+ rtnl_unlock();
+ }
+ return 0;
+
+err_setup:
+ qeth_set_allowed_threads(card, 0, 1);
+ card->state = CARD_STATE_DOWN;
+ return rc;
+}
+
+static void qeth_l2_set_offline(struct qeth_card *card)
+{
+ struct qeth_priv *priv = netdev_priv(card->dev);
+
+ qeth_set_allowed_threads(card, 0, 1);
+ qeth_l2_drain_rx_mode_cache(card);
+
+ if (card->state == CARD_STATE_SOFTSETUP)
+ card->state = CARD_STATE_DOWN;
+
+ qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE);
+ if (priv->brport_features & BR_LEARNING_SYNC) {
+ rtnl_lock();
+ qeth_l2_dev2br_fdb_flush(card);
+ rtnl_unlock();
+ }
+}
+
+/* Returns zero if the command is successfully "consumed" */
+static int qeth_l2_control_event(struct qeth_card *card,
+ struct qeth_ipa_cmd *cmd)
+{
+ switch (cmd->hdr.command) {
+ case IPA_CMD_SETBRIDGEPORT_OSA:
+ case IPA_CMD_SETBRIDGEPORT_IQD:
+ if (cmd->data.sbp.hdr.command_code ==
+ IPA_SBP_BRIDGE_PORT_STATE_CHANGE) {
+ qeth_bridge_state_change(card, cmd);
+ return 0;
+ }
+
+ return 1;
+ case IPA_CMD_ADDRESS_CHANGE_NOTIF:
+ qeth_addr_change_event(card, cmd);
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+struct qeth_discipline qeth_l2_discipline = {
+ .devtype = &qeth_l2_devtype,
+ .setup = qeth_l2_probe_device,
+ .remove = qeth_l2_remove_device,
+ .set_online = qeth_l2_set_online,
+ .set_offline = qeth_l2_set_offline,
+ .do_ioctl = NULL,
+ .control_event_handler = qeth_l2_control_event,
+};
+EXPORT_SYMBOL_GPL(qeth_l2_discipline);
+
+static int __init qeth_l2_init(void)
+{
+ pr_info("register layer 2 discipline\n");
+ return 0;
+}
+
+static void __exit qeth_l2_exit(void)
+{
+ pr_info("unregister layer 2 discipline\n");
+}
+
module_init(qeth_l2_init);
module_exit(qeth_l2_exit);
MODULE_AUTHOR("Frank Blaschka <[email protected]>");
diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h
index 6ccfe2121095..acd130cfbab3 100644
--- a/drivers/s390/net/qeth_l3.h
+++ b/drivers/s390/net/qeth_l3.h
@@ -96,7 +96,7 @@ struct qeth_ipato_entry {
struct list_head entry;
enum qeth_prot_versions proto;
char addr[16];
- int mask_bits;
+ unsigned int mask_bits;
};
extern const struct attribute_group *qeth_l3_attr_groups[];
@@ -110,7 +110,7 @@ int qeth_l3_setrouting_v6(struct qeth_card *);
int qeth_l3_add_ipato_entry(struct qeth_card *, struct qeth_ipato_entry *);
int qeth_l3_del_ipato_entry(struct qeth_card *card,
enum qeth_prot_versions proto, u8 *addr,
- int mask_bits);
+ unsigned int mask_bits);
void qeth_l3_update_ipato(struct qeth_card *card);
int qeth_l3_modify_hsuid(struct qeth_card *card, bool add);
int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip,
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 33fdad1a6887..a6f8878b55c6 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -105,11 +105,9 @@ static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
(ipatoe->proto == QETH_PROT_IPV4) ?
4 : 16);
if (addr->proto == QETH_PROT_IPV4)
- rc = !memcmp(addr_bits, ipatoe_bits,
- min(32, ipatoe->mask_bits));
+ rc = !memcmp(addr_bits, ipatoe_bits, ipatoe->mask_bits);
else
- rc = !memcmp(addr_bits, ipatoe_bits,
- min(128, ipatoe->mask_bits));
+ rc = !memcmp(addr_bits, ipatoe_bits, ipatoe->mask_bits);
if (rc)
break;
}
@@ -536,7 +534,6 @@ int qeth_l3_add_ipato_entry(struct qeth_card *card,
QETH_CARD_TEXT(card, 2, "addipato");
- mutex_lock(&card->conf_mutex);
mutex_lock(&card->ip_lock);
list_for_each_entry(ipatoe, &card->ipato.entries, entry) {
@@ -556,21 +553,19 @@ int qeth_l3_add_ipato_entry(struct qeth_card *card,
}
mutex_unlock(&card->ip_lock);
- mutex_unlock(&card->conf_mutex);
return rc;
}
int qeth_l3_del_ipato_entry(struct qeth_card *card,
enum qeth_prot_versions proto, u8 *addr,
- int mask_bits)
+ unsigned int mask_bits)
{
struct qeth_ipato_entry *ipatoe, *tmp;
int rc = -ENOENT;
QETH_CARD_TEXT(card, 2, "delipato");
- mutex_lock(&card->conf_mutex);
mutex_lock(&card->ip_lock);
list_for_each_entry_safe(ipatoe, tmp, &card->ipato.entries, entry) {
@@ -587,7 +582,6 @@ int qeth_l3_del_ipato_entry(struct qeth_card *card,
}
mutex_unlock(&card->ip_lock);
- mutex_unlock(&card->conf_mutex);
return rc;
}
@@ -597,7 +591,6 @@ int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip,
enum qeth_prot_versions proto)
{
struct qeth_ipaddr addr;
- int rc;
qeth_l3_init_ipaddr(&addr, type, proto);
if (proto == QETH_PROT_IPV4)
@@ -605,11 +598,7 @@ int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip,
else
memcpy(&addr.u.a6.addr, ip, 16);
- mutex_lock(&card->conf_mutex);
- rc = qeth_l3_modify_ip(card, &addr, add);
- mutex_unlock(&card->conf_mutex);
-
- return rc;
+ return qeth_l3_modify_ip(card, &addr, add);
}
int qeth_l3_modify_hsuid(struct qeth_card *card, bool add)
@@ -1153,33 +1142,6 @@ static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev,
return 0;
}
-static void qeth_l3_stop_card(struct qeth_card *card)
-{
- QETH_CARD_TEXT(card, 2, "stopcard");
-
- qeth_set_allowed_threads(card, 0, 1);
-
- cancel_work_sync(&card->rx_mode_work);
- qeth_l3_drain_rx_mode_cache(card);
-
- if (card->options.sniffer &&
- (card->info.promisc_mode == SET_PROMISC_MODE_ON))
- qeth_diags_trace(card, QETH_DIAGS_CMD_TRACE_DISABLE);
-
- if (card->state == CARD_STATE_SOFTSETUP) {
- qeth_l3_clear_ip_htable(card, 1);
- qeth_clear_ipacmd_list(card);
- card->state = CARD_STATE_DOWN;
- }
-
- qeth_qdio_clear_card(card, 0);
- qeth_drain_output_queues(card);
- qeth_clear_working_pool_list(card);
- flush_workqueue(card->event_wq);
- qeth_flush_local_addrs(card);
- card->info.promisc_mode = 0;
-}
-
static void qeth_l3_set_promisc_mode(struct qeth_card *card)
{
bool enable = card->dev->flags & IFF_PROMISC;
@@ -1235,7 +1197,6 @@ static void qeth_l3_rx_mode_work(struct work_struct *work)
kfree(addr);
break;
}
- addr->ref_counter = 1;
fallthrough;
default:
/* for next call to set_rx_mode(): */
@@ -2025,21 +1986,10 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
qeth_l3_clear_ipato_list(card);
}
-static int qeth_l3_set_online(struct qeth_card *card)
+static int qeth_l3_set_online(struct qeth_card *card, bool carrier_ok)
{
- struct ccwgroup_device *gdev = card->gdev;
struct net_device *dev = card->dev;
int rc = 0;
- bool carrier_ok;
-
- rc = qeth_core_hardsetup_card(card, &carrier_ok);
- if (rc) {
- QETH_CARD_TEXT_(card, 2, "2err%04x", rc);
- rc = -ENODEV;
- goto out_remove;
- }
-
- qeth_print_status_message(card);
/* softsetup */
QETH_CARD_TEXT(card, 2, "softsetp");
@@ -2066,7 +2016,7 @@ static int qeth_l3_set_online(struct qeth_card *card)
if (dev->reg_state != NETREG_REGISTERED) {
rc = qeth_l3_setup_netdev(card);
if (rc)
- goto out_remove;
+ goto err_setup;
if (carrier_ok)
netif_carrier_on(dev);
@@ -2086,22 +2036,28 @@ static int qeth_l3_set_online(struct qeth_card *card)
}
rtnl_unlock();
}
- qeth_trace_features(card);
- /* let user_space know that device is online */
- kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE);
return 0;
-out_remove:
- qeth_l3_stop_card(card);
- qeth_stop_channel(&card->data);
- qeth_stop_channel(&card->write);
- qeth_stop_channel(&card->read);
- qdio_free(CARD_DDEV(card));
+
+err_setup:
+ qeth_set_allowed_threads(card, 0, 1);
+ card->state = CARD_STATE_DOWN;
+ qeth_l3_clear_ip_htable(card, 1);
return rc;
}
static void qeth_l3_set_offline(struct qeth_card *card)
{
- qeth_l3_stop_card(card);
+ qeth_set_allowed_threads(card, 0, 1);
+ qeth_l3_drain_rx_mode_cache(card);
+
+ if (card->options.sniffer &&
+ (card->info.promisc_mode == SET_PROMISC_MODE_ON))
+ qeth_diags_trace(card, QETH_DIAGS_CMD_TRACE_DISABLE);
+
+ if (card->state == CARD_STATE_SOFTSETUP) {
+ card->state = CARD_STATE_DOWN;
+ qeth_l3_clear_ip_htable(card, 1);
+ }
}
/* Returns zero if the command is successfully "consumed" */
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index dd0b39082534..351763ae9b9c 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -301,19 +301,21 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev,
goto out;
}
+ mutex_lock(&card->ip_lock);
if (sysfs_streq(buf, "toggle")) {
enable = !card->ipato.enabled;
} else if (kstrtobool(buf, &enable)) {
rc = -EINVAL;
- goto out;
+ goto unlock_ip;
}
if (card->ipato.enabled != enable) {
card->ipato.enabled = enable;
- mutex_lock(&card->ip_lock);
qeth_l3_update_ipato(card);
- mutex_unlock(&card->ip_lock);
}
+
+unlock_ip:
+ mutex_unlock(&card->ip_lock);
out:
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
@@ -339,7 +341,7 @@ static ssize_t qeth_l3_dev_ipato_invert4_store(struct device *dev,
bool invert;
int rc = 0;
- mutex_lock(&card->conf_mutex);
+ mutex_lock(&card->ip_lock);
if (sysfs_streq(buf, "toggle")) {
invert = !card->ipato.invert4;
} else if (kstrtobool(buf, &invert)) {
@@ -349,12 +351,11 @@ static ssize_t qeth_l3_dev_ipato_invert4_store(struct device *dev,
if (card->ipato.invert4 != invert) {
card->ipato.invert4 = invert;
- mutex_lock(&card->ip_lock);
qeth_l3_update_ipato(card);
- mutex_unlock(&card->ip_lock);
}
+
out:
- mutex_unlock(&card->conf_mutex);
+ mutex_unlock(&card->ip_lock);
return rc ? rc : count;
}
@@ -406,29 +407,29 @@ static ssize_t qeth_l3_dev_ipato_add4_show(struct device *dev,
}
static int qeth_l3_parse_ipatoe(const char *buf, enum qeth_prot_versions proto,
- u8 *addr, int *mask_bits)
+ u8 *addr, unsigned int *mask_bits)
{
- const char *start, *end;
- char *tmp;
- char buffer[40] = {0, };
+ char *sep;
+ int rc;
- start = buf;
- /* get address string */
- end = strchr(start, '/');
- if (!end || (end - start >= 40)) {
+ /* Expected input pattern: %addr/%mask */
+ sep = strnchr(buf, 40, '/');
+ if (!sep)
return -EINVAL;
- }
- strncpy(buffer, start, end - start);
- if (qeth_l3_string_to_ipaddr(buffer, proto, addr)) {
- return -EINVAL;
- }
- start = end + 1;
- *mask_bits = simple_strtoul(start, &tmp, 10);
- if (!strlen(start) ||
- (tmp == start) ||
- (*mask_bits > ((proto == QETH_PROT_IPV4) ? 32 : 128))) {
+
+ /* Terminate the %addr sub-string, and parse it: */
+ *sep = '\0';
+ rc = qeth_l3_string_to_ipaddr(buf, proto, addr);
+ if (rc)
+ return rc;
+
+ rc = kstrtouint(sep + 1, 10, mask_bits);
+ if (rc)
+ return rc;
+
+ if (*mask_bits > ((proto == QETH_PROT_IPV4) ? 32 : 128))
return -EINVAL;
- }
+
return 0;
}
@@ -436,8 +437,8 @@ static ssize_t qeth_l3_dev_ipato_add_store(const char *buf, size_t count,
struct qeth_card *card, enum qeth_prot_versions proto)
{
struct qeth_ipato_entry *ipatoe;
+ unsigned int mask_bits;
u8 addr[16];
- int mask_bits;
int rc = 0;
rc = qeth_l3_parse_ipatoe(buf, proto, addr, &mask_bits);
@@ -474,8 +475,8 @@ static QETH_DEVICE_ATTR(ipato_add4, add4, 0644,
static ssize_t qeth_l3_dev_ipato_del_store(const char *buf, size_t count,
struct qeth_card *card, enum qeth_prot_versions proto)
{
+ unsigned int mask_bits;
u8 addr[16];
- int mask_bits;
int rc = 0;
rc = qeth_l3_parse_ipatoe(buf, proto, addr, &mask_bits);
@@ -510,7 +511,7 @@ static ssize_t qeth_l3_dev_ipato_invert6_store(struct device *dev,
bool invert;
int rc = 0;
- mutex_lock(&card->conf_mutex);
+ mutex_lock(&card->ip_lock);
if (sysfs_streq(buf, "toggle")) {
invert = !card->ipato.invert6;
} else if (kstrtobool(buf, &invert)) {
@@ -520,12 +521,11 @@ static ssize_t qeth_l3_dev_ipato_invert6_store(struct device *dev,
if (card->ipato.invert6 != invert) {
card->ipato.invert6 = invert;
- mutex_lock(&card->ip_lock);
qeth_l3_update_ipato(card);
- mutex_unlock(&card->ip_lock);
}
+
out:
- mutex_unlock(&card->conf_mutex);
+ mutex_unlock(&card->ip_lock);
return rc ? rc : count;
}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c6d9f2c444f4..fc5c901c7542 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -292,6 +292,7 @@ enum bpf_arg_type {
ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */
ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
+ __BPF_ARG_TYPE_MAX,
};
/* type of values returned from helper functions */
@@ -326,12 +327,16 @@ struct bpf_func_proto {
};
enum bpf_arg_type arg_type[5];
};
- int *btf_id; /* BTF ids of arguments */
- bool (*check_btf_id)(u32 btf_id, u32 arg); /* if the argument btf_id is
- * valid. Often used if more
- * than one btf id is permitted
- * for this argument.
- */
+ union {
+ struct {
+ u32 *arg1_btf_id;
+ u32 *arg2_btf_id;
+ u32 *arg3_btf_id;
+ u32 *arg4_btf_id;
+ u32 *arg5_btf_id;
+ };
+ u32 *arg_btf_id[5];
+ };
int *ret_btf_id; /* return value btf_id */
bool (*allowed)(const struct bpf_prog *prog);
};
@@ -697,16 +702,19 @@ enum bpf_jit_poke_reason {
/* Descriptor of pokes pointing /into/ the JITed image. */
struct bpf_jit_poke_descriptor {
- void *ip;
+ void *tailcall_target;
+ void *tailcall_bypass;
+ void *bypass_addr;
union {
struct {
struct bpf_map *map;
u32 key;
} tail_call;
};
- bool ip_stable;
+ bool tailcall_target_stable;
u8 adj_off;
u16 reason;
+ u32 insn_idx;
};
/* reg_type info for ctx arguments */
@@ -737,6 +745,7 @@ struct bpf_prog_aux {
bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
bool func_proto_unreliable;
bool sleepable;
+ bool tail_call_reachable;
enum bpf_tramp_prog_type trampoline_prog_type;
struct bpf_trampoline *trampoline;
struct hlist_node tramp_hlist;
@@ -751,6 +760,7 @@ struct bpf_prog_aux {
struct bpf_ksym ksym;
const struct bpf_prog_ops *ops;
struct bpf_map **used_maps;
+ struct mutex used_maps_mutex; /* mutex for used_maps and used_map_cnt */
struct bpf_prog *prog;
struct user_struct *user;
u64 load_time; /* ns since boottime */
@@ -1380,8 +1390,6 @@ int btf_struct_access(struct bpf_verifier_log *log,
u32 *next_btf_id);
bool btf_struct_ids_match(struct bpf_verifier_log *log,
int off, u32 id, u32 need_type_id);
-int btf_resolve_helper_id(struct bpf_verifier_log *log,
- const struct bpf_func_proto *fn, int);
int btf_distill_func_proto(struct bpf_verifier_log *log,
struct btf *btf,
@@ -1900,6 +1908,6 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *addr1, void *addr2);
struct btf_id_set;
-bool btf_id_set_contains(struct btf_id_set *set, u32 id);
+bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 53c7bd568c5d..2bb48a2c4d08 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -358,6 +358,9 @@ struct bpf_subprog_info {
u32 start; /* insn idx of function entry point */
u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
u16 stack_depth; /* max. stack depth used by this function */
+ bool has_tail_call;
+ bool tail_call_reachable;
+ bool has_ld_abs;
};
/* single container for all structs
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 210b086188a3..57890b357f85 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -76,6 +76,13 @@ extern u32 name[];
#define BTF_ID_LIST_GLOBAL(name) \
__BTF_ID_LIST(name, globl)
+/* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with
+ * a single entry.
+ */
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) \
+ BTF_ID_LIST(name) \
+ BTF_ID(prefix, typename)
+
/*
* The BTF_ID_UNUSED macro defines 4 zero bytes.
* It's used when we want to define 'unused' entry
@@ -140,6 +147,7 @@ extern struct btf_id_set name;
#define BTF_ID(prefix, name)
#define BTF_ID_UNUSED
#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
#define BTF_SET_END(name)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 05b4052715b9..20fc24c9779a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1287,6 +1287,8 @@ int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
struct bpf_sk_lookup_kern {
u16 family;
u16 protocol;
+ __be16 sport;
+ u16 dport;
struct {
__be32 saddr;
__be32 daddr;
@@ -1295,8 +1297,6 @@ struct bpf_sk_lookup_kern {
const struct in6_addr *saddr;
const struct in6_addr *daddr;
} v6;
- __be16 sport;
- u16 dport;
struct sock *selected_sk;
bool no_reuseport;
};
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 6479a38e52fa..556caed00258 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -19,7 +19,13 @@ struct br_ip {
#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr ip6;
#endif
- } u;
+ } src;
+ union {
+ __be32 ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr ip6;
+#endif
+ } dst;
__be16 proto;
__u16 vid;
};
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 3a88b699b758..dbd69b3d170b 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -306,7 +306,7 @@ static inline u32 linkmode_adv_to_mii_10gbt_adv_t(unsigned long *advertising)
/**
* mii_10gbt_stat_mod_linkmode_lpa_t
* @advertising: target the linkmode advertisement settings
- * @adv: value of the C45 10GBASE-T AN STATUS register
+ * @lpa: value of the C45 10GBASE-T AN STATUS register
*
* A small helper function that translates C45 10GBASE-T AN STATUS register bits
* to linkmode advertisement settings. Other bits in advertising aren't changed.
@@ -371,6 +371,7 @@ struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr);
/**
* mdio_module_driver() - Helper macro for registering mdio drivers
+ * @_mdio_driver: driver to register
*
* Helper macro for MDIO drivers which do not do anything special in module
* init/exit. Each module may only use this macro once, and calling it
diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index 1efb88d9f892..cfe8c607a628 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -17,6 +17,7 @@ bool of_mdiobus_child_is_phy(struct device_node *child);
int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np);
int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio,
struct device_node *np);
+struct mdio_device *of_mdio_find_device(struct device_node *np);
struct phy_device *of_phy_find_device(struct device_node *phy_np);
struct phy_device *
of_phy_connect(struct net_device *dev, struct device_node *phy_np,
@@ -74,6 +75,11 @@ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *
return mdiobus_register(mdio);
}
+static inline struct mdio_device *of_mdio_find_device(struct device_node *np)
+{
+ return NULL;
+}
+
static inline struct phy_device *of_phy_find_device(struct device_node *phy_np)
{
return NULL;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 3a09d2bf69ea..eb3cb1a98b45 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -82,7 +82,39 @@ extern const int phy_10gbit_features_array[1];
#define PHY_POLL_CABLE_TEST 0x00000004
#define MDIO_DEVICE_IS_PHY 0x80000000
-/* Interface Mode definitions */
+/**
+ * enum phy_interface_t - Interface Mode definitions
+ *
+ * @PHY_INTERFACE_MODE_NA: Not Applicable - don't touch
+ * @PHY_INTERFACE_MODE_INTERNAL: No interface, MAC and PHY combined
+ * @PHY_INTERFACE_MODE_MII: Median-independent interface
+ * @PHY_INTERFACE_MODE_GMII: Gigabit median-independent interface
+ * @PHY_INTERFACE_MODE_SGMII: Serial gigabit media-independent interface
+ * @PHY_INTERFACE_MODE_TBI: Ten Bit Interface
+ * @PHY_INTERFACE_MODE_REVMII: Reverse Media Independent Interface
+ * @PHY_INTERFACE_MODE_RMII: Reduced Media Independent Interface
+ * @PHY_INTERFACE_MODE_RGMII: Reduced gigabit media-independent interface
+ * @PHY_INTERFACE_MODE_RGMII_ID: RGMII with Internal RX+TX delay
+ * @PHY_INTERFACE_MODE_RGMII_RXID: RGMII with Internal RX delay
+ * @PHY_INTERFACE_MODE_RGMII_TXID: RGMII with Internal RX delay
+ * @PHY_INTERFACE_MODE_RTBI: Reduced TBI
+ * @PHY_INTERFACE_MODE_SMII: ??? MII
+ * @PHY_INTERFACE_MODE_XGMII: 10 gigabit media-independent interface
+ * @PHY_INTERFACE_MODE_XLGMII:40 gigabit media-independent interface
+ * @PHY_INTERFACE_MODE_MOCA: Multimedia over Coax
+ * @PHY_INTERFACE_MODE_QSGMII: Quad SGMII
+ * @PHY_INTERFACE_MODE_TRGMII: Turbo RGMII
+ * @PHY_INTERFACE_MODE_1000BASEX: 1000 BaseX
+ * @PHY_INTERFACE_MODE_2500BASEX: 2500 BaseX
+ * @PHY_INTERFACE_MODE_RXAUI: Reduced XAUI
+ * @PHY_INTERFACE_MODE_XAUI: 10 Gigabit Attachment Unit Interface
+ * @PHY_INTERFACE_MODE_10GBASER: 10G BaseR
+ * @PHY_INTERFACE_MODE_USXGMII: Universal Serial 10GE MII
+ * @PHY_INTERFACE_MODE_10GKR: 10GBASE-KR - with Clause 73 AN
+ * @PHY_INTERFACE_MODE_MAX: Book keeping
+ *
+ * Describes the interface between the MAC and PHY.
+ */
typedef enum {
PHY_INTERFACE_MODE_NA,
PHY_INTERFACE_MODE_INTERNAL,
@@ -116,8 +148,8 @@ typedef enum {
} phy_interface_t;
/**
- * phy_supported_speeds - return all speeds currently supported by a phy device
- * @phy: The phy device to return supported speeds of.
+ * phy_supported_speeds - return all speeds currently supported by a PHY device
+ * @phy: The PHY device to return supported speeds of.
* @speeds: buffer to store supported speeds in.
* @size: size of speeds buffer.
*
@@ -134,9 +166,9 @@ unsigned int phy_supported_speeds(struct phy_device *phy,
* phy_modes - map phy_interface_t enum to device tree binding of phy-mode
* @interface: enum phy_interface_t value
*
- * Description: maps 'enum phy_interface_t' defined in this file
+ * Description: maps enum &phy_interface_t defined in this file
* into the device tree binding of 'phy-mode', so that Ethernet
- * device driver can get phy interface from device tree.
+ * device driver can get PHY interface from device tree.
*/
static inline const char *phy_modes(phy_interface_t interface)
{
@@ -215,6 +247,14 @@ struct sfp_bus;
struct sfp_upstream_ops;
struct sk_buff;
+/**
+ * struct mdio_bus_stats - Statistics counters for MDIO busses
+ * @transfers: Total number of transfers, i.e. @writes + @reads
+ * @errors: Number of MDIO transfers that returned an error
+ * @writes: Number of write transfers
+ * @reads: Number of read transfers
+ * @syncp: Synchronisation for incrementing statistics
+ */
struct mdio_bus_stats {
u64_stats_t transfers;
u64_stats_t errors;
@@ -224,7 +264,15 @@ struct mdio_bus_stats {
struct u64_stats_sync syncp;
};
-/* Represents a shared structure between different phydev's in the same
+/**
+ * struct phy_package_shared - Shared information in PHY packages
+ * @addr: Common PHY address used to combine PHYs in one package
+ * @refcnt: Number of PHYs connected to this shared data
+ * @flags: Initialization of PHY package
+ * @priv_size: Size of the shared private data @priv
+ * @priv: Driver private data shared across a PHY package
+ *
+ * Represents a shared structure between different phydev's in the same
* package, for example a quad PHY. See phy_package_join() and
* phy_package_leave().
*/
@@ -247,7 +295,14 @@ struct phy_package_shared {
#define PHY_SHARED_F_INIT_DONE 0
#define PHY_SHARED_F_PROBE_DONE 1
-/*
+/**
+ * struct mii_bus - Represents an MDIO bus
+ *
+ * @owner: Who owns this device
+ * @name: User friendly name for this MDIO device, or driver name
+ * @id: Unique identifier for this bus, typical from bus hierarchy
+ * @priv: Driver private data
+ *
* The Bus class for PHYs. Devices which provide access to
* PHYs should register using this structure
*/
@@ -256,49 +311,58 @@ struct mii_bus {
const char *name;
char id[MII_BUS_ID_SIZE];
void *priv;
+ /** @read: Perform a read transfer on the bus */
int (*read)(struct mii_bus *bus, int addr, int regnum);
+ /** @write: Perform a write transfer on the bus */
int (*write)(struct mii_bus *bus, int addr, int regnum, u16 val);
+ /** @reset: Perform a reset of the bus */
int (*reset)(struct mii_bus *bus);
+
+ /** @stats: Statistic counters per device on the bus */
struct mdio_bus_stats stats[PHY_MAX_ADDR];
- /*
- * A lock to ensure that only one thing can read/write
+ /**
+ * @mdio_lock: A lock to ensure that only one thing can read/write
* the MDIO bus at a time
*/
struct mutex mdio_lock;
+ /** @parent: Parent device of this bus */
struct device *parent;
+ /** @state: State of bus structure */
enum {
MDIOBUS_ALLOCATED = 1,
MDIOBUS_REGISTERED,
MDIOBUS_UNREGISTERED,
MDIOBUS_RELEASED,
} state;
+
+ /** @dev: Kernel device representation */
struct device dev;
- /* list of all PHYs on bus */
+ /** @mdio_map: list of all MDIO devices on bus */
struct mdio_device *mdio_map[PHY_MAX_ADDR];
- /* PHY addresses to be ignored when probing */
+ /** @phy_mask: PHY addresses to be ignored when probing */
u32 phy_mask;
- /* PHY addresses to ignore the TA/read failure */
+ /** @phy_ignore_ta_mask: PHY addresses to ignore the TA/read failure */
u32 phy_ignore_ta_mask;
- /*
- * An array of interrupts, each PHY's interrupt at the index
+ /**
+ * @irq: An array of interrupts, each PHY's interrupt at the index
* matching its address
*/
int irq[PHY_MAX_ADDR];
- /* GPIO reset pulse width in microseconds */
+ /** @reset_delay_us: GPIO reset pulse width in microseconds */
int reset_delay_us;
- /* GPIO reset deassert delay in microseconds */
+ /** @reset_post_delay_us: GPIO reset deassert delay in microseconds */
int reset_post_delay_us;
- /* RESET GPIO descriptor pointer */
+ /** @reset_gpiod: Reset GPIO descriptor pointer */
struct gpio_desc *reset_gpiod;
- /* bus capabilities, used for probing */
+ /** @probe_capabilities: bus capabilities, used for probing */
enum {
MDIOBUS_NO_CAP = 0,
MDIOBUS_C22,
@@ -306,15 +370,22 @@ struct mii_bus {
MDIOBUS_C22_C45,
} probe_capabilities;
- /* protect access to the shared element */
+ /** @shared_lock: protect access to the shared element */
struct mutex shared_lock;
- /* shared state across different PHYs */
+ /** @shared: shared state across different PHYs */
struct phy_package_shared *shared[PHY_MAX_ADDR];
};
#define to_mii_bus(d) container_of(d, struct mii_bus, dev)
-struct mii_bus *mdiobus_alloc_size(size_t);
+struct mii_bus *mdiobus_alloc_size(size_t size);
+
+/**
+ * mdiobus_alloc - Allocate an MDIO bus structure
+ *
+ * The internal state of the MDIO bus will be set of MDIOBUS_ALLOCATED ready
+ * for the driver to register the bus.
+ */
static inline struct mii_bus *mdiobus_alloc(void)
{
return mdiobus_alloc_size(0);
@@ -341,40 +412,41 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr);
#define PHY_INTERRUPT_DISABLED false
#define PHY_INTERRUPT_ENABLED true
-/* PHY state machine states:
+/**
+ * enum phy_state - PHY state machine states:
*
- * DOWN: PHY device and driver are not ready for anything. probe
+ * @PHY_DOWN: PHY device and driver are not ready for anything. probe
* should be called if and only if the PHY is in this state,
* given that the PHY device exists.
- * - PHY driver probe function will set the state to READY
+ * - PHY driver probe function will set the state to @PHY_READY
*
- * READY: PHY is ready to send and receive packets, but the
+ * @PHY_READY: PHY is ready to send and receive packets, but the
* controller is not. By default, PHYs which do not implement
* probe will be set to this state by phy_probe().
* - start will set the state to UP
*
- * UP: The PHY and attached device are ready to do work.
+ * @PHY_UP: The PHY and attached device are ready to do work.
* Interrupts should be started here.
- * - timer moves to NOLINK or RUNNING
+ * - timer moves to @PHY_NOLINK or @PHY_RUNNING
*
- * NOLINK: PHY is up, but not currently plugged in.
- * - irq or timer will set RUNNING if link comes back
- * - phy_stop moves to HALTED
+ * @PHY_NOLINK: PHY is up, but not currently plugged in.
+ * - irq or timer will set @PHY_RUNNING if link comes back
+ * - phy_stop moves to @PHY_HALTED
*
- * RUNNING: PHY is currently up, running, and possibly sending
+ * @PHY_RUNNING: PHY is currently up, running, and possibly sending
* and/or receiving packets
- * - irq or timer will set NOLINK if link goes down
- * - phy_stop moves to HALTED
+ * - irq or timer will set @PHY_NOLINK if link goes down
+ * - phy_stop moves to @PHY_HALTED
*
- * CABLETEST: PHY is performing a cable test. Packet reception/sending
+ * @PHY_CABLETEST: PHY is performing a cable test. Packet reception/sending
* is not expected to work, carrier will be indicated as down. PHY will be
* poll once per second, or on interrupt for it current state.
* Once complete, move to UP to restart the PHY.
- * - phy_stop aborts the running test and moves to HALTED
+ * - phy_stop aborts the running test and moves to @PHY_HALTED
*
- * HALTED: PHY is up, but no polling or interrupts are done. Or
+ * @PHY_HALTED: PHY is up, but no polling or interrupts are done. Or
* PHY is in an error state.
- * - phy_start moves to UP
+ * - phy_start moves to @PHY_UP
*/
enum phy_state {
PHY_DOWN = 0,
@@ -403,34 +475,67 @@ struct phy_c45_device_ids {
struct macsec_context;
struct macsec_ops;
-/* phy_device: An instance of a PHY
+/**
+ * struct phy_device - An instance of a PHY
*
- * drv: Pointer to the driver for this PHY instance
- * phy_id: UID for this device found during discovery
- * c45_ids: 802.3-c45 Device Identifers if is_c45.
- * is_c45: Set to true if this phy uses clause 45 addressing.
- * is_internal: Set to true if this phy is internal to a MAC.
- * is_pseudo_fixed_link: Set to true if this phy is an Ethernet switch, etc.
- * is_gigabit_capable: Set to true if PHY supports 1000Mbps
- * has_fixups: Set to true if this phy has fixups/quirks.
- * suspended: Set to true if this phy has been suspended successfully.
- * suspended_by_mdio_bus: Set to true if this phy was suspended by MDIO bus.
- * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal.
- * loopback_enabled: Set true if this phy has been loopbacked successfully.
- * downshifted_rate: Set true if link speed has been downshifted.
- * state: state of the PHY for management purposes
- * dev_flags: Device-specific flags used by the PHY driver.
- * irq: IRQ number of the PHY's interrupt (-1 if none)
- * phy_timer: The timer for handling the state machine
- * sfp_bus_attached: flag indicating whether the SFP bus has been attached
- * sfp_bus: SFP bus attached to this PHY's fiber port
- * attached_dev: The attached enet driver's device instance ptr
- * adjust_link: Callback for the enet controller to respond to
- * changes in the link state.
- * macsec_ops: MACsec offloading ops.
+ * @mdio: MDIO bus this PHY is on
+ * @drv: Pointer to the driver for this PHY instance
+ * @phy_id: UID for this device found during discovery
+ * @c45_ids: 802.3-c45 Device Identifiers if is_c45.
+ * @is_c45: Set to true if this PHY uses clause 45 addressing.
+ * @is_internal: Set to true if this PHY is internal to a MAC.
+ * @is_pseudo_fixed_link: Set to true if this PHY is an Ethernet switch, etc.
+ * @is_gigabit_capable: Set to true if PHY supports 1000Mbps
+ * @has_fixups: Set to true if this PHY has fixups/quirks.
+ * @suspended: Set to true if this PHY has been suspended successfully.
+ * @suspended_by_mdio_bus: Set to true if this PHY was suspended by MDIO bus.
+ * @sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal.
+ * @loopback_enabled: Set true if this PHY has been loopbacked successfully.
+ * @downshifted_rate: Set true if link speed has been downshifted.
+ * @state: State of the PHY for management purposes
+ * @dev_flags: Device-specific flags used by the PHY driver.
+ * @irq: IRQ number of the PHY's interrupt (-1 if none)
+ * @phy_timer: The timer for handling the state machine
+ * @phylink: Pointer to phylink instance for this PHY
+ * @sfp_bus_attached: Flag indicating whether the SFP bus has been attached
+ * @sfp_bus: SFP bus attached to this PHY's fiber port
+ * @attached_dev: The attached enet driver's device instance ptr
+ * @adjust_link: Callback for the enet controller to respond to changes: in the
+ * link state.
+ * @phy_link_change: Callback for phylink for notification of link change
+ * @macsec_ops: MACsec offloading ops.
*
- * speed, duplex, pause, supported, advertising, lp_advertising,
- * and autoneg are used like in mii_if_info
+ * @speed: Current link speed
+ * @duplex: Current duplex
+ * @pause: Current pause
+ * @asym_pause: Current asymmetric pause
+ * @supported: Combined MAC/PHY supported linkmodes
+ * @advertising: Currently advertised linkmodes
+ * @adv_old: Saved advertised while power saving for WoL
+ * @lp_advertising: Current link partner advertised linkmodes
+ * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited
+ * @autoneg: Flag autoneg being used
+ * @link: Current link state
+ * @autoneg_complete: Flag auto negotiation of the link has completed
+ * @mdix: Current crossover
+ * @mdix_ctrl: User setting of crossover
+ * @interrupts: Flag interrupts have been enabled
+ * @interface: enum phy_interface_t value
+ * @skb: Netlink message for cable diagnostics
+ * @nest: Netlink nest used for cable diagnostics
+ * @ehdr: nNtlink header for cable diagnostics
+ * @phy_led_triggers: Array of LED triggers
+ * @phy_num_led_triggers: Number of triggers in @phy_led_triggers
+ * @led_link_trigger: LED trigger for link up/down
+ * @last_triggered: last LED trigger for link speed
+ * @master_slave_set: User requested master/slave configuration
+ * @master_slave_get: Current master/slave advertisement
+ * @master_slave_state: Current master/slave configuration
+ * @mii_ts: Pointer to time stamper callbacks
+ * @lock: Mutex for serialization access to PHY
+ * @state_queue: Work queue for state machine
+ * @shared: Pointer to private data shared by phys in one package
+ * @priv: Pointer to driver private data
*
* interrupts currently only supports enabled or disabled,
* but could be changed in the future to support enabling
@@ -550,9 +655,18 @@ struct phy_device {
#define to_phy_device(d) container_of(to_mdio_device(d), \
struct phy_device, mdio)
-/* A structure containing possible configuration parameters
+/**
+ * struct phy_tdr_config - Configuration of a TDR raw test
+ *
+ * @first: Distance for first data collection point
+ * @last: Distance for last data collection point
+ * @step: Step between data collection points
+ * @pair: Bitmap of cable pairs to collect data for
+ *
+ * A structure containing possible configuration parameters
* for a TDR cable test. The driver does not need to implement
* all the parameters, but should report what is actually used.
+ * All distances are in centimeters.
*/
struct phy_tdr_config {
u32 first;
@@ -562,18 +676,20 @@ struct phy_tdr_config {
};
#define PHY_PAIR_ALL -1
-/* struct phy_driver: Driver structure for a particular PHY type
+/**
+ * struct phy_driver - Driver structure for a particular PHY type
*
- * driver_data: static driver data
- * phy_id: The result of reading the UID registers of this PHY
+ * @mdiodrv: Data common to all MDIO devices
+ * @phy_id: The result of reading the UID registers of this PHY
* type, and ANDing them with the phy_id_mask. This driver
* only works for PHYs with IDs which match this field
- * name: The friendly name of this PHY type
- * phy_id_mask: Defines the important bits of the phy_id
- * features: A mandatory list of features (speed, duplex, etc)
+ * @name: The friendly name of this PHY type
+ * @phy_id_mask: Defines the important bits of the phy_id
+ * @features: A mandatory list of features (speed, duplex, etc)
* supported by this PHY
- * flags: A bitfield defining certain other features this PHY
+ * @flags: A bitfield defining certain other features this PHY
* supports (like interrupts)
+ * @driver_data: Static driver data
*
* All functions are optional. If config_aneg or read_status
* are not implemented, the phy core uses the genphy versions.
@@ -592,151 +708,178 @@ struct phy_driver {
u32 flags;
const void *driver_data;
- /*
- * Called to issue a PHY software reset
+ /**
+ * @soft_reset: Called to issue a PHY software reset
*/
int (*soft_reset)(struct phy_device *phydev);
- /*
- * Called to initialize the PHY,
+ /**
+ * @config_init: Called to initialize the PHY,
* including after a reset
*/
int (*config_init)(struct phy_device *phydev);
- /*
- * Called during discovery. Used to set
+ /**
+ * @probe: Called during discovery. Used to set
* up device-specific structures, if any
*/
int (*probe)(struct phy_device *phydev);
- /*
- * Probe the hardware to determine what abilities it has.
- * Should only set phydev->supported.
+ /**
+ * @get_features: Probe the hardware to determine what
+ * abilities it has. Should only set phydev->supported.
*/
int (*get_features)(struct phy_device *phydev);
/* PHY Power Management */
+ /** @suspend: Suspend the hardware, saving state if needed */
int (*suspend)(struct phy_device *phydev);
+ /** @resume: Resume the hardware, restoring state if needed */
int (*resume)(struct phy_device *phydev);
- /*
- * Configures the advertisement and resets
+ /**
+ * @config_aneg: Configures the advertisement and resets
* autonegotiation if phydev->autoneg is on,
* forces the speed to the current settings in phydev
* if phydev->autoneg is off
*/
int (*config_aneg)(struct phy_device *phydev);
- /* Determines the auto negotiation result */
+ /** @aneg_done: Determines the auto negotiation result */
int (*aneg_done)(struct phy_device *phydev);
- /* Determines the negotiated speed and duplex */
+ /** @read_status: Determines the negotiated speed and duplex */
int (*read_status)(struct phy_device *phydev);
- /* Clears any pending interrupts */
+ /** @ack_interrupt: Clears any pending interrupts */
int (*ack_interrupt)(struct phy_device *phydev);
- /* Enables or disables interrupts */
+ /** @config_intr: Enables or disables interrupts */
int (*config_intr)(struct phy_device *phydev);
- /*
- * Checks if the PHY generated an interrupt.
+ /**
+ * @did_interrupt: Checks if the PHY generated an interrupt.
* For multi-PHY devices with shared PHY interrupt pin
* Set interrupt bits have to be cleared.
*/
int (*did_interrupt)(struct phy_device *phydev);
- /* Override default interrupt handling */
+ /** @handle_interrupt: Override default interrupt handling */
irqreturn_t (*handle_interrupt)(struct phy_device *phydev);
- /* Clears up any memory if needed */
+ /** @remove: Clears up any memory if needed */
void (*remove)(struct phy_device *phydev);
- /* Returns true if this is a suitable driver for the given
- * phydev. If NULL, matching is based on phy_id and
- * phy_id_mask.
+ /**
+ * @match_phy_device: Returns true if this is a suitable
+ * driver for the given phydev. If NULL, matching is based on
+ * phy_id and phy_id_mask.
*/
int (*match_phy_device)(struct phy_device *phydev);
- /* Some devices (e.g. qnap TS-119P II) require PHY register changes to
- * enable Wake on LAN, so set_wol is provided to be called in the
- * ethernet driver's set_wol function. */
+ /**
+ * @set_wol: Some devices (e.g. qnap TS-119P II) require PHY
+ * register changes to enable Wake on LAN, so set_wol is
+ * provided to be called in the ethernet driver's set_wol
+ * function.
+ */
int (*set_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol);
- /* See set_wol, but for checking whether Wake on LAN is enabled. */
+ /**
+ * @get_wol: See set_wol, but for checking whether Wake on LAN
+ * is enabled.
+ */
void (*get_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol);
- /*
- * Called to inform a PHY device driver when the core is about to
- * change the link state. This callback is supposed to be used as
- * fixup hook for drivers that need to take action when the link
- * state changes. Drivers are by no means allowed to mess with the
+ /**
+ * @link_change_notify: Called to inform a PHY device driver
+ * when the core is about to change the link state. This
+ * callback is supposed to be used as fixup hook for drivers
+ * that need to take action when the link state
+ * changes. Drivers are by no means allowed to mess with the
* PHY device structure in their implementations.
*/
void (*link_change_notify)(struct phy_device *dev);
- /*
- * Phy specific driver override for reading a MMD register.
- * This function is optional for PHY specific drivers. When
- * not provided, the default MMD read function will be used
- * by phy_read_mmd(), which will use either a direct read for
- * Clause 45 PHYs or an indirect read for Clause 22 PHYs.
- * devnum is the MMD device number within the PHY device,
- * regnum is the register within the selected MMD device.
+ /**
+ * @read_mmd: PHY specific driver override for reading a MMD
+ * register. This function is optional for PHY specific
+ * drivers. When not provided, the default MMD read function
+ * will be used by phy_read_mmd(), which will use either a
+ * direct read for Clause 45 PHYs or an indirect read for
+ * Clause 22 PHYs. devnum is the MMD device number within the
+ * PHY device, regnum is the register within the selected MMD
+ * device.
*/
int (*read_mmd)(struct phy_device *dev, int devnum, u16 regnum);
- /*
- * Phy specific driver override for writing a MMD register.
- * This function is optional for PHY specific drivers. When
- * not provided, the default MMD write function will be used
- * by phy_write_mmd(), which will use either a direct write for
- * Clause 45 PHYs, or an indirect write for Clause 22 PHYs.
- * devnum is the MMD device number within the PHY device,
- * regnum is the register within the selected MMD device.
- * val is the value to be written.
+ /**
+ * @write_mmd: PHY specific driver override for writing a MMD
+ * register. This function is optional for PHY specific
+ * drivers. When not provided, the default MMD write function
+ * will be used by phy_write_mmd(), which will use either a
+ * direct write for Clause 45 PHYs, or an indirect write for
+ * Clause 22 PHYs. devnum is the MMD device number within the
+ * PHY device, regnum is the register within the selected MMD
+ * device. val is the value to be written.
*/
int (*write_mmd)(struct phy_device *dev, int devnum, u16 regnum,
u16 val);
+ /** @read_page: Return the current PHY register page number */
int (*read_page)(struct phy_device *dev);
+ /** @write_page: Set the current PHY register page number */
int (*write_page)(struct phy_device *dev, int page);
- /* Get the size and type of the eeprom contained within a plug-in
- * module */
+ /**
+ * @module_info: Get the size and type of the eeprom contained
+ * within a plug-in module
+ */
int (*module_info)(struct phy_device *dev,
struct ethtool_modinfo *modinfo);
- /* Get the eeprom information from the plug-in module */
+ /**
+ * @module_eeprom: Get the eeprom information from the plug-in
+ * module
+ */
int (*module_eeprom)(struct phy_device *dev,
struct ethtool_eeprom *ee, u8 *data);
- /* Start a cable test */
+ /** @cable_test_start: Start a cable test */
int (*cable_test_start)(struct phy_device *dev);
- /* Start a raw TDR cable test */
+ /** @cable_test_tdr_start: Start a raw TDR cable test */
int (*cable_test_tdr_start)(struct phy_device *dev,
const struct phy_tdr_config *config);
- /* Once per second, or on interrupt, request the status of the
- * test.
+ /**
+ * @cable_test_get_status: Once per second, or on interrupt,
+ * request the status of the test.
*/
int (*cable_test_get_status)(struct phy_device *dev, bool *finished);
- /* Get statistics from the phy using ethtool */
+ /* Get statistics from the PHY using ethtool */
+ /** @get_sset_count: Number of statistic counters */
int (*get_sset_count)(struct phy_device *dev);
+ /** @get_strings: Names of the statistic counters */
void (*get_strings)(struct phy_device *dev, u8 *data);
+ /** @get_stats: Return the statistic counter values */
void (*get_stats)(struct phy_device *dev,
struct ethtool_stats *stats, u64 *data);
/* Get and Set PHY tunables */
+ /** @get_tunable: Return the value of a tunable */
int (*get_tunable)(struct phy_device *dev,
struct ethtool_tunable *tuna, void *data);
+ /** @set_tunable: Set the value of a tunable */
int (*set_tunable)(struct phy_device *dev,
struct ethtool_tunable *tuna,
const void *data);
+ /** @set_loopback: Set the loopback mood of the PHY */
int (*set_loopback)(struct phy_device *dev, bool enable);
+ /** @get_sqi: Get the signal quality indication */
int (*get_sqi)(struct phy_device *dev);
+ /** @get_sqi_max: Get the maximum signal quality indication */
int (*get_sqi_max)(struct phy_device *dev);
};
#define to_phy_driver(d) container_of(to_mdio_common_driver(d), \
@@ -890,6 +1033,24 @@ static inline int __phy_modify_changed(struct phy_device *phydev, u32 regnum,
*/
int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum);
+/**
+ * phy_read_mmd_poll_timeout - Periodically poll a PHY register until a
+ * condition is met or a timeout occurs
+ *
+ * @phydev: The phy_device struct
+ * @devaddr: The MMD to read from
+ * @regnum: The register on the MMD to read
+ * @val: Variable to read the register into
+ * @cond: Break condition (usually involving @val)
+ * @sleep_us: Maximum time to sleep between reads in us (0
+ * tight-loops). Should be less than ~20ms since usleep_range
+ * is used (see Documentation/timers/timers-howto.rst).
+ * @timeout_us: Timeout in us, 0 means never timeout
+ * @sleep_before_read: if it is true, sleep @sleep_us before read.
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @args is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
+ */
#define phy_read_mmd_poll_timeout(phydev, devaddr, regnum, val, cond, \
sleep_us, timeout_us, sleep_before_read) \
({ \
@@ -1161,7 +1322,7 @@ static inline bool phy_is_internal(struct phy_device *phydev)
/**
* phy_interface_mode_is_rgmii - Convenience function for testing if a
* PHY interface mode is RGMII (all variants)
- * @mode: the phy_interface_t enum
+ * @mode: the &phy_interface_t enum
*/
static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode)
{
@@ -1170,11 +1331,11 @@ static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode)
};
/**
- * phy_interface_mode_is_8023z() - does the phy interface mode use 802.3z
+ * phy_interface_mode_is_8023z() - does the PHY interface mode use 802.3z
* negotiation
* @mode: one of &enum phy_interface_t
*
- * Returns true if the phy interface mode uses the 16-bit negotiation
+ * Returns true if the PHY interface mode uses the 16-bit negotiation
* word as defined in 802.3z. (See 802.3-2015 37.2.1 Config_Reg encoding)
*/
static inline bool phy_interface_mode_is_8023z(phy_interface_t mode)
@@ -1193,7 +1354,7 @@ static inline bool phy_interface_is_rgmii(struct phy_device *phydev)
return phy_interface_mode_is_rgmii(phydev->interface);
};
-/*
+/**
* phy_is_pseudo_fixed_link - Convenience function for testing if this
* PHY is the CPU port facing side of an Ethernet switch, or similar.
* @phydev: the phy_device struct
@@ -1566,8 +1727,9 @@ static inline int mdiobus_register_board_info(const struct mdio_board_info *i,
/**
- * module_phy_driver() - Helper macro for registering PHY drivers
+ * phy_module_driver() - Helper macro for registering PHY drivers
* @__phy_drivers: array of PHY drivers to register
+ * @__count: Numbers of members in array
*
* Helper macro for PHY drivers which do not do anything special in module
* init/exit. Each module may only use this macro once, and calling it
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c738abeb3265..dc763ca9413c 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -96,7 +96,8 @@ struct inet_connection_sock {
void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
struct hlist_node icsk_listen_portaddr_node;
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
- __u8 icsk_ca_state:6,
+ __u8 icsk_ca_state:5,
+ icsk_ca_initialized:1,
icsk_ca_setsockopt:1,
icsk_ca_dst_locked:1;
__u8 icsk_retransmits;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 852f0d71dd40..3601dea931a6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1104,7 +1104,7 @@ void tcp_get_available_congestion_control(char *buf, size_t len);
void tcp_get_allowed_congestion_control(char *buf, size_t len);
int tcp_set_allowed_congestion_control(char *allowed);
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
- bool reinit, bool cap_net_admin);
+ bool cap_net_admin);
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 0ac4e7fba086..3105bbb6cdcf 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -710,8 +710,8 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid,
int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid);
int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr);
int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr);
-int ocelot_port_add_txtstamp_skb(struct ocelot_port *ocelot_port,
- struct sk_buff *skb);
+void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,
+ struct sk_buff *clone);
void ocelot_get_txtstamp(struct ocelot *ocelot);
void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu);
int ocelot_get_max_mtu(struct ocelot *ocelot, int port);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8dda13880957..a22812561064 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -124,6 +124,7 @@ enum bpf_cmd {
BPF_ENABLE_STATS,
BPF_ITER_CREATE,
BPF_LINK_DETACH,
+ BPF_PROG_BIND_MAP,
};
enum bpf_map_type {
@@ -658,6 +659,12 @@ union bpf_attr {
__u32 flags;
} iter_create;
+ struct { /* struct used by BPF_PROG_BIND_MAP command */
+ __u32 prog_fd;
+ __u32 map_fd;
+ __u32 flags; /* extra flags */
+ } prog_bind_map;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -1447,8 +1454,8 @@ union bpf_attr {
* Return
* The return value depends on the result of the test, and can be:
*
- * * 0, if the *skb* task belongs to the cgroup2.
- * * 1, if the *skb* task does not belong to the cgroup2.
+ * * 0, if current task belongs to the cgroup2.
+ * * 1, if current task does not belong to the cgroup2.
* * A negative error code, if an error occurred.
*
* long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
@@ -3349,38 +3356,38 @@ union bpf_attr {
* Description
* Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
* Return
- * *sk* if casting is valid, or NULL otherwise.
+ * *sk* if casting is valid, or **NULL** otherwise.
*
* struct tcp_sock *bpf_skc_to_tcp_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp_sock* pointer.
* Return
- * *sk* if casting is valid, or NULL otherwise.
+ * *sk* if casting is valid, or **NULL** otherwise.
*
* struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer.
* Return
- * *sk* if casting is valid, or NULL otherwise.
+ * *sk* if casting is valid, or **NULL** otherwise.
*
* struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
* Return
- * *sk* if casting is valid, or NULL otherwise.
+ * *sk* if casting is valid, or **NULL** otherwise.
*
* struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
* Return
- * *sk* if casting is valid, or NULL otherwise.
+ * *sk* if casting is valid, or **NULL** otherwise.
*
* long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
* Description
* Return a user or a kernel stack in bpf program provided buffer.
* To achieve this, the helper needs *task*, which is a valid
- * pointer to struct task_struct. To store the stacktrace, the
- * bpf program provides *buf* with a nonnegative *size*.
+ * pointer to **struct task_struct**. To store the stacktrace, the
+ * bpf program provides *buf* with a nonnegative *size*.
*
* The last argument, *flags*, holds the number of stack frames to
* skip (from 0 to 255), masked with
@@ -3410,12 +3417,12 @@ union bpf_attr {
* long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
* Description
* Load header option. Support reading a particular TCP header
- * option for bpf program (BPF_PROG_TYPE_SOCK_OPS).
+ * option for bpf program (**BPF_PROG_TYPE_SOCK_OPS**).
*
* If *flags* is 0, it will search the option from the
- * sock_ops->skb_data. The comment in "struct bpf_sock_ops"
+ * *skops*\ **->skb_data**. The comment in **struct bpf_sock_ops**
* has details on what skb_data contains under different
- * sock_ops->op.
+ * *skops*\ **->op**.
*
* The first byte of the *searchby_res* specifies the
* kind that it wants to search.
@@ -3435,7 +3442,7 @@ union bpf_attr {
* [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ].
*
* To search for the standard window scale option (3),
- * the searchby_res should be [ 3, 0, 0, .... 0 ].
+ * the *searchby_res* should be [ 3, 0, 0, .... 0 ].
* Note, kind-length must be 0 for regular option.
*
* Searching for No-Op (0) and End-of-Option-List (1) are
@@ -3445,27 +3452,30 @@ union bpf_attr {
* of a header option.
*
* Supported flags:
+ *
* * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the
* saved_syn packet or the just-received syn packet.
*
* Return
- * >0 when found, the header option is copied to *searchby_res*.
- * The return value is the total length copied.
+ * > 0 when found, the header option is copied to *searchby_res*.
+ * The return value is the total length copied. On failure, a
+ * negative error code is returned:
*
- * **-EINVAL** If param is invalid
+ * **-EINVAL** if a parameter is invalid.
*
- * **-ENOMSG** The option is not found
+ * **-ENOMSG** if the option is not found.
*
- * **-ENOENT** No syn packet available when
- * **BPF_LOAD_HDR_OPT_TCP_SYN** is used
+ * **-ENOENT** if no syn packet is available when
+ * **BPF_LOAD_HDR_OPT_TCP_SYN** is used.
*
- * **-ENOSPC** Not enough space. Only *len* number of
- * bytes are copied.
+ * **-ENOSPC** if there is not enough space. Only *len* number of
+ * bytes are copied.
*
- * **-EFAULT** Cannot parse the header options in the packet
+ * **-EFAULT** on failure to parse the header options in the
+ * packet.
*
- * **-EPERM** This helper cannot be used under the
- * current sock_ops->op.
+ * **-EPERM** if the helper cannot be used under the current
+ * *skops*\ **->op**.
*
* long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags)
* Description
@@ -3483,44 +3493,44 @@ union bpf_attr {
* by searching the same option in the outgoing skb.
*
* This helper can only be called during
- * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**.
*
* Return
* 0 on success, or negative error in case of failure:
*
- * **-EINVAL** If param is invalid
+ * **-EINVAL** If param is invalid.
*
- * **-ENOSPC** Not enough space in the header.
- * Nothing has been written
+ * **-ENOSPC** if there is not enough space in the header.
+ * Nothing has been written
*
- * **-EEXIST** The option has already existed
+ * **-EEXIST** if the option already exists.
*
- * **-EFAULT** Cannot parse the existing header options
+ * **-EFAULT** on failrue to parse the existing header options.
*
- * **-EPERM** This helper cannot be used under the
- * current sock_ops->op.
+ * **-EPERM** if the helper cannot be used under the current
+ * *skops*\ **->op**.
*
* long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags)
* Description
* Reserve *len* bytes for the bpf header option. The
- * space will be used by bpf_store_hdr_opt() later in
- * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ * space will be used by **bpf_store_hdr_opt**\ () later in
+ * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**.
*
- * If bpf_reserve_hdr_opt() is called multiple times,
+ * If **bpf_reserve_hdr_opt**\ () is called multiple times,
* the total number of bytes will be reserved.
*
* This helper can only be called during
- * BPF_SOCK_OPS_HDR_OPT_LEN_CB.
+ * **BPF_SOCK_OPS_HDR_OPT_LEN_CB**.
*
* Return
* 0 on success, or negative error in case of failure:
*
- * **-EINVAL** if param is invalid
+ * **-EINVAL** if a parameter is invalid.
*
- * **-ENOSPC** Not enough space in the header.
+ * **-ENOSPC** if there is not enough space in the header.
*
- * **-EPERM** This helper cannot be used under the
- * current sock_ops->op.
+ * **-EPERM** if the helper cannot be used under the current
+ * *skops*\ **->op**.
*
* void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags)
* Description
@@ -3560,9 +3570,9 @@ union bpf_attr {
*
* long bpf_d_path(struct path *path, char *buf, u32 sz)
* Description
- * Return full path for given 'struct path' object, which
- * needs to be the kernel BTF 'path' object. The path is
- * returned in the provided buffer 'buf' of size 'sz' and
+ * Return full path for given **struct path** object, which
+ * needs to be the kernel BTF *path* object. The path is
+ * returned in the provided buffer *buf* of size *sz* and
* is zero terminated.
*
* Return
@@ -3573,7 +3583,7 @@ union bpf_attr {
* long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr)
* Description
* Read *size* bytes from user space address *user_ptr* and store
- * the data in *dst*. This is a wrapper of copy_from_user().
+ * the data in *dst*. This is a wrapper of **copy_from_user**\ ().
* Return
* 0 on success, or a negative error in case of failure.
*/
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 75a2ac479247..4c687686aa8f 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -457,6 +457,8 @@ enum {
MDBA_MDB_EATTR_TIMER,
MDBA_MDB_EATTR_SRC_LIST,
MDBA_MDB_EATTR_GROUP_MODE,
+ MDBA_MDB_EATTR_SOURCE,
+ MDBA_MDB_EATTR_RTPROT,
__MDBA_MDB_EATTR_MAX
};
#define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1)
@@ -516,6 +518,8 @@ struct br_mdb_entry {
__u8 state;
#define MDB_FLAGS_OFFLOAD (1 << 0)
#define MDB_FLAGS_FAST_LEAVE (1 << 1)
+#define MDB_FLAGS_STAR_EXCL (1 << 2)
+#define MDB_FLAGS_BLOCKED (1 << 3)
__u8 flags;
__u16 vid;
struct {
@@ -530,10 +534,23 @@ struct br_mdb_entry {
enum {
MDBA_SET_ENTRY_UNSPEC,
MDBA_SET_ENTRY,
+ MDBA_SET_ENTRY_ATTRS,
__MDBA_SET_ENTRY_MAX,
};
#define MDBA_SET_ENTRY_MAX (__MDBA_SET_ENTRY_MAX - 1)
+/* [MDBA_SET_ENTRY_ATTRS] = {
+ * [MDBE_ATTR_xxx]
+ * ...
+ * }
+ */
+enum {
+ MDBE_ATTR_UNSPEC,
+ MDBE_ATTR_SOURCE,
+ __MDBE_ATTR_MAX,
+};
+#define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1)
+
/* Embedded inside LINK_XSTATS_TYPE_BRIDGE */
enum {
BRIDGE_XSTATS_UNSPEC,
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index e046fb7d17cd..e5fd31268ae0 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -898,6 +898,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
struct bpf_prog *old,
struct bpf_prog *new)
{
+ u8 *old_addr, *new_addr, *old_bypass_addr;
struct prog_poke_elem *elem;
struct bpf_array_aux *aux;
@@ -918,12 +919,13 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
* there could be danger of use after free otherwise.
* 2) Initially when we start tracking aux, the program
* is not JITed yet and also does not have a kallsyms
- * entry. We skip these as poke->ip_stable is not
- * active yet. The JIT will do the final fixup before
- * setting it stable. The various poke->ip_stable are
- * successively activated, so tail call updates can
- * arrive from here while JIT is still finishing its
- * final fixup for non-activated poke entries.
+ * entry. We skip these as poke->tailcall_target_stable
+ * is not active yet. The JIT will do the final fixup
+ * before setting it stable. The various
+ * poke->tailcall_target_stable are successively
+ * activated, so tail call updates can arrive from here
+ * while JIT is still finishing its final fixup for
+ * non-activated poke entries.
* 3) On program teardown, the program's kallsym entry gets
* removed out of RCU callback, but we can only untrack
* from sleepable context, therefore bpf_arch_text_poke()
@@ -940,7 +942,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
* 5) Any other error happening below from bpf_arch_text_poke()
* is a unexpected bug.
*/
- if (!READ_ONCE(poke->ip_stable))
+ if (!READ_ONCE(poke->tailcall_target_stable))
continue;
if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
continue;
@@ -948,12 +950,39 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
poke->tail_call.key != key)
continue;
- ret = bpf_arch_text_poke(poke->ip, BPF_MOD_JUMP,
- old ? (u8 *)old->bpf_func +
- poke->adj_off : NULL,
- new ? (u8 *)new->bpf_func +
- poke->adj_off : NULL);
- BUG_ON(ret < 0 && ret != -EINVAL);
+ old_bypass_addr = old ? NULL : poke->bypass_addr;
+ old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
+ new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
+
+ if (new) {
+ ret = bpf_arch_text_poke(poke->tailcall_target,
+ BPF_MOD_JUMP,
+ old_addr, new_addr);
+ BUG_ON(ret < 0 && ret != -EINVAL);
+ if (!old) {
+ ret = bpf_arch_text_poke(poke->tailcall_bypass,
+ BPF_MOD_JUMP,
+ poke->bypass_addr,
+ NULL);
+ BUG_ON(ret < 0 && ret != -EINVAL);
+ }
+ } else {
+ ret = bpf_arch_text_poke(poke->tailcall_bypass,
+ BPF_MOD_JUMP,
+ old_bypass_addr,
+ poke->bypass_addr);
+ BUG_ON(ret < 0 && ret != -EINVAL);
+ /* let other CPUs finish the execution of program
+ * so that it will not possible to expose them
+ * to invalid nop, stack unwind, nop state
+ */
+ if (!ret)
+ synchronize_rcu();
+ ret = bpf_arch_text_poke(poke->tailcall_target,
+ BPF_MOD_JUMP,
+ old_addr, NULL);
+ BUG_ON(ret < 0 && ret != -EINVAL);
+ }
}
}
}
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 75be02799c0f..6edff97ad594 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -249,9 +249,7 @@ const struct bpf_map_ops inode_storage_map_ops = {
.map_owner_storage_ptr = inode_storage_ptr,
};
-BTF_ID_LIST(bpf_inode_storage_btf_ids)
-BTF_ID_UNUSED
-BTF_ID(struct, inode)
+BTF_ID_LIST_SINGLE(bpf_inode_storage_btf_ids, struct, inode)
const struct bpf_func_proto bpf_inode_storage_get_proto = {
.func = bpf_inode_storage_get,
@@ -259,9 +257,9 @@ const struct bpf_func_proto bpf_inode_storage_get_proto = {
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_btf_id = &bpf_inode_storage_btf_ids[0],
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
- .btf_id = bpf_inode_storage_btf_ids,
};
const struct bpf_func_proto bpf_inode_storage_delete_proto = {
@@ -270,5 +268,5 @@ const struct bpf_func_proto bpf_inode_storage_delete_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
- .btf_id = bpf_inode_storage_btf_ids,
+ .arg2_btf_id = &bpf_inode_storage_btf_ids[0],
};
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index ffa7d11fc2bd..5d3a7af9ba9b 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -159,7 +159,7 @@ void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem)
{
RCU_INIT_POINTER(selem->local_storage, local_storage);
- hlist_add_head(&selem->snode, &local_storage->list);
+ hlist_add_head_rcu(&selem->snode, &local_storage->list);
}
void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index f9ac6935ab3c..5d3c36e13139 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4193,19 +4193,6 @@ again:
return true;
}
-int btf_resolve_helper_id(struct bpf_verifier_log *log,
- const struct bpf_func_proto *fn, int arg)
-{
- int id;
-
- if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID || !btf_vmlinux)
- return -EINVAL;
- id = fn->btf_id[arg];
- if (!id || id > btf_vmlinux->nr_types)
- return -EINVAL;
- return id;
-}
-
static int __get_type_size(struct btf *btf, u32 btf_id,
const struct btf_type **bad_type)
{
@@ -4772,7 +4759,7 @@ static int btf_id_cmp_func(const void *a, const void *b)
return *pa - *pb;
}
-bool btf_id_set_contains(struct btf_id_set *set, u32 id)
+bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
{
return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ed0b3578867c..c4811b139caa 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -98,6 +98,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
fp->jit_requested = ebpf_jit_enabled();
INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
+ mutex_init(&fp->aux->used_maps_mutex);
return fp;
}
@@ -253,6 +254,7 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
void __bpf_prog_free(struct bpf_prog *fp)
{
if (fp->aux) {
+ mutex_destroy(&fp->aux->used_maps_mutex);
free_percpu(fp->aux->stats);
kfree(fp->aux->poke_tab);
kfree(fp->aux);
@@ -773,7 +775,8 @@ int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
if (size > poke_tab_max)
return -ENOSPC;
- if (poke->ip || poke->ip_stable || poke->adj_off)
+ if (poke->tailcall_target || poke->tailcall_target_stable ||
+ poke->tailcall_bypass || poke->adj_off || poke->bypass_addr)
return -EINVAL;
switch (poke->reason) {
@@ -1747,8 +1750,9 @@ bool bpf_prog_array_compatible(struct bpf_array *array,
static int bpf_check_tail_call(const struct bpf_prog *fp)
{
struct bpf_prog_aux *aux = fp->aux;
- int i;
+ int i, ret = 0;
+ mutex_lock(&aux->used_maps_mutex);
for (i = 0; i < aux->used_map_cnt; i++) {
struct bpf_map *map = aux->used_maps[i];
struct bpf_array *array;
@@ -1757,11 +1761,15 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
continue;
array = container_of(map, struct bpf_array, map);
- if (!bpf_prog_array_compatible(array, fp))
- return -EINVAL;
+ if (!bpf_prog_array_compatible(array, fp)) {
+ ret = -EINVAL;
+ goto out;
+ }
}
- return 0;
+out:
+ mutex_unlock(&aux->used_maps_mutex);
+ return ret;
}
static void bpf_prog_select_func(struct bpf_prog *fp)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index a2fa006f430e..06065fa27124 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -665,18 +665,17 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
return __bpf_get_stack(regs, task, NULL, buf, size, flags);
}
-BTF_ID_LIST(bpf_get_task_stack_btf_ids)
-BTF_ID(struct, task_struct)
+BTF_ID_LIST_SINGLE(bpf_get_task_stack_btf_ids, struct, task_struct)
const struct bpf_func_proto bpf_get_task_stack_proto = {
.func = bpf_get_task_stack,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_get_task_stack_btf_ids[0],
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
- .btf_id = bpf_get_task_stack_btf_ids,
};
BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 178c147350f5..34268491d2de 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2345,12 +2345,8 @@ void bpf_link_put(struct bpf_link *link)
if (!atomic64_dec_and_test(&link->refcnt))
return;
- if (in_atomic()) {
- INIT_WORK(&link->work, bpf_link_put_deferred);
- schedule_work(&link->work);
- } else {
- bpf_link_free(link);
- }
+ INIT_WORK(&link->work, bpf_link_put_deferred);
+ schedule_work(&link->work);
}
static int bpf_link_release(struct inode *inode, struct file *filp)
@@ -3162,21 +3158,25 @@ static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
const struct bpf_map *map;
int i;
+ mutex_lock(&prog->aux->used_maps_mutex);
for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
map = prog->aux->used_maps[i];
if (map == (void *)addr) {
*type = BPF_PSEUDO_MAP_FD;
- return map;
+ goto out;
}
if (!map->ops->map_direct_value_meta)
continue;
if (!map->ops->map_direct_value_meta(map, addr, off)) {
*type = BPF_PSEUDO_MAP_VALUE;
- return map;
+ goto out;
}
}
+ map = NULL;
- return NULL;
+out:
+ mutex_unlock(&prog->aux->used_maps_mutex);
+ return map;
}
static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
@@ -3294,6 +3294,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
memcpy(info.tag, prog->tag, sizeof(prog->tag));
memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
+ mutex_lock(&prog->aux->used_maps_mutex);
ulen = info.nr_map_ids;
info.nr_map_ids = prog->aux->used_map_cnt;
ulen = min_t(u32, info.nr_map_ids, ulen);
@@ -3303,9 +3304,12 @@ static int bpf_prog_get_info_by_fd(struct file *file,
for (i = 0; i < ulen; i++)
if (put_user(prog->aux->used_maps[i]->id,
- &user_map_ids[i]))
+ &user_map_ids[i])) {
+ mutex_unlock(&prog->aux->used_maps_mutex);
return -EFAULT;
+ }
}
+ mutex_unlock(&prog->aux->used_maps_mutex);
err = set_info_rec_size(&info);
if (err)
@@ -4153,6 +4157,66 @@ static int bpf_iter_create(union bpf_attr *attr)
return err;
}
+#define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags
+
+static int bpf_prog_bind_map(union bpf_attr *attr)
+{
+ struct bpf_prog *prog;
+ struct bpf_map *map;
+ struct bpf_map **used_maps_old, **used_maps_new;
+ int i, ret = 0;
+
+ if (CHECK_ATTR(BPF_PROG_BIND_MAP))
+ return -EINVAL;
+
+ if (attr->prog_bind_map.flags)
+ return -EINVAL;
+
+ prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ map = bpf_map_get(attr->prog_bind_map.map_fd);
+ if (IS_ERR(map)) {
+ ret = PTR_ERR(map);
+ goto out_prog_put;
+ }
+
+ mutex_lock(&prog->aux->used_maps_mutex);
+
+ used_maps_old = prog->aux->used_maps;
+
+ for (i = 0; i < prog->aux->used_map_cnt; i++)
+ if (used_maps_old[i] == map)
+ goto out_unlock;
+
+ used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1,
+ sizeof(used_maps_new[0]),
+ GFP_KERNEL);
+ if (!used_maps_new) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ memcpy(used_maps_new, used_maps_old,
+ sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
+ used_maps_new[prog->aux->used_map_cnt] = map;
+
+ prog->aux->used_map_cnt++;
+ prog->aux->used_maps = used_maps_new;
+
+ kfree(used_maps_old);
+
+out_unlock:
+ mutex_unlock(&prog->aux->used_maps_mutex);
+
+ if (ret)
+ bpf_map_put(map);
+out_prog_put:
+ bpf_prog_put(prog);
+ return ret;
+}
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr;
@@ -4286,6 +4350,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_LINK_DETACH:
err = link_detach(&attr);
break;
+ case BPF_PROG_BIND_MAP:
+ err = bpf_prog_bind_map(&attr);
+ break;
default:
err = -EINVAL;
break;
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 99af4cea1102..5b6af30bfbcd 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -22,7 +22,8 @@ struct bpf_iter_seq_task_info {
};
static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
- u32 *tid)
+ u32 *tid,
+ bool skip_if_dup_files)
{
struct task_struct *task = NULL;
struct pid *pid;
@@ -36,6 +37,12 @@ retry:
if (!task) {
++*tid;
goto retry;
+ } else if (skip_if_dup_files && task->tgid != task->pid &&
+ task->files == task->group_leader->files) {
+ put_task_struct(task);
+ task = NULL;
+ ++*tid;
+ goto retry;
}
}
rcu_read_unlock();
@@ -48,7 +55,7 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos)
struct bpf_iter_seq_task_info *info = seq->private;
struct task_struct *task;
- task = task_seq_get_next(info->common.ns, &info->tid);
+ task = task_seq_get_next(info->common.ns, &info->tid, false);
if (!task)
return NULL;
@@ -65,7 +72,7 @@ static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++*pos;
++info->tid;
put_task_struct((struct task_struct *)v);
- task = task_seq_get_next(info->common.ns, &info->tid);
+ task = task_seq_get_next(info->common.ns, &info->tid, false);
if (!task)
return NULL;
@@ -148,7 +155,7 @@ again:
curr_files = *fstruct;
curr_fd = info->fd;
} else {
- curr_task = task_seq_get_next(ns, &curr_tid);
+ curr_task = task_seq_get_next(ns, &curr_tid, true);
if (!curr_task)
return NULL;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 86fdebb5ffd8..42dee5dcbc74 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -238,7 +238,6 @@ struct bpf_call_arg_meta {
u64 msize_max_value;
int ref_obj_id;
int func_id;
- u32 btf_id;
};
struct btf *btf_vmlinux;
@@ -436,6 +435,15 @@ static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
return type == ARG_PTR_TO_SOCK_COMMON;
}
+static bool arg_type_may_be_null(enum bpf_arg_type type)
+{
+ return type == ARG_PTR_TO_MAP_VALUE_OR_NULL ||
+ type == ARG_PTR_TO_MEM_OR_NULL ||
+ type == ARG_PTR_TO_CTX_OR_NULL ||
+ type == ARG_PTR_TO_SOCKET_OR_NULL ||
+ type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
+}
+
/* Determine whether the function releases some resources allocated by another
* function call. The first reference type argument will be assumed to be
* released by release_reference().
@@ -1490,6 +1498,13 @@ static int check_subprogs(struct bpf_verifier_env *env)
for (i = 0; i < insn_cnt; i++) {
u8 code = insn[i].code;
+ if (code == (BPF_JMP | BPF_CALL) &&
+ insn[i].imm == BPF_FUNC_tail_call &&
+ insn[i].src_reg != BPF_PSEUDO_CALL)
+ subprog[cur_subprog].has_tail_call = true;
+ if (BPF_CLASS(code) == BPF_LD &&
+ (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
+ subprog[cur_subprog].has_ld_abs = true;
if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
goto next;
if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
@@ -2979,10 +2994,37 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
struct bpf_subprog_info *subprog = env->subprog_info;
struct bpf_insn *insn = env->prog->insnsi;
+ bool tail_call_reachable = false;
int ret_insn[MAX_CALL_FRAMES];
int ret_prog[MAX_CALL_FRAMES];
+ int j;
process_func:
+ /* protect against potential stack overflow that might happen when
+ * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
+ * depth for such case down to 256 so that the worst case scenario
+ * would result in 8k stack size (32 which is tailcall limit * 256 =
+ * 8k).
+ *
+ * To get the idea what might happen, see an example:
+ * func1 -> sub rsp, 128
+ * subfunc1 -> sub rsp, 256
+ * tailcall1 -> add rsp, 256
+ * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
+ * subfunc2 -> sub rsp, 64
+ * subfunc22 -> sub rsp, 128
+ * tailcall2 -> add rsp, 128
+ * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
+ *
+ * tailcall will unwind the current stack frame but it will not get rid
+ * of caller's stack as shown on the example above.
+ */
+ if (idx && subprog[idx].has_tail_call && depth >= 256) {
+ verbose(env,
+ "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
+ depth);
+ return -EACCES;
+ }
/* round up to 32-bytes, since this is granularity
* of interpreter stack size
*/
@@ -3011,6 +3053,10 @@ continue_func:
i);
return -EFAULT;
}
+
+ if (subprog[idx].has_tail_call)
+ tail_call_reachable = true;
+
frame++;
if (frame >= MAX_CALL_FRAMES) {
verbose(env, "the call stack of %d frames is too deep !\n",
@@ -3019,6 +3065,15 @@ continue_func:
}
goto process_func;
}
+ /* if tail call got detected across bpf2bpf calls then mark each of the
+ * currently present subprog frames as tail call reachable subprogs;
+ * this info will be utilized by JIT so that we will be preserving the
+ * tail call counter throughout bpf2bpf calls combined with tailcalls
+ */
+ if (tail_call_reachable)
+ for (j = 0; j < frame; j++)
+ subprog[ret_prog[j]].tail_call_reachable = true;
+
/* end of for() loop means the last insn of the 'subprog'
* was reached. Doesn't matter whether it was JA or EXIT
*/
@@ -3594,18 +3649,6 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
struct bpf_func_state *state = func(env, reg);
int err, min_off, max_off, i, j, slot, spi;
- if (reg->type != PTR_TO_STACK) {
- /* Allow zero-byte read from NULL, regardless of pointer type */
- if (zero_size_allowed && access_size == 0 &&
- register_is_null(reg))
- return 0;
-
- verbose(env, "R%d type=%s expected=%s\n", regno,
- reg_type_str[reg->type],
- reg_type_str[PTR_TO_STACK]);
- return -EACCES;
- }
-
if (tnum_is_const(reg->var_off)) {
min_off = max_off = reg->var_off.value + reg->off;
err = __check_stack_boundary(env, regno, min_off, access_size,
@@ -3750,9 +3793,19 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
access_size, zero_size_allowed,
"rdwr",
&env->prog->aux->max_rdwr_access);
- default: /* scalar_value|ptr_to_stack or invalid ptr */
+ case PTR_TO_STACK:
return check_stack_boundary(env, regno, access_size,
zero_size_allowed, meta);
+ default: /* scalar_value or invalid ptr */
+ /* Allow zero-byte read from NULL, regardless of pointer type */
+ if (zero_size_allowed && access_size == 0 &&
+ register_is_null(reg))
+ return 0;
+
+ verbose(env, "R%d type=%s expected=%s\n", regno,
+ reg_type_str[reg->type],
+ reg_type_str[PTR_TO_STACK]);
+ return -EACCES;
}
}
@@ -3784,10 +3837,6 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
struct bpf_map *map = reg->map_ptr;
u64 val = reg->var_off.value;
- if (reg->type != PTR_TO_MAP_VALUE) {
- verbose(env, "R%d is not a pointer to map_value\n", regno);
- return -EINVAL;
- }
if (!is_const) {
verbose(env,
"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
@@ -3854,12 +3903,6 @@ static bool arg_type_is_mem_size(enum bpf_arg_type type)
type == ARG_CONST_SIZE_OR_ZERO;
}
-static bool arg_type_is_alloc_mem_ptr(enum bpf_arg_type type)
-{
- return type == ARG_PTR_TO_ALLOC_MEM ||
- type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
-}
-
static bool arg_type_is_alloc_size(enum bpf_arg_type type)
{
return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
@@ -3908,14 +3951,114 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env,
return 0;
}
+struct bpf_reg_types {
+ const enum bpf_reg_type types[10];
+};
+
+static const struct bpf_reg_types map_key_value_types = {
+ .types = {
+ PTR_TO_STACK,
+ PTR_TO_PACKET,
+ PTR_TO_PACKET_META,
+ PTR_TO_MAP_VALUE,
+ },
+};
+
+static const struct bpf_reg_types sock_types = {
+ .types = {
+ PTR_TO_SOCK_COMMON,
+ PTR_TO_SOCKET,
+ PTR_TO_TCP_SOCK,
+ PTR_TO_XDP_SOCK,
+ },
+};
+
+static const struct bpf_reg_types mem_types = {
+ .types = {
+ PTR_TO_STACK,
+ PTR_TO_PACKET,
+ PTR_TO_PACKET_META,
+ PTR_TO_MAP_VALUE,
+ PTR_TO_MEM,
+ PTR_TO_RDONLY_BUF,
+ PTR_TO_RDWR_BUF,
+ },
+};
+
+static const struct bpf_reg_types int_ptr_types = {
+ .types = {
+ PTR_TO_STACK,
+ PTR_TO_PACKET,
+ PTR_TO_PACKET_META,
+ PTR_TO_MAP_VALUE,
+ },
+};
+
+static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
+static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
+static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
+static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
+static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
+static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
+static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
+
+static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
+ [ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
+ [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types,
+ [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types,
+ [ARG_PTR_TO_MAP_VALUE_OR_NULL] = &map_key_value_types,
+ [ARG_CONST_SIZE] = &scalar_types,
+ [ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
+ [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
+ [ARG_CONST_MAP_PTR] = &const_map_ptr_types,
+ [ARG_PTR_TO_CTX] = &context_types,
+ [ARG_PTR_TO_CTX_OR_NULL] = &context_types,
+ [ARG_PTR_TO_SOCK_COMMON] = &sock_types,
+ [ARG_PTR_TO_SOCKET] = &fullsock_types,
+ [ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types,
+ [ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
+ [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
+ [ARG_PTR_TO_MEM] = &mem_types,
+ [ARG_PTR_TO_MEM_OR_NULL] = &mem_types,
+ [ARG_PTR_TO_UNINIT_MEM] = &mem_types,
+ [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
+ [ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types,
+ [ARG_PTR_TO_INT] = &int_ptr_types,
+ [ARG_PTR_TO_LONG] = &int_ptr_types,
+};
+
+static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
+ const struct bpf_reg_types *compatible)
+{
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ enum bpf_reg_type expected, type = reg->type;
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
+ expected = compatible->types[i];
+ if (expected == NOT_INIT)
+ break;
+
+ if (type == expected)
+ return 0;
+ }
+
+ verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
+ for (j = 0; j + 1 < i; j++)
+ verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
+ verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
+ return -EACCES;
+}
+
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
const struct bpf_func_proto *fn)
{
u32 regno = BPF_REG_1 + arg;
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
- enum bpf_reg_type expected_type, type = reg->type;
enum bpf_arg_type arg_type = fn->arg_type[arg];
+ const struct bpf_reg_types *compatible;
+ enum bpf_reg_type type = reg->type;
int err = 0;
if (arg_type == ARG_DONTCARE)
@@ -3948,125 +4091,48 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return err;
}
- if (arg_type == ARG_PTR_TO_MAP_KEY ||
- arg_type == ARG_PTR_TO_MAP_VALUE ||
- arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
- arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
- expected_type = PTR_TO_STACK;
- if (register_is_null(reg) &&
- arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL)
- /* final test in check_stack_boundary() */;
- else if (!type_is_pkt_pointer(type) &&
- type != PTR_TO_MAP_VALUE &&
- type != expected_type)
- goto err_type;
- } else if (arg_type == ARG_CONST_SIZE ||
- arg_type == ARG_CONST_SIZE_OR_ZERO ||
- arg_type == ARG_CONST_ALLOC_SIZE_OR_ZERO) {
- expected_type = SCALAR_VALUE;
- if (type != expected_type)
- goto err_type;
- } else if (arg_type == ARG_CONST_MAP_PTR) {
- expected_type = CONST_PTR_TO_MAP;
- if (type != expected_type)
- goto err_type;
- } else if (arg_type == ARG_PTR_TO_CTX ||
- arg_type == ARG_PTR_TO_CTX_OR_NULL) {
- expected_type = PTR_TO_CTX;
- if (!(register_is_null(reg) &&
- arg_type == ARG_PTR_TO_CTX_OR_NULL)) {
- if (type != expected_type)
- goto err_type;
- err = check_ctx_reg(env, reg, regno);
- if (err < 0)
- return err;
+ if (register_is_null(reg) && arg_type_may_be_null(arg_type))
+ /* A NULL register has a SCALAR_VALUE type, so skip
+ * type checking.
+ */
+ goto skip_type_check;
+
+ compatible = compatible_reg_types[arg_type];
+ if (!compatible) {
+ verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
+ return -EFAULT;
+ }
+
+ err = check_reg_type(env, regno, compatible);
+ if (err)
+ return err;
+
+ if (type == PTR_TO_BTF_ID) {
+ const u32 *btf_id = fn->arg_btf_id[arg];
+
+ if (!btf_id) {
+ verbose(env, "verifier internal error: missing BTF ID\n");
+ return -EFAULT;
}
- } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
- expected_type = PTR_TO_SOCK_COMMON;
- /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
- if (!type_is_sk_pointer(type))
- goto err_type;
- if (reg->ref_obj_id) {
- if (meta->ref_obj_id) {
- verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
- regno, reg->ref_obj_id,
- meta->ref_obj_id);
- return -EFAULT;
- }
- meta->ref_obj_id = reg->ref_obj_id;
- }
- } else if (arg_type == ARG_PTR_TO_SOCKET ||
- arg_type == ARG_PTR_TO_SOCKET_OR_NULL) {
- expected_type = PTR_TO_SOCKET;
- if (!(register_is_null(reg) &&
- arg_type == ARG_PTR_TO_SOCKET_OR_NULL)) {
- if (type != expected_type)
- goto err_type;
- }
- } else if (arg_type == ARG_PTR_TO_BTF_ID) {
- bool ids_match = false;
-
- expected_type = PTR_TO_BTF_ID;
- if (type != expected_type)
- goto err_type;
- if (!fn->check_btf_id) {
- if (reg->btf_id != meta->btf_id) {
- ids_match = btf_struct_ids_match(&env->log, reg->off, reg->btf_id,
- meta->btf_id);
- if (!ids_match) {
- verbose(env, "Helper has type %s got %s in R%d\n",
- kernel_type_name(meta->btf_id),
- kernel_type_name(reg->btf_id), regno);
- return -EACCES;
- }
- }
- } else if (!fn->check_btf_id(reg->btf_id, arg)) {
- verbose(env, "Helper does not support %s in R%d\n",
- kernel_type_name(reg->btf_id), regno);
+ if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id, *btf_id)) {
+ verbose(env, "R%d is of type %s but %s is expected\n",
+ regno, kernel_type_name(reg->btf_id), kernel_type_name(*btf_id));
return -EACCES;
}
- if ((reg->off && !ids_match) || !tnum_is_const(reg->var_off) || reg->var_off.value) {
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
regno);
return -EACCES;
}
- } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
- if (meta->func_id == BPF_FUNC_spin_lock) {
- if (process_spin_lock(env, regno, true))
- return -EACCES;
- } else if (meta->func_id == BPF_FUNC_spin_unlock) {
- if (process_spin_lock(env, regno, false))
- return -EACCES;
- } else {
- verbose(env, "verifier internal error\n");
- return -EFAULT;
- }
- } else if (arg_type_is_mem_ptr(arg_type)) {
- expected_type = PTR_TO_STACK;
- /* One exception here. In case function allows for NULL to be
- * passed in as argument, it's a SCALAR_VALUE type. Final test
- * happens during stack boundary checking.
- */
- if (register_is_null(reg) &&
- (arg_type == ARG_PTR_TO_MEM_OR_NULL ||
- arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL))
- /* final test in check_stack_boundary() */;
- else if (!type_is_pkt_pointer(type) &&
- type != PTR_TO_MAP_VALUE &&
- type != PTR_TO_MEM &&
- type != PTR_TO_RDONLY_BUF &&
- type != PTR_TO_RDWR_BUF &&
- type != expected_type)
- goto err_type;
- meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
- } else if (arg_type_is_alloc_mem_ptr(arg_type)) {
- expected_type = PTR_TO_MEM;
- if (register_is_null(reg) &&
- arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL)
- /* final test in check_stack_boundary() */;
- else if (type != expected_type)
- goto err_type;
+ } else if (type == PTR_TO_CTX) {
+ err = check_ctx_reg(env, reg, regno);
+ if (err < 0)
+ return err;
+ }
+
+skip_type_check:
+ if (reg->ref_obj_id) {
if (meta->ref_obj_id) {
verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
regno, reg->ref_obj_id,
@@ -4074,15 +4140,6 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EFAULT;
}
meta->ref_obj_id = reg->ref_obj_id;
- } else if (arg_type_is_int_ptr(arg_type)) {
- expected_type = PTR_TO_STACK;
- if (!type_is_pkt_pointer(type) &&
- type != PTR_TO_MAP_VALUE &&
- type != expected_type)
- goto err_type;
- } else {
- verbose(env, "unsupported arg_type %d\n", arg_type);
- return -EFAULT;
}
if (arg_type == ARG_CONST_MAP_PTR) {
@@ -4121,6 +4178,22 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
err = check_helper_mem_access(env, regno,
meta->map_ptr->value_size, false,
meta);
+ } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
+ if (meta->func_id == BPF_FUNC_spin_lock) {
+ if (process_spin_lock(env, regno, true))
+ return -EACCES;
+ } else if (meta->func_id == BPF_FUNC_spin_unlock) {
+ if (process_spin_lock(env, regno, false))
+ return -EACCES;
+ } else {
+ verbose(env, "verifier internal error\n");
+ return -EFAULT;
+ }
+ } else if (arg_type_is_mem_ptr(arg_type)) {
+ /* The access to this pointer is only checked when we hit the
+ * next is_mem_size argument below.
+ */
+ meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
@@ -4186,10 +4259,6 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
}
return err;
-err_type:
- verbose(env, "R%d type=%s expected=%s\n", regno,
- reg_type_str[type], reg_type_str[expected_type]);
- return -EACCES;
}
static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
@@ -4224,6 +4293,11 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
return false;
}
+static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
+{
+ return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
+}
+
static int check_map_func_compatibility(struct bpf_verifier_env *env,
struct bpf_map *map, int func_id)
{
@@ -4339,8 +4413,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_tail_call:
if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
goto error;
- if (env->subprog_cnt > 1) {
- verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
+ if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
+ verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
return -EINVAL;
}
break;
@@ -4495,10 +4569,22 @@ static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
return count <= 1;
}
+static bool check_btf_id_ok(const struct bpf_func_proto *fn)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++)
+ if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
+ return false;
+
+ return true;
+}
+
static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
{
return check_raw_mode_ok(fn) &&
check_arg_pair_ok(fn) &&
+ check_btf_id_ok(fn) &&
check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
}
@@ -4894,11 +4980,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
meta.func_id = func_id;
/* check args */
for (i = 0; i < 5; i++) {
- if (!fn->check_btf_id) {
- err = btf_resolve_helper_id(&env->log, fn, i);
- if (err > 0)
- meta.btf_id = err;
- }
err = check_func_arg(env, i, &meta, fn);
if (err)
return err;
@@ -5317,6 +5398,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
case CONST_PTR_TO_MAP:
+ /* smin_val represents the known value */
+ if (known && smin_val == 0 && opcode == BPF_ADD)
+ break;
+ /* fall-through */
case PTR_TO_PACKET_END:
case PTR_TO_SOCKET:
case PTR_TO_SOCKET_OR_NULL:
@@ -7461,18 +7546,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EINVAL;
}
- if (env->subprog_cnt > 1) {
- /* when program has LD_ABS insn JITs and interpreter assume
- * that r1 == ctx == skb which is not the case for callees
- * that can have arbitrary arguments. It's problematic
- * for main prog as well since JITs would need to analyze
- * all functions in order to make proper register save/restore
- * decisions in the main prog. Hence disallow LD_ABS with calls
- */
- verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
- return -EINVAL;
- }
-
if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
BPF_SIZE(insn->code) == BPF_DW ||
(mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
@@ -7883,6 +7956,23 @@ err_free:
return ret;
}
+static int check_abnormal_return(struct bpf_verifier_env *env)
+{
+ int i;
+
+ for (i = 1; i < env->subprog_cnt; i++) {
+ if (env->subprog_info[i].has_ld_abs) {
+ verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
+ return -EINVAL;
+ }
+ if (env->subprog_info[i].has_tail_call) {
+ verbose(env, "tail_call is not allowed in subprogs without BTF\n");
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
/* The minimum supported BTF func info size */
#define MIN_BPF_FUNCINFO_SIZE 8
#define MAX_FUNCINFO_REC_SIZE 252
@@ -7891,20 +7981,24 @@ static int check_btf_func(struct bpf_verifier_env *env,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
+ const struct btf_type *type, *func_proto, *ret_type;
u32 i, nfuncs, urec_size, min_size;
u32 krec_size = sizeof(struct bpf_func_info);
struct bpf_func_info *krecord;
struct bpf_func_info_aux *info_aux = NULL;
- const struct btf_type *type;
struct bpf_prog *prog;
const struct btf *btf;
void __user *urecord;
u32 prev_offset = 0;
+ bool scalar_return;
int ret = -ENOMEM;
nfuncs = attr->func_info_cnt;
- if (!nfuncs)
+ if (!nfuncs) {
+ if (check_abnormal_return(env))
+ return -EINVAL;
return 0;
+ }
if (nfuncs != env->subprog_cnt) {
verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
@@ -7952,25 +8046,23 @@ static int check_btf_func(struct bpf_verifier_env *env,
}
/* check insn_off */
+ ret = -EINVAL;
if (i == 0) {
if (krecord[i].insn_off) {
verbose(env,
"nonzero insn_off %u for the first func info record",
krecord[i].insn_off);
- ret = -EINVAL;
goto err_free;
}
} else if (krecord[i].insn_off <= prev_offset) {
verbose(env,
"same or smaller insn offset (%u) than previous func info record (%u)",
krecord[i].insn_off, prev_offset);
- ret = -EINVAL;
goto err_free;
}
if (env->subprog_info[i].start != krecord[i].insn_off) {
verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
- ret = -EINVAL;
goto err_free;
}
@@ -7979,10 +8071,26 @@ static int check_btf_func(struct bpf_verifier_env *env,
if (!type || !btf_type_is_func(type)) {
verbose(env, "invalid type id %d in func info",
krecord[i].type_id);
- ret = -EINVAL;
goto err_free;
}
info_aux[i].linkage = BTF_INFO_VLEN(type->info);
+
+ func_proto = btf_type_by_id(btf, type->type);
+ if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
+ /* btf_func_check() already verified it during BTF load */
+ goto err_free;
+ ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
+ scalar_return =
+ btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
+ if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
+ verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
+ goto err_free;
+ }
+ if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
+ verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
+ goto err_free;
+ }
+
prev_offset = krecord[i].insn_off;
urecord += urec_size;
}
@@ -8143,8 +8251,11 @@ static int check_btf_info(struct bpf_verifier_env *env,
struct btf *btf;
int err;
- if (!attr->func_info_cnt && !attr->line_info_cnt)
+ if (!attr->func_info_cnt && !attr->line_info_cnt) {
+ if (check_abnormal_return(env))
+ return -EINVAL;
return 0;
+ }
btf = btf_get_by_fd(attr->prog_btf_fd);
if (IS_ERR(btf))
@@ -9619,6 +9730,18 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len
}
}
+static void adjust_poke_descs(struct bpf_prog *prog, u32 len)
+{
+ struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
+ int i, sz = prog->aux->size_poke_tab;
+ struct bpf_jit_poke_descriptor *desc;
+
+ for (i = 0; i < sz; i++) {
+ desc = &tab[i];
+ desc->insn_idx += len - 1;
+ }
+}
+
static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
const struct bpf_insn *patch, u32 len)
{
@@ -9635,6 +9758,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
if (adjust_insn_aux_data(env, new_prog, off, len))
return NULL;
adjust_subprog_starts(env, off, len);
+ adjust_poke_descs(new_prog, len);
return new_prog;
}
@@ -10165,6 +10289,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog, **func, *tmp;
int i, j, subprog_start, subprog_end = 0, len, subprog;
+ struct bpf_map *map_ptr;
struct bpf_insn *insn;
void *old_bpf_func;
int err, num_exentries;
@@ -10232,6 +10357,31 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->btf = prog->aux->btf;
func[i]->aux->func_info = prog->aux->func_info;
+ for (j = 0; j < prog->aux->size_poke_tab; j++) {
+ u32 insn_idx = prog->aux->poke_tab[j].insn_idx;
+ int ret;
+
+ if (!(insn_idx >= subprog_start &&
+ insn_idx <= subprog_end))
+ continue;
+
+ ret = bpf_jit_add_poke_descriptor(func[i],
+ &prog->aux->poke_tab[j]);
+ if (ret < 0) {
+ verbose(env, "adding tail call poke descriptor failed\n");
+ goto out_free;
+ }
+
+ func[i]->insnsi[insn_idx - subprog_start].imm = ret + 1;
+
+ map_ptr = func[i]->aux->poke_tab[ret].tail_call.map;
+ ret = map_ptr->ops->map_poke_track(map_ptr, func[i]->aux);
+ if (ret < 0) {
+ verbose(env, "tracking tail call prog failed\n");
+ goto out_free;
+ }
+ }
+
/* Use bpf_prog_F_tag to indicate functions in stack traces.
* Long term would need debug info to populate names
*/
@@ -10250,6 +10400,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
num_exentries++;
}
func[i]->aux->num_exentries = num_exentries;
+ func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
func[i] = bpf_int_jit_compile(func[i]);
if (!func[i]->jited) {
err = -ENOTSUPP;
@@ -10257,6 +10408,19 @@ static int jit_subprogs(struct bpf_verifier_env *env)
}
cond_resched();
}
+
+ /* Untrack main program's aux structs so that during map_poke_run()
+ * we will not stumble upon the unfilled poke descriptors; each
+ * of the main program's poke descs got distributed across subprogs
+ * and got tracked onto map, so we are sure that none of them will
+ * be missed after the operation below
+ */
+ for (i = 0; i < prog->aux->size_poke_tab; i++) {
+ map_ptr = prog->aux->poke_tab[i].tail_call.map;
+
+ map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
+ }
+
/* at this point all bpf functions were successfully JITed
* now populate all bpf_calls with correct addresses and
* run last pass of JIT
@@ -10325,9 +10489,16 @@ static int jit_subprogs(struct bpf_verifier_env *env)
bpf_prog_free_unused_jited_linfo(prog);
return 0;
out_free:
- for (i = 0; i < env->subprog_cnt; i++)
- if (func[i])
- bpf_jit_free(func[i]);
+ for (i = 0; i < env->subprog_cnt; i++) {
+ if (!func[i])
+ continue;
+
+ for (j = 0; j < func[i]->aux->size_poke_tab; j++) {
+ map_ptr = func[i]->aux->poke_tab[j].tail_call.map;
+ map_ptr->ops->map_poke_untrack(map_ptr, func[i]->aux);
+ }
+ bpf_jit_free(func[i]);
+ }
kfree(func);
out_undo_insn:
/* cleanup main prog to be interpreted */
@@ -10361,6 +10532,13 @@ static int fixup_call_args(struct bpf_verifier_env *env)
return err;
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+ if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
+ /* When JIT fails the progs with bpf2bpf calls and tail_calls
+ * have to be rejected, since interpreter doesn't support them yet.
+ */
+ verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
+ return -EINVAL;
+ }
for (i = 0; i < prog->len; i++, insn++) {
if (insn->code != (BPF_JMP | BPF_CALL) ||
insn->src_reg != BPF_PSEUDO_CALL)
@@ -10524,8 +10702,9 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
* the program array.
*/
prog->cb_access = 1;
- env->prog->aux->stack_depth = MAX_BPF_STACK;
- env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
+ if (!allow_tail_call_in_subprogs(env))
+ prog->aux->stack_depth = MAX_BPF_STACK;
+ prog->aux->max_pkt_offset = MAX_PACKET_OFF;
/* mark bpf_tail_call as different opcode to avoid
* conditional branch in the interpeter for every normal
@@ -10545,6 +10724,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
.reason = BPF_POKE_REASON_TAIL_CALL,
.tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
.tail_call.key = bpf_map_key_immediate(aux),
+ .insn_idx = i + delta,
};
ret = bpf_jit_add_poke_descriptor(prog, &desc);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b2a5380eb187..36508f46a8db 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -743,19 +743,18 @@ out:
return err;
}
-BTF_ID_LIST(bpf_seq_printf_btf_ids)
-BTF_ID(struct, seq_file)
+BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)
static const struct bpf_func_proto bpf_seq_printf_proto = {
.func = bpf_seq_printf,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &btf_seq_file_ids[0],
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_PTR_TO_MEM_OR_NULL,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
- .btf_id = bpf_seq_printf_btf_ids,
};
BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
@@ -763,17 +762,14 @@ BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
return seq_write(m, data, len) ? -EOVERFLOW : 0;
}
-BTF_ID_LIST(bpf_seq_write_btf_ids)
-BTF_ID(struct, seq_file)
-
static const struct bpf_func_proto bpf_seq_write_proto = {
.func = bpf_seq_write,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &btf_seq_file_ids[0],
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
- .btf_id = bpf_seq_write_btf_ids,
};
static __always_inline int
@@ -1118,6 +1114,14 @@ BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz)
}
BTF_SET_START(btf_allowlist_d_path)
+#ifdef CONFIG_SECURITY
+BTF_ID(func, security_file_permission)
+BTF_ID(func, security_inode_getattr)
+BTF_ID(func, security_file_open)
+#endif
+#ifdef CONFIG_SECURITY_PATH
+BTF_ID(func, security_path_truncate)
+#endif
BTF_ID(func, vfs_truncate)
BTF_ID(func, vfs_fallocate)
BTF_ID(func, dentry_open)
@@ -1130,17 +1134,16 @@ static bool bpf_d_path_allowed(const struct bpf_prog *prog)
return btf_id_set_contains(&btf_allowlist_d_path, prog->aux->attach_btf_id);
}
-BTF_ID_LIST(bpf_d_path_btf_ids)
-BTF_ID(struct, path)
+BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path)
static const struct bpf_func_proto bpf_d_path_proto = {
.func = bpf_d_path,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_d_path_btf_ids[0],
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
- .btf_id = bpf_d_path_btf_ids,
.allowed = bpf_d_path_allowed,
};
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 0746fe2c2c04..9af99c39b9fd 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -221,7 +221,7 @@ static u8 batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv,
* address here, only IPv6 ones
*/
if (br_ip_entry->addr.proto == htons(ETH_P_IPV6) &&
- ipv6_addr_is_ll_all_routers(&br_ip_entry->addr.u.ip6))
+ ipv6_addr_is_ll_all_routers(&br_ip_entry->addr.dst.ip6))
flags &= ~BATADV_MCAST_WANT_NO_RTR6;
list_del(&br_ip_entry->list);
@@ -562,10 +562,10 @@ out:
static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
{
if (src->proto == htons(ETH_P_IP))
- ip_eth_mc_map(src->u.ip4, dst);
+ ip_eth_mc_map(src->dst.ip4, dst);
#if IS_ENABLED(CONFIG_IPV6)
else if (src->proto == htons(ETH_P_IPV6))
- ipv6_eth_mc_map(&src->u.ip6, dst);
+ ipv6_eth_mc_map(&src->dst.ip6, dst);
#endif
else
eth_zero_addr(dst);
@@ -609,11 +609,11 @@ static int batadv_mcast_mla_bridge_get(struct net_device *dev,
continue;
if (tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES &&
- ipv4_is_local_multicast(br_ip_entry->addr.u.ip4))
+ ipv4_is_local_multicast(br_ip_entry->addr.dst.ip4))
continue;
if (!(tvlv_flags & BATADV_MCAST_WANT_NO_RTR4) &&
- !ipv4_is_local_multicast(br_ip_entry->addr.u.ip4))
+ !ipv4_is_local_multicast(br_ip_entry->addr.dst.ip4))
continue;
}
@@ -623,11 +623,11 @@ static int batadv_mcast_mla_bridge_get(struct net_device *dev,
continue;
if (tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES &&
- ipv6_addr_is_ll_all_nodes(&br_ip_entry->addr.u.ip6))
+ ipv6_addr_is_ll_all_nodes(&br_ip_entry->addr.dst.ip6))
continue;
if (!(tvlv_flags & BATADV_MCAST_WANT_NO_RTR6) &&
- IPV6_ADDR_MC_SCOPE(&br_ip_entry->addr.u.ip6) >
+ IPV6_ADDR_MC_SCOPE(&br_ip_entry->addr.dst.ip6) >
IPV6_ADDR_SCOPE_LINKLOCAL)
continue;
}
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 7629b63f6f30..e28ffadd1371 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -274,14 +274,23 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *prev = NULL;
struct net_bridge_port_group *p;
+ bool allow_mode_include = true;
struct hlist_node *rp;
rp = rcu_dereference(hlist_first_rcu(&br->router_list));
- p = mdst ? rcu_dereference(mdst->ports) : NULL;
+ if (mdst) {
+ p = rcu_dereference(mdst->ports);
+ if (br_multicast_should_handle_mode(br, mdst->addr.proto) &&
+ br_multicast_is_star_g(&mdst->addr))
+ allow_mode_include = false;
+ } else {
+ p = NULL;
+ }
+
while (p || rp) {
struct net_bridge_port *port, *lport, *rport;
- lport = p ? p->port : NULL;
+ lport = p ? p->key.port : NULL;
rport = hlist_entry_safe(rp, struct net_bridge_port, rlist);
if ((unsigned long)lport > (unsigned long)rport) {
@@ -292,6 +301,10 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
local_orig);
goto delivered;
}
+ if ((!allow_mode_include &&
+ p->filter_mode == MCAST_INCLUDE) ||
+ (p->flags & MDB_PG_FLAGS_BLOCKED))
+ goto delivered;
} else {
port = rport;
}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 00f1651a6aba..e15bab19a012 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -62,19 +62,33 @@ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags)
e->flags |= MDB_FLAGS_OFFLOAD;
if (flags & MDB_PG_FLAGS_FAST_LEAVE)
e->flags |= MDB_FLAGS_FAST_LEAVE;
+ if (flags & MDB_PG_FLAGS_STAR_EXCL)
+ e->flags |= MDB_FLAGS_STAR_EXCL;
+ if (flags & MDB_PG_FLAGS_BLOCKED)
+ e->flags |= MDB_FLAGS_BLOCKED;
}
-static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
+static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip,
+ struct nlattr **mdb_attrs)
{
memset(ip, 0, sizeof(struct br_ip));
ip->vid = entry->vid;
ip->proto = entry->addr.proto;
- if (ip->proto == htons(ETH_P_IP))
- ip->u.ip4 = entry->addr.u.ip4;
+ switch (ip->proto) {
+ case htons(ETH_P_IP):
+ ip->dst.ip4 = entry->addr.u.ip4;
+ if (mdb_attrs && mdb_attrs[MDBE_ATTR_SOURCE])
+ ip->src.ip4 = nla_get_in_addr(mdb_attrs[MDBE_ATTR_SOURCE]);
+ break;
#if IS_ENABLED(CONFIG_IPV6)
- else
- ip->u.ip6 = entry->addr.u.ip6;
+ case htons(ETH_P_IPV6):
+ ip->dst.ip6 = entry->addr.u.ip6;
+ if (mdb_attrs && mdb_attrs[MDBE_ATTR_SOURCE])
+ ip->src.ip6 = nla_get_in6_addr(mdb_attrs[MDBE_ATTR_SOURCE]);
+ break;
#endif
+ }
+
}
static int __mdb_fill_srcs(struct sk_buff *skb,
@@ -91,14 +105,14 @@ static int __mdb_fill_srcs(struct sk_buff *skb,
return -EMSGSIZE;
hlist_for_each_entry_rcu(ent, &p->src_list, node,
- lockdep_is_held(&p->port->br->multicast_lock)) {
+ lockdep_is_held(&p->key.port->br->multicast_lock)) {
nest_ent = nla_nest_start(skb, MDBA_MDB_SRCLIST_ENTRY);
if (!nest_ent)
goto out_cancel_err;
switch (ent->addr.proto) {
case htons(ETH_P_IP):
if (nla_put_in_addr(skb, MDBA_MDB_SRCATTR_ADDRESS,
- ent->addr.u.ip4)) {
+ ent->addr.src.ip4)) {
nla_nest_cancel(skb, nest_ent);
goto out_cancel_err;
}
@@ -106,7 +120,7 @@ static int __mdb_fill_srcs(struct sk_buff *skb,
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
if (nla_put_in6_addr(skb, MDBA_MDB_SRCATTR_ADDRESS,
- &ent->addr.u.ip6)) {
+ &ent->addr.src.ip6)) {
nla_nest_cancel(skb, nest_ent);
goto out_cancel_err;
}
@@ -146,7 +160,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
memset(&e, 0, sizeof(e));
if (p) {
- ifindex = p->port->dev->ifindex;
+ ifindex = p->key.port->dev->ifindex;
mtimer = &p->timer;
flags = p->flags;
} else {
@@ -158,10 +172,10 @@ static int __mdb_fill_info(struct sk_buff *skb,
e.ifindex = ifindex;
e.vid = mp->addr.vid;
if (mp->addr.proto == htons(ETH_P_IP))
- e.addr.u.ip4 = mp->addr.u.ip4;
+ e.addr.u.ip4 = mp->addr.dst.ip4;
#if IS_ENABLED(CONFIG_IPV6)
if (mp->addr.proto == htons(ETH_P_IPV6))
- e.addr.u.ip6 = mp->addr.u.ip6;
+ e.addr.u.ip6 = mp->addr.dst.ip6;
#endif
e.addr.proto = mp->addr.proto;
nest_ent = nla_nest_start_noflag(skb,
@@ -172,30 +186,47 @@ static int __mdb_fill_info(struct sk_buff *skb,
if (nla_put_nohdr(skb, sizeof(e), &e) ||
nla_put_u32(skb,
MDBA_MDB_EATTR_TIMER,
- br_timer_value(mtimer))) {
- nla_nest_cancel(skb, nest_ent);
- return -EMSGSIZE;
- }
+ br_timer_value(mtimer)))
+ goto nest_err;
+
switch (mp->addr.proto) {
case htons(ETH_P_IP):
- dump_srcs_mode = !!(p && mp->br->multicast_igmp_version == 3);
+ dump_srcs_mode = !!(mp->br->multicast_igmp_version == 3);
+ if (mp->addr.src.ip4) {
+ if (nla_put_in_addr(skb, MDBA_MDB_EATTR_SOURCE,
+ mp->addr.src.ip4))
+ goto nest_err;
+ break;
+ }
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- dump_srcs_mode = !!(p && mp->br->multicast_mld_version == 2);
+ dump_srcs_mode = !!(mp->br->multicast_mld_version == 2);
+ if (!ipv6_addr_any(&mp->addr.src.ip6)) {
+ if (nla_put_in6_addr(skb, MDBA_MDB_EATTR_SOURCE,
+ &mp->addr.src.ip6))
+ goto nest_err;
+ break;
+ }
break;
#endif
}
- if (dump_srcs_mode &&
- (__mdb_fill_srcs(skb, p) ||
- nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, p->filter_mode))) {
- nla_nest_cancel(skb, nest_ent);
- return -EMSGSIZE;
+ if (p) {
+ if (nla_put_u8(skb, MDBA_MDB_EATTR_RTPROT, p->rt_protocol))
+ goto nest_err;
+ if (dump_srcs_mode &&
+ (__mdb_fill_srcs(skb, p) ||
+ nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE,
+ p->filter_mode)))
+ goto nest_err;
}
-
nla_nest_end(skb, nest_ent);
return 0;
+
+nest_err:
+ nla_nest_cancel(skb, nest_ent);
+ return -EMSGSIZE;
}
static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
@@ -236,7 +267,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
pp = &p->next) {
- if (!p->port)
+ if (!p->key.port)
continue;
if (pidx < s_pidx)
goto skip_pg;
@@ -393,15 +424,24 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
if (!pg)
goto out;
- switch (pg->addr.proto) {
+ /* MDBA_MDB_EATTR_RTPROT */
+ nlmsg_size += nla_total_size(sizeof(u8));
+
+ switch (pg->key.addr.proto) {
case htons(ETH_P_IP):
- if (pg->port->br->multicast_igmp_version == 2)
+ /* MDBA_MDB_EATTR_SOURCE */
+ if (pg->key.addr.src.ip4)
+ nlmsg_size += nla_total_size(sizeof(__be32));
+ if (pg->key.port->br->multicast_igmp_version == 2)
goto out;
addr_size = sizeof(__be32);
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- if (pg->port->br->multicast_mld_version == 1)
+ /* MDBA_MDB_EATTR_SOURCE */
+ if (!ipv6_addr_any(&pg->key.addr.src.ip6))
+ nlmsg_size += nla_total_size(sizeof(struct in6_addr));
+ if (pg->key.port->br->multicast_mld_version == 1)
goto out;
addr_size = sizeof(struct in6_addr);
break;
@@ -450,7 +490,7 @@ static void br_mdb_complete(struct net_device *dev, int err, void *priv)
goto out;
for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
- if (p->port != port)
+ if (p->key.port != port)
continue;
p->flags |= MDB_PG_FLAGS_OFFLOAD;
}
@@ -474,10 +514,10 @@ static void br_mdb_switchdev_host_port(struct net_device *dev,
};
if (mp->addr.proto == htons(ETH_P_IP))
- ip_eth_mc_map(mp->addr.u.ip4, mdb.addr);
+ ip_eth_mc_map(mp->addr.dst.ip4, mdb.addr);
#if IS_ENABLED(CONFIG_IPV6)
else
- ipv6_eth_mc_map(&mp->addr.u.ip6, mdb.addr);
+ ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb.addr);
#endif
mdb.obj.orig_dev = dev;
@@ -520,26 +560,26 @@ void br_mdb_notify(struct net_device *dev,
if (pg) {
if (mp->addr.proto == htons(ETH_P_IP))
- ip_eth_mc_map(mp->addr.u.ip4, mdb.addr);
+ ip_eth_mc_map(mp->addr.dst.ip4, mdb.addr);
#if IS_ENABLED(CONFIG_IPV6)
else
- ipv6_eth_mc_map(&mp->addr.u.ip6, mdb.addr);
+ ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb.addr);
#endif
- mdb.obj.orig_dev = pg->port->dev;
+ mdb.obj.orig_dev = pg->key.port->dev;
switch (type) {
case RTM_NEWMDB:
complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
if (!complete_info)
break;
- complete_info->port = pg->port;
+ complete_info->port = pg->key.port;
complete_info->ip = mp->addr;
mdb.obj.complete_priv = complete_info;
mdb.obj.complete = br_mdb_complete;
- if (switchdev_port_obj_add(pg->port->dev, &mdb.obj, NULL))
+ if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL))
kfree(complete_info);
break;
case RTM_DELMDB:
- switchdev_port_obj_del(pg->port->dev, &mdb.obj);
+ switchdev_port_obj_del(pg->key.port->dev, &mdb.obj);
break;
}
} else {
@@ -629,33 +669,94 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_MDB, err);
}
-static bool is_valid_mdb_entry(struct br_mdb_entry *entry)
+static bool is_valid_mdb_entry(struct br_mdb_entry *entry,
+ struct netlink_ext_ack *extack)
{
- if (entry->ifindex == 0)
+ if (entry->ifindex == 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Zero entry ifindex is not allowed");
return false;
+ }
if (entry->addr.proto == htons(ETH_P_IP)) {
- if (!ipv4_is_multicast(entry->addr.u.ip4))
+ if (!ipv4_is_multicast(entry->addr.u.ip4)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is not multicast");
return false;
- if (ipv4_is_local_multicast(entry->addr.u.ip4))
+ }
+ if (ipv4_is_local_multicast(entry->addr.u.ip4)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is local multicast");
return false;
+ }
#if IS_ENABLED(CONFIG_IPV6)
} else if (entry->addr.proto == htons(ETH_P_IPV6)) {
- if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6))
+ if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 entry group address is link-local all nodes");
return false;
+ }
#endif
- } else
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "Unknown entry protocol");
return false;
- if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY)
+ }
+
+ if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) {
+ NL_SET_ERR_MSG_MOD(extack, "Unknown entry state");
return false;
- if (entry->vid >= VLAN_VID_MASK)
+ }
+ if (entry->vid >= VLAN_VID_MASK) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid entry VLAN id");
return false;
+ }
+
+ return true;
+}
+
+static bool is_valid_mdb_source(struct nlattr *attr, __be16 proto,
+ struct netlink_ext_ack *extack)
+{
+ switch (proto) {
+ case htons(ETH_P_IP):
+ if (nla_len(attr) != sizeof(struct in_addr)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv4 invalid source address length");
+ return false;
+ }
+ if (ipv4_is_multicast(nla_get_in_addr(attr))) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv4 multicast source address is not allowed");
+ return false;
+ }
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6): {
+ struct in6_addr src;
+
+ if (nla_len(attr) != sizeof(struct in6_addr)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 invalid source address length");
+ return false;
+ }
+ src = nla_get_in6_addr(attr);
+ if (ipv6_addr_is_multicast(&src)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 multicast source address is not allowed");
+ return false;
+ }
+ break;
+ }
+#endif
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Invalid protocol used with source address");
+ return false;
+ }
return true;
}
+static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = {
+ [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
+ sizeof(struct in_addr),
+ sizeof(struct in6_addr)),
+};
+
static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct net_device **pdev, struct br_mdb_entry **pentry)
+ struct net_device **pdev, struct br_mdb_entry **pentry,
+ struct nlattr **mdb_attrs, struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct br_mdb_entry *entry;
@@ -671,51 +772,86 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
bpm = nlmsg_data(nlh);
if (bpm->ifindex == 0) {
- pr_info("PF_BRIDGE: br_mdb_parse() with invalid ifindex\n");
+ NL_SET_ERR_MSG_MOD(extack, "Invalid bridge ifindex");
return -EINVAL;
}
dev = __dev_get_by_index(net, bpm->ifindex);
if (dev == NULL) {
- pr_info("PF_BRIDGE: br_mdb_parse() with unknown ifindex\n");
+ NL_SET_ERR_MSG_MOD(extack, "Bridge device doesn't exist");
return -ENODEV;
}
if (!(dev->priv_flags & IFF_EBRIDGE)) {
- pr_info("PF_BRIDGE: br_mdb_parse() with non-bridge\n");
+ NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge");
return -EOPNOTSUPP;
}
*pdev = dev;
- if (!tb[MDBA_SET_ENTRY] ||
- nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) {
- pr_info("PF_BRIDGE: br_mdb_parse() with invalid attr\n");
+ if (!tb[MDBA_SET_ENTRY]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY attribute");
+ return -EINVAL;
+ }
+ if (nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid MDBA_SET_ENTRY attribute length");
return -EINVAL;
}
entry = nla_data(tb[MDBA_SET_ENTRY]);
- if (!is_valid_mdb_entry(entry)) {
- pr_info("PF_BRIDGE: br_mdb_parse() with invalid entry\n");
+ if (!is_valid_mdb_entry(entry, extack))
return -EINVAL;
+ *pentry = entry;
+
+ if (tb[MDBA_SET_ENTRY_ATTRS]) {
+ err = nla_parse_nested(mdb_attrs, MDBE_ATTR_MAX,
+ tb[MDBA_SET_ENTRY_ATTRS],
+ br_mdbe_attrs_pol, extack);
+ if (err)
+ return err;
+ if (mdb_attrs[MDBE_ATTR_SOURCE] &&
+ !is_valid_mdb_source(mdb_attrs[MDBE_ATTR_SOURCE],
+ entry->addr.proto, extack))
+ return -EINVAL;
+ } else {
+ memset(mdb_attrs, 0,
+ sizeof(struct nlattr *) * (MDBE_ATTR_MAX + 1));
}
- *pentry = entry;
return 0;
}
static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
- struct br_ip *group, struct br_mdb_entry *entry)
+ struct br_mdb_entry *entry,
+ struct nlattr **mdb_attrs,
+ struct netlink_ext_ack *extack)
{
- struct net_bridge_mdb_entry *mp;
+ struct net_bridge_mdb_entry *mp, *star_mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
+ struct br_ip group, star_group;
unsigned long now = jiffies;
+ u8 filter_mode;
int err;
- mp = br_mdb_ip_get(br, group);
+ __mdb_entry_to_br_ip(entry, &group, mdb_attrs);
+
+ /* host join errors which can happen before creating the group */
+ if (!port) {
+ /* don't allow any flags for host-joined groups */
+ if (entry->state) {
+ NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups");
+ return -EINVAL;
+ }
+ if (!br_multicast_is_star_g(&group)) {
+ NL_SET_ERR_MSG_MOD(extack, "Groups with sources cannot be manually host joined");
+ return -EINVAL;
+ }
+ }
+
+ mp = br_mdb_ip_get(br, &group);
if (!mp) {
- mp = br_multicast_new_group(br, group);
+ mp = br_multicast_new_group(br, &group);
err = PTR_ERR_OR_ZERO(mp);
if (err)
return err;
@@ -723,11 +859,10 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
/* host join */
if (!port) {
- /* don't allow any flags for host-joined groups */
- if (entry->state)
- return -EINVAL;
- if (mp->host_joined)
+ if (mp->host_joined) {
+ NL_SET_ERR_MSG_MOD(extack, "Group is already joined by host");
return -EEXIST;
+ }
br_multicast_host_join(mp, false);
br_mdb_notify(br->dev, mp, NULL, RTM_NEWMDB);
@@ -738,56 +873,69 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
for (pp = &mp->ports;
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
- if (p->port == port)
+ if (p->key.port == port) {
+ NL_SET_ERR_MSG_MOD(extack, "Group is already joined by port");
return -EEXIST;
- if ((unsigned long)p->port < (unsigned long)port)
+ }
+ if ((unsigned long)p->key.port < (unsigned long)port)
break;
}
- p = br_multicast_new_port_group(port, group, *pp, entry->state, NULL,
- MCAST_EXCLUDE);
- if (unlikely(!p))
+ filter_mode = br_multicast_is_star_g(&group) ? MCAST_EXCLUDE :
+ MCAST_INCLUDE;
+
+ p = br_multicast_new_port_group(port, &group, *pp, entry->state, NULL,
+ filter_mode, RTPROT_STATIC);
+ if (unlikely(!p)) {
+ NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group");
return -ENOMEM;
+ }
rcu_assign_pointer(*pp, p);
if (entry->state == MDB_TEMPORARY)
mod_timer(&p->timer, now + br->multicast_membership_interval);
br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
+ /* if we are adding a new EXCLUDE port group (*,G) it needs to be also
+ * added to all S,G entries for proper replication, if we are adding
+ * a new INCLUDE port (S,G) then all of *,G EXCLUDE ports need to be
+ * added to it for proper replication
+ */
+ if (br_multicast_should_handle_mode(br, group.proto)) {
+ switch (filter_mode) {
+ case MCAST_EXCLUDE:
+ br_multicast_star_g_handle_mode(p, MCAST_EXCLUDE);
+ break;
+ case MCAST_INCLUDE:
+ star_group = p->key.addr;
+ memset(&star_group.src, 0, sizeof(star_group.src));
+ star_mp = br_mdb_ip_get(br, &star_group);
+ if (star_mp)
+ br_multicast_sg_add_exclude_ports(star_mp, p);
+ break;
+ }
+ }
return 0;
}
static int __br_mdb_add(struct net *net, struct net_bridge *br,
- struct br_mdb_entry *entry)
+ struct net_bridge_port *p,
+ struct br_mdb_entry *entry,
+ struct nlattr **mdb_attrs,
+ struct netlink_ext_ack *extack)
{
- struct br_ip ip;
- struct net_device *dev;
- struct net_bridge_port *p = NULL;
int ret;
- if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
- return -EINVAL;
-
- if (entry->ifindex != br->dev->ifindex) {
- dev = __dev_get_by_index(net, entry->ifindex);
- if (!dev)
- return -ENODEV;
-
- p = br_port_get_rtnl(dev);
- if (!p || p->br != br || p->state == BR_STATE_DISABLED)
- return -EINVAL;
- }
-
- __mdb_entry_to_br_ip(entry, &ip);
-
spin_lock_bh(&br->multicast_lock);
- ret = br_mdb_add_group(br, p, &ip, entry);
+ ret = br_mdb_add_group(br, p, entry, mdb_attrs, extack);
spin_unlock_bh(&br->multicast_lock);
+
return ret;
}
static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct nlattr *mdb_attrs[MDBE_ATTR_MAX + 1];
struct net *net = sock_net(skb->sk);
struct net_bridge_vlan_group *vg;
struct net_bridge_port *p = NULL;
@@ -797,20 +945,43 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_bridge *br;
int err;
- err = br_mdb_parse(skb, nlh, &dev, &entry);
+ err = br_mdb_parse(skb, nlh, &dev, &entry, mdb_attrs, extack);
if (err < 0)
return err;
br = netdev_priv(dev);
+ if (!netif_running(br->dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Bridge device is not running");
+ return -EINVAL;
+ }
+
+ if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) {
+ NL_SET_ERR_MSG_MOD(extack, "Bridge's multicast processing is disabled");
+ return -EINVAL;
+ }
+
if (entry->ifindex != br->dev->ifindex) {
pdev = __dev_get_by_index(net, entry->ifindex);
- if (!pdev)
+ if (!pdev) {
+ NL_SET_ERR_MSG_MOD(extack, "Port net device doesn't exist");
return -ENODEV;
+ }
p = br_port_get_rtnl(pdev);
- if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+ if (!p) {
+ NL_SET_ERR_MSG_MOD(extack, "Net device is not a bridge port");
+ return -EINVAL;
+ }
+
+ if (p->br != br) {
+ NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device");
return -EINVAL;
+ }
+ if (p->state == BR_STATE_DISABLED) {
+ NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state");
+ return -EINVAL;
+ }
vg = nbp_vlan_group(p);
} else {
vg = br_vlan_group(br);
@@ -822,18 +993,19 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
list_for_each_entry(v, &vg->vlan_list, vlist) {
entry->vid = v->vid;
- err = __br_mdb_add(net, br, entry);
+ err = __br_mdb_add(net, br, p, entry, mdb_attrs, extack);
if (err)
break;
}
} else {
- err = __br_mdb_add(net, br, entry);
+ err = __br_mdb_add(net, br, p, entry, mdb_attrs, extack);
}
return err;
}
-static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
+static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry,
+ struct nlattr **mdb_attrs)
{
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
@@ -844,7 +1016,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
return -EINVAL;
- __mdb_entry_to_br_ip(entry, &ip);
+ __mdb_entry_to_br_ip(entry, &ip, mdb_attrs);
spin_lock_bh(&br->multicast_lock);
mp = br_mdb_ip_get(br, &ip);
@@ -864,10 +1036,10 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
for (pp = &mp->ports;
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
- if (!p->port || p->port->dev->ifindex != entry->ifindex)
+ if (!p->key.port || p->key.port->dev->ifindex != entry->ifindex)
continue;
- if (p->port->state == BR_STATE_DISABLED)
+ if (p->key.port->state == BR_STATE_DISABLED)
goto unlock;
br_multicast_del_pg(mp, p, pp);
@@ -883,6 +1055,7 @@ unlock:
static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct nlattr *mdb_attrs[MDBE_ATTR_MAX + 1];
struct net *net = sock_net(skb->sk);
struct net_bridge_vlan_group *vg;
struct net_bridge_port *p = NULL;
@@ -892,7 +1065,7 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_bridge *br;
int err;
- err = br_mdb_parse(skb, nlh, &dev, &entry);
+ err = br_mdb_parse(skb, nlh, &dev, &entry, mdb_attrs, extack);
if (err < 0)
return err;
@@ -917,10 +1090,10 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
list_for_each_entry(v, &vg->vlan_list, vlist) {
entry->vid = v->vid;
- err = __br_mdb_del(br, entry);
+ err = __br_mdb_del(br, entry, mdb_attrs);
}
} else {
- err = __br_mdb_del(br, entry);
+ err = __br_mdb_del(br, entry, mdb_attrs);
}
return err;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index e77f1e27caf7..66eb62ded192 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -41,6 +41,13 @@ static const struct rhashtable_params br_mdb_rht_params = {
.automatic_shrinking = true,
};
+static const struct rhashtable_params br_sg_port_rht_params = {
+ .head_offset = offsetof(struct net_bridge_port_group, rhnode),
+ .key_offset = offsetof(struct net_bridge_port_group, key),
+ .key_len = sizeof(struct net_bridge_port_group_sg_key),
+ .automatic_shrinking = true,
+};
+
static void br_multicast_start_querier(struct net_bridge *br,
struct bridge_mcast_own_query *query);
static void br_multicast_add_router(struct net_bridge *br,
@@ -59,6 +66,26 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
const struct in6_addr *group,
__u16 vid, const unsigned char *src);
#endif
+static struct net_bridge_port_group *
+__br_multicast_add_group(struct net_bridge *br,
+ struct net_bridge_port *port,
+ struct br_ip *group,
+ const unsigned char *src,
+ u8 filter_mode,
+ bool igmpv2_mldv1,
+ bool blocked);
+static void br_multicast_find_del_pg(struct net_bridge *br,
+ struct net_bridge_port_group *pg);
+
+static struct net_bridge_port_group *
+br_sg_port_find(struct net_bridge *br,
+ struct net_bridge_port_group_sg_key *sg_p)
+{
+ lockdep_assert_held_once(&br->multicast_lock);
+
+ return rhashtable_lookup_fast(&br->sg_port_tbl, sg_p,
+ br_sg_port_rht_params);
+}
static struct net_bridge_mdb_entry *br_mdb_ip_get_rcu(struct net_bridge *br,
struct br_ip *dst)
@@ -86,7 +113,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip4_get(struct net_bridge *br,
struct br_ip br_dst;
memset(&br_dst, 0, sizeof(br_dst));
- br_dst.u.ip4 = dst;
+ br_dst.dst.ip4 = dst;
br_dst.proto = htons(ETH_P_IP);
br_dst.vid = vid;
@@ -101,7 +128,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br,
struct br_ip br_dst;
memset(&br_dst, 0, sizeof(br_dst));
- br_dst.u.ip6 = *dst;
+ br_dst.dst.ip6 = *dst;
br_dst.proto = htons(ETH_P_IPV6);
br_dst.vid = vid;
@@ -126,11 +153,29 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
switch (skb->protocol) {
case htons(ETH_P_IP):
- ip.u.ip4 = ip_hdr(skb)->daddr;
+ ip.dst.ip4 = ip_hdr(skb)->daddr;
+ if (br->multicast_igmp_version == 3) {
+ struct net_bridge_mdb_entry *mdb;
+
+ ip.src.ip4 = ip_hdr(skb)->saddr;
+ mdb = br_mdb_ip_get_rcu(br, &ip);
+ if (mdb)
+ return mdb;
+ ip.src.ip4 = 0;
+ }
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- ip.u.ip6 = ipv6_hdr(skb)->daddr;
+ ip.dst.ip6 = ipv6_hdr(skb)->daddr;
+ if (br->multicast_mld_version == 2) {
+ struct net_bridge_mdb_entry *mdb;
+
+ ip.src.ip6 = ipv6_hdr(skb)->saddr;
+ mdb = br_mdb_ip_get_rcu(br, &ip);
+ if (mdb)
+ return mdb;
+ memset(&ip.src.ip6, 0, sizeof(ip.src.ip6));
+ }
break;
#endif
default:
@@ -140,6 +185,326 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
return br_mdb_ip_get_rcu(br, &ip);
}
+static bool br_port_group_equal(struct net_bridge_port_group *p,
+ struct net_bridge_port *port,
+ const unsigned char *src)
+{
+ if (p->key.port != port)
+ return false;
+
+ if (!(port->flags & BR_MULTICAST_TO_UNICAST))
+ return true;
+
+ return ether_addr_equal(src, p->eth_addr);
+}
+
+static void __fwd_add_star_excl(struct net_bridge_port_group *pg,
+ struct br_ip *sg_ip)
+{
+ struct net_bridge_port_group_sg_key sg_key;
+ struct net_bridge *br = pg->key.port->br;
+ struct net_bridge_port_group *src_pg;
+
+ memset(&sg_key, 0, sizeof(sg_key));
+ sg_key.port = pg->key.port;
+ sg_key.addr = *sg_ip;
+ if (br_sg_port_find(br, &sg_key))
+ return;
+
+ src_pg = __br_multicast_add_group(br, pg->key.port, sg_ip, pg->eth_addr,
+ MCAST_INCLUDE, false, false);
+ if (IS_ERR_OR_NULL(src_pg) ||
+ src_pg->rt_protocol != RTPROT_KERNEL)
+ return;
+
+ src_pg->flags |= MDB_PG_FLAGS_STAR_EXCL;
+}
+
+static void __fwd_del_star_excl(struct net_bridge_port_group *pg,
+ struct br_ip *sg_ip)
+{
+ struct net_bridge_port_group_sg_key sg_key;
+ struct net_bridge *br = pg->key.port->br;
+ struct net_bridge_port_group *src_pg;
+
+ memset(&sg_key, 0, sizeof(sg_key));
+ sg_key.port = pg->key.port;
+ sg_key.addr = *sg_ip;
+ src_pg = br_sg_port_find(br, &sg_key);
+ if (!src_pg || !(src_pg->flags & MDB_PG_FLAGS_STAR_EXCL) ||
+ src_pg->rt_protocol != RTPROT_KERNEL)
+ return;
+
+ br_multicast_find_del_pg(br, src_pg);
+}
+
+/* When a port group transitions to (or is added as) EXCLUDE we need to add it
+ * to all other ports' S,G entries which are not blocked by the current group
+ * for proper replication, the assumption is that any S,G blocked entries
+ * are already added so the S,G,port lookup should skip them.
+ * When a port group transitions from EXCLUDE -> INCLUDE mode or is being
+ * deleted we need to remove it from all ports' S,G entries where it was
+ * automatically installed before (i.e. where it's MDB_PG_FLAGS_STAR_EXCL).
+ */
+void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
+ u8 filter_mode)
+{
+ struct net_bridge *br = pg->key.port->br;
+ struct net_bridge_port_group *pg_lst;
+ struct net_bridge_mdb_entry *mp;
+ struct br_ip sg_ip;
+
+ if (WARN_ON(!br_multicast_is_star_g(&pg->key.addr)))
+ return;
+
+ mp = br_mdb_ip_get(br, &pg->key.addr);
+ if (!mp)
+ return;
+
+ memset(&sg_ip, 0, sizeof(sg_ip));
+ sg_ip = pg->key.addr;
+ for (pg_lst = mlock_dereference(mp->ports, br);
+ pg_lst;
+ pg_lst = mlock_dereference(pg_lst->next, br)) {
+ struct net_bridge_group_src *src_ent;
+
+ if (pg_lst == pg)
+ continue;
+ hlist_for_each_entry(src_ent, &pg_lst->src_list, node) {
+ if (!(src_ent->flags & BR_SGRP_F_INSTALLED))
+ continue;
+ sg_ip.src = src_ent->addr.src;
+ switch (filter_mode) {
+ case MCAST_INCLUDE:
+ __fwd_del_star_excl(pg, &sg_ip);
+ break;
+ case MCAST_EXCLUDE:
+ __fwd_add_star_excl(pg, &sg_ip);
+ break;
+ }
+ }
+ }
+}
+
+/* called when adding a new S,G with host_joined == false by default */
+static void br_multicast_sg_host_state(struct net_bridge_mdb_entry *star_mp,
+ struct net_bridge_port_group *sg)
+{
+ struct net_bridge_mdb_entry *sg_mp;
+
+ if (WARN_ON(!br_multicast_is_star_g(&star_mp->addr)))
+ return;
+ if (!star_mp->host_joined)
+ return;
+
+ sg_mp = br_mdb_ip_get(star_mp->br, &sg->key.addr);
+ if (!sg_mp)
+ return;
+ sg_mp->host_joined = true;
+}
+
+/* set the host_joined state of all of *,G's S,G entries */
+static void br_multicast_star_g_host_state(struct net_bridge_mdb_entry *star_mp)
+{
+ struct net_bridge *br = star_mp->br;
+ struct net_bridge_mdb_entry *sg_mp;
+ struct net_bridge_port_group *pg;
+ struct br_ip sg_ip;
+
+ if (WARN_ON(!br_multicast_is_star_g(&star_mp->addr)))
+ return;
+
+ memset(&sg_ip, 0, sizeof(sg_ip));
+ sg_ip = star_mp->addr;
+ for (pg = mlock_dereference(star_mp->ports, br);
+ pg;
+ pg = mlock_dereference(pg->next, br)) {
+ struct net_bridge_group_src *src_ent;
+
+ hlist_for_each_entry(src_ent, &pg->src_list, node) {
+ if (!(src_ent->flags & BR_SGRP_F_INSTALLED))
+ continue;
+ sg_ip.src = src_ent->addr.src;
+ sg_mp = br_mdb_ip_get(br, &sg_ip);
+ if (!sg_mp)
+ continue;
+ sg_mp->host_joined = star_mp->host_joined;
+ }
+ }
+}
+
+static void br_multicast_sg_del_exclude_ports(struct net_bridge_mdb_entry *sgmp)
+{
+ struct net_bridge_port_group __rcu **pp;
+ struct net_bridge_port_group *p;
+
+ /* *,G exclude ports are only added to S,G entries */
+ if (WARN_ON(br_multicast_is_star_g(&sgmp->addr)))
+ return;
+
+ /* we need the STAR_EXCLUDE ports if there are non-STAR_EXCLUDE ports
+ * we should ignore perm entries since they're managed by user-space
+ */
+ for (pp = &sgmp->ports;
+ (p = mlock_dereference(*pp, sgmp->br)) != NULL;
+ pp = &p->next)
+ if (!(p->flags & (MDB_PG_FLAGS_STAR_EXCL |
+ MDB_PG_FLAGS_PERMANENT)))
+ return;
+
+ /* currently the host can only have joined the *,G which means
+ * we treat it as EXCLUDE {}, so for an S,G it's considered a
+ * STAR_EXCLUDE entry and we can safely leave it
+ */
+ sgmp->host_joined = false;
+
+ for (pp = &sgmp->ports;
+ (p = mlock_dereference(*pp, sgmp->br)) != NULL;) {
+ if (!(p->flags & MDB_PG_FLAGS_PERMANENT))
+ br_multicast_del_pg(sgmp, p, pp);
+ else
+ pp = &p->next;
+ }
+}
+
+void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
+ struct net_bridge_port_group *sg)
+{
+ struct net_bridge_port_group_sg_key sg_key;
+ struct net_bridge *br = star_mp->br;
+ struct net_bridge_port_group *pg;
+
+ if (WARN_ON(br_multicast_is_star_g(&sg->key.addr)))
+ return;
+ if (WARN_ON(!br_multicast_is_star_g(&star_mp->addr)))
+ return;
+
+ br_multicast_sg_host_state(star_mp, sg);
+ memset(&sg_key, 0, sizeof(sg_key));
+ sg_key.addr = sg->key.addr;
+ /* we need to add all exclude ports to the S,G */
+ for (pg = mlock_dereference(star_mp->ports, br);
+ pg;
+ pg = mlock_dereference(pg->next, br)) {
+ struct net_bridge_port_group *src_pg;
+
+ if (pg == sg || pg->filter_mode == MCAST_INCLUDE)
+ continue;
+
+ sg_key.port = pg->key.port;
+ if (br_sg_port_find(br, &sg_key))
+ continue;
+
+ src_pg = __br_multicast_add_group(br, pg->key.port,
+ &sg->key.addr,
+ sg->eth_addr,
+ MCAST_INCLUDE, false, false);
+ if (IS_ERR_OR_NULL(src_pg) ||
+ src_pg->rt_protocol != RTPROT_KERNEL)
+ continue;
+ src_pg->flags |= MDB_PG_FLAGS_STAR_EXCL;
+ }
+}
+
+static void br_multicast_fwd_src_add(struct net_bridge_group_src *src)
+{
+ struct net_bridge_mdb_entry *star_mp;
+ struct net_bridge_port_group *sg;
+ struct br_ip sg_ip;
+
+ if (src->flags & BR_SGRP_F_INSTALLED)
+ return;
+
+ memset(&sg_ip, 0, sizeof(sg_ip));
+ sg_ip = src->pg->key.addr;
+ sg_ip.src = src->addr.src;
+ sg = __br_multicast_add_group(src->br, src->pg->key.port, &sg_ip,
+ src->pg->eth_addr, MCAST_INCLUDE, false,
+ !timer_pending(&src->timer));
+ if (IS_ERR_OR_NULL(sg))
+ return;
+ src->flags |= BR_SGRP_F_INSTALLED;
+ sg->flags &= ~MDB_PG_FLAGS_STAR_EXCL;
+
+ /* if it was added by user-space as perm we can skip next steps */
+ if (sg->rt_protocol != RTPROT_KERNEL &&
+ (sg->flags & MDB_PG_FLAGS_PERMANENT))
+ return;
+
+ /* the kernel is now responsible for removing this S,G */
+ del_timer(&sg->timer);
+ star_mp = br_mdb_ip_get(src->br, &src->pg->key.addr);
+ if (!star_mp)
+ return;
+
+ br_multicast_sg_add_exclude_ports(star_mp, sg);
+}
+
+static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src)
+{
+ struct net_bridge_port_group *p, *pg = src->pg;
+ struct net_bridge_port_group __rcu **pp;
+ struct net_bridge_mdb_entry *mp;
+ struct br_ip sg_ip;
+
+ memset(&sg_ip, 0, sizeof(sg_ip));
+ sg_ip = pg->key.addr;
+ sg_ip.src = src->addr.src;
+
+ mp = br_mdb_ip_get(src->br, &sg_ip);
+ if (!mp)
+ return;
+
+ for (pp = &mp->ports;
+ (p = mlock_dereference(*pp, src->br)) != NULL;
+ pp = &p->next) {
+ if (!br_port_group_equal(p, pg->key.port, pg->eth_addr))
+ continue;
+
+ if (p->rt_protocol != RTPROT_KERNEL &&
+ (p->flags & MDB_PG_FLAGS_PERMANENT))
+ break;
+
+ br_multicast_del_pg(mp, p, pp);
+ break;
+ }
+ src->flags &= ~BR_SGRP_F_INSTALLED;
+}
+
+/* install S,G and based on src's timer enable or disable forwarding */
+static void br_multicast_fwd_src_handle(struct net_bridge_group_src *src)
+{
+ struct net_bridge_port_group_sg_key sg_key;
+ struct net_bridge_port_group *sg;
+ u8 old_flags;
+
+ br_multicast_fwd_src_add(src);
+
+ memset(&sg_key, 0, sizeof(sg_key));
+ sg_key.addr = src->pg->key.addr;
+ sg_key.addr.src = src->addr.src;
+ sg_key.port = src->pg->key.port;
+
+ sg = br_sg_port_find(src->br, &sg_key);
+ if (!sg || (sg->flags & MDB_PG_FLAGS_PERMANENT))
+ return;
+
+ old_flags = sg->flags;
+ if (timer_pending(&src->timer))
+ sg->flags &= ~MDB_PG_FLAGS_BLOCKED;
+ else
+ sg->flags |= MDB_PG_FLAGS_BLOCKED;
+
+ if (old_flags != sg->flags) {
+ struct net_bridge_mdb_entry *sg_mp;
+
+ sg_mp = br_mdb_ip_get(src->br, &sg_key.addr);
+ if (!sg_mp)
+ return;
+ br_mdb_notify(src->br->dev, sg_mp, sg, RTM_NEWMDB);
+ }
+}
+
static void br_multicast_destroy_mdb_entry(struct net_bridge_mcast_gc *gc)
{
struct net_bridge_mdb_entry *mp;
@@ -169,7 +534,8 @@ static void br_multicast_group_expired(struct timer_list *t)
struct net_bridge *br = mp->br;
spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) || timer_pending(&mp->timer))
+ if (hlist_unhashed(&mp->mdb_node) || !netif_running(br->dev) ||
+ timer_pending(&mp->timer))
goto out;
br_multicast_host_leave(mp, true);
@@ -194,8 +560,9 @@ static void br_multicast_destroy_group_src(struct net_bridge_mcast_gc *gc)
static void br_multicast_del_group_src(struct net_bridge_group_src *src)
{
- struct net_bridge *br = src->pg->port->br;
+ struct net_bridge *br = src->pg->key.port->br;
+ br_multicast_fwd_src_remove(src);
hlist_del_init_rcu(&src->node);
src->pg->src_ents--;
hlist_add_head(&src->mcast_gc.gc_node, &br->mcast_gc_list);
@@ -219,15 +586,21 @@ void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
struct net_bridge_port_group *pg,
struct net_bridge_port_group __rcu **pp)
{
- struct net_bridge *br = pg->port->br;
+ struct net_bridge *br = pg->key.port->br;
struct net_bridge_group_src *ent;
struct hlist_node *tmp;
+ rhashtable_remove_fast(&br->sg_port_tbl, &pg->rhnode,
+ br_sg_port_rht_params);
rcu_assign_pointer(*pp, pg->next);
hlist_del_init(&pg->mglist);
hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node)
br_multicast_del_group_src(ent);
br_mdb_notify(br->dev, mp, pg, RTM_DELMDB);
+ if (!br_multicast_is_star_g(&mp->addr))
+ br_multicast_sg_del_exclude_ports(mp);
+ else
+ br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE);
hlist_add_head(&pg->mcast_gc.gc_node, &br->mcast_gc_list);
queue_work(system_long_wq, &br->mcast_gc_work);
@@ -242,7 +615,7 @@ static void br_multicast_find_del_pg(struct net_bridge *br,
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
- mp = br_mdb_ip_get(br, &pg->addr);
+ mp = br_mdb_ip_get(br, &pg->key.addr);
if (WARN_ON(!mp))
return;
@@ -263,7 +636,7 @@ static void br_multicast_port_group_expired(struct timer_list *t)
{
struct net_bridge_port_group *pg = from_timer(pg, t, timer);
struct net_bridge_group_src *src_ent;
- struct net_bridge *br = pg->port->br;
+ struct net_bridge *br = pg->key.port->br;
struct hlist_node *tmp;
bool changed;
@@ -284,7 +657,10 @@ static void br_multicast_port_group_expired(struct timer_list *t)
if (hlist_empty(&pg->src_list)) {
br_multicast_find_del_pg(br, pg);
} else if (changed) {
- struct net_bridge_mdb_entry *mp = br_mdb_ip_get(br, &pg->addr);
+ struct net_bridge_mdb_entry *mp = br_mdb_ip_get(br, &pg->key.addr);
+
+ if (changed && br_multicast_is_star_g(&pg->key.addr))
+ br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE);
if (WARN_ON(!mp))
goto out;
@@ -312,7 +688,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
u8 sflag, u8 *igmp_type,
bool *need_rexmit)
{
- struct net_bridge_port *p = pg ? pg->port : NULL;
+ struct net_bridge_port *p = pg ? pg->key.port : NULL;
struct net_bridge_group_src *ent;
size_t pkt_size, igmp_hdr_size;
unsigned long now = jiffies;
@@ -423,7 +799,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
if (over_lmqt == time_after(ent->timer.expires,
lmqt) &&
ent->src_query_rexmit_cnt > 0) {
- ihv3->srcs[lmqt_srcs++] = ent->addr.u.ip4;
+ ihv3->srcs[lmqt_srcs++] = ent->addr.src.ip4;
ent->src_query_rexmit_cnt--;
if (need_rexmit && ent->src_query_rexmit_cnt)
*need_rexmit = true;
@@ -458,7 +834,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
u8 sflag, u8 *igmp_type,
bool *need_rexmit)
{
- struct net_bridge_port *p = pg ? pg->port : NULL;
+ struct net_bridge_port *p = pg ? pg->key.port : NULL;
struct net_bridge_group_src *ent;
size_t pkt_size, mld_hdr_size;
unsigned long now = jiffies;
@@ -584,7 +960,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
if (over_llqt == time_after(ent->timer.expires,
llqt) &&
ent->src_query_rexmit_cnt > 0) {
- mld2q->mld2q_srcs[llqt_srcs++] = ent->addr.u.ip6;
+ mld2q->mld2q_srcs[llqt_srcs++] = ent->addr.src.ip6;
ent->src_query_rexmit_cnt--;
if (need_rexmit && ent->src_query_rexmit_cnt)
*need_rexmit = true;
@@ -625,9 +1001,9 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
switch (group->proto) {
case htons(ETH_P_IP):
- ip4_dst = ip_dst ? ip_dst->u.ip4 : htonl(INADDR_ALLHOSTS_GROUP);
+ ip4_dst = ip_dst ? ip_dst->dst.ip4 : htonl(INADDR_ALLHOSTS_GROUP);
return br_ip4_multicast_alloc_query(br, pg,
- ip4_dst, group->u.ip4,
+ ip4_dst, group->dst.ip4,
with_srcs, over_lmqt,
sflag, igmp_type,
need_rexmit);
@@ -636,13 +1012,13 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
struct in6_addr ip6_dst;
if (ip_dst)
- ip6_dst = ip_dst->u.ip6;
+ ip6_dst = ip_dst->dst.ip6;
else
ipv6_addr_set(&ip6_dst, htonl(0xff020000), 0, 0,
htonl(1));
return br_ip6_multicast_alloc_query(br, pg,
- &ip6_dst, &group->u.ip6,
+ &ip6_dst, &group->dst.ip6,
with_srcs, over_lmqt,
sflag, igmp_type,
need_rexmit);
@@ -704,7 +1080,10 @@ static void br_multicast_group_src_expired(struct timer_list *t)
if (!hlist_empty(&pg->src_list))
goto out;
br_multicast_find_del_pg(br, pg);
+ } else {
+ br_multicast_fwd_src_handle(src);
}
+
out:
spin_unlock(&br->multicast_lock);
}
@@ -717,13 +1096,13 @@ br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip)
switch (ip->proto) {
case htons(ETH_P_IP):
hlist_for_each_entry(ent, &pg->src_list, node)
- if (ip->u.ip4 == ent->addr.u.ip4)
+ if (ip->src.ip4 == ent->addr.src.ip4)
return ent;
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
hlist_for_each_entry(ent, &pg->src_list, node)
- if (!ipv6_addr_cmp(&ent->addr.u.ip6, &ip->u.ip6))
+ if (!ipv6_addr_cmp(&ent->addr.src.ip6, &ip->src.ip6))
return ent;
break;
#endif
@@ -742,14 +1121,14 @@ br_multicast_new_group_src(struct net_bridge_port_group *pg, struct br_ip *src_i
switch (src_ip->proto) {
case htons(ETH_P_IP):
- if (ipv4_is_zeronet(src_ip->u.ip4) ||
- ipv4_is_multicast(src_ip->u.ip4))
+ if (ipv4_is_zeronet(src_ip->src.ip4) ||
+ ipv4_is_multicast(src_ip->src.ip4))
return NULL;
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- if (ipv6_addr_any(&src_ip->u.ip6) ||
- ipv6_addr_is_multicast(&src_ip->u.ip6))
+ if (ipv6_addr_any(&src_ip->src.ip6) ||
+ ipv6_addr_is_multicast(&src_ip->src.ip6))
return NULL;
break;
#endif
@@ -760,7 +1139,7 @@ br_multicast_new_group_src(struct net_bridge_port_group *pg, struct br_ip *src_i
return NULL;
grp_src->pg = pg;
- grp_src->br = pg->port->br;
+ grp_src->br = pg->key.port->br;
grp_src->addr = *src_ip;
grp_src->mcast_gc.destroy = br_multicast_destroy_group_src;
timer_setup(&grp_src->timer, br_multicast_group_src_expired, 0);
@@ -777,7 +1156,8 @@ struct net_bridge_port_group *br_multicast_new_port_group(
struct net_bridge_port_group __rcu *next,
unsigned char flags,
const unsigned char *src,
- u8 filter_mode)
+ u8 filter_mode,
+ u8 rt_protocol)
{
struct net_bridge_port_group *p;
@@ -785,12 +1165,21 @@ struct net_bridge_port_group *br_multicast_new_port_group(
if (unlikely(!p))
return NULL;
- p->addr = *group;
- p->port = port;
+ p->key.addr = *group;
+ p->key.port = port;
p->flags = flags;
p->filter_mode = filter_mode;
+ p->rt_protocol = rt_protocol;
p->mcast_gc.destroy = br_multicast_destroy_port_group;
INIT_HLIST_HEAD(&p->src_list);
+
+ if (!br_multicast_is_star_g(group) &&
+ rhashtable_lookup_insert_fast(&port->br->sg_port_tbl, &p->rhnode,
+ br_sg_port_rht_params)) {
+ kfree(p);
+ return NULL;
+ }
+
rcu_assign_pointer(p->next, next);
timer_setup(&p->timer, br_multicast_port_group_expired, 0);
timer_setup(&p->rexmit_timer, br_multicast_port_group_rexmit, 0);
@@ -804,23 +1193,12 @@ struct net_bridge_port_group *br_multicast_new_port_group(
return p;
}
-static bool br_port_group_equal(struct net_bridge_port_group *p,
- struct net_bridge_port *port,
- const unsigned char *src)
-{
- if (p->port != port)
- return false;
-
- if (!(port->flags & BR_MULTICAST_TO_UNICAST))
- return true;
-
- return ether_addr_equal(src, p->eth_addr);
-}
-
void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify)
{
if (!mp->host_joined) {
mp->host_joined = true;
+ if (br_multicast_is_star_g(&mp->addr))
+ br_multicast_star_g_host_state(mp);
if (notify)
br_mdb_notify(mp->br->dev, mp, NULL, RTM_NEWMDB);
}
@@ -833,32 +1211,33 @@ void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify)
return;
mp->host_joined = false;
+ if (br_multicast_is_star_g(&mp->addr))
+ br_multicast_star_g_host_state(mp);
if (notify)
br_mdb_notify(mp->br->dev, mp, NULL, RTM_DELMDB);
}
-static int br_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
- struct br_ip *group,
- const unsigned char *src,
- u8 filter_mode,
- bool igmpv2_mldv1)
+static struct net_bridge_port_group *
+__br_multicast_add_group(struct net_bridge *br,
+ struct net_bridge_port *port,
+ struct br_ip *group,
+ const unsigned char *src,
+ u8 filter_mode,
+ bool igmpv2_mldv1,
+ bool blocked)
{
struct net_bridge_port_group __rcu **pp;
- struct net_bridge_port_group *p;
+ struct net_bridge_port_group *p = NULL;
struct net_bridge_mdb_entry *mp;
unsigned long now = jiffies;
- int err;
- spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) ||
(port && port->state == BR_STATE_DISABLED))
goto out;
mp = br_multicast_new_group(br, group);
- err = PTR_ERR(mp);
if (IS_ERR(mp))
- goto err;
+ return ERR_PTR(PTR_ERR(mp));
if (!port) {
br_multicast_host_join(mp, true);
@@ -870,14 +1249,19 @@ static int br_multicast_add_group(struct net_bridge *br,
pp = &p->next) {
if (br_port_group_equal(p, port, src))
goto found;
- if ((unsigned long)p->port < (unsigned long)port)
+ if ((unsigned long)p->key.port < (unsigned long)port)
break;
}
- p = br_multicast_new_port_group(port, group, *pp, 0, src, filter_mode);
- if (unlikely(!p))
- goto err;
+ p = br_multicast_new_port_group(port, group, *pp, 0, src, filter_mode,
+ RTPROT_KERNEL);
+ if (unlikely(!p)) {
+ p = ERR_PTR(-ENOMEM);
+ goto out;
+ }
rcu_assign_pointer(*pp, p);
+ if (blocked)
+ p->flags |= MDB_PG_FLAGS_BLOCKED;
br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
found:
@@ -885,10 +1269,26 @@ found:
mod_timer(&p->timer, now + br->multicast_membership_interval);
out:
- err = 0;
+ return p;
+}
-err:
+static int br_multicast_add_group(struct net_bridge *br,
+ struct net_bridge_port *port,
+ struct br_ip *group,
+ const unsigned char *src,
+ u8 filter_mode,
+ bool igmpv2_mldv1)
+{
+ struct net_bridge_port_group *pg;
+ int err;
+
+ spin_lock(&br->multicast_lock);
+ pg = __br_multicast_add_group(br, port, group, src, filter_mode,
+ igmpv2_mldv1, false);
+ /* NULL is considered valid for host joined groups */
+ err = IS_ERR(pg) ? PTR_ERR(pg) : 0;
spin_unlock(&br->multicast_lock);
+
return err;
}
@@ -906,7 +1306,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
return 0;
memset(&br_group, 0, sizeof(br_group));
- br_group.u.ip4 = group;
+ br_group.dst.ip4 = group;
br_group.proto = htons(ETH_P_IP);
br_group.vid = vid;
filter_mode = igmpv2 ? MCAST_EXCLUDE : MCAST_INCLUDE;
@@ -930,7 +1330,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
return 0;
memset(&br_group, 0, sizeof(br_group));
- br_group.u.ip6 = *group;
+ br_group.dst.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
br_group.vid = vid;
filter_mode = mldv1 ? MCAST_EXCLUDE : MCAST_INCLUDE;
@@ -1019,10 +1419,10 @@ static void br_multicast_select_own_querier(struct net_bridge *br,
struct sk_buff *skb)
{
if (ip->proto == htons(ETH_P_IP))
- br->ip4_querier.addr.u.ip4 = ip_hdr(skb)->saddr;
+ br->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr;
#if IS_ENABLED(CONFIG_IPV6)
else
- br->ip6_querier.addr.u.ip6 = ipv6_hdr(skb)->saddr;
+ br->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr;
#endif
}
@@ -1079,7 +1479,7 @@ static void br_multicast_send_query(struct net_bridge *br,
!br_opt_get(br, BROPT_MULTICAST_QUERIER))
return;
- memset(&br_group.u, 0, sizeof(br_group.u));
+ memset(&br_group.dst, 0, sizeof(br_group.dst));
if (port ? (own_query == &port->ip4_own_query) :
(own_query == &br->ip4_own_query)) {
@@ -1145,7 +1545,7 @@ static void br_multicast_port_group_rexmit(struct timer_list *t)
{
struct net_bridge_port_group *pg = from_timer(pg, t, rexmit_timer);
struct bridge_mcast_other_query *other_query = NULL;
- struct net_bridge *br = pg->port->br;
+ struct net_bridge *br = pg->key.port->br;
bool need_rexmit = false;
spin_lock(&br->multicast_lock);
@@ -1154,7 +1554,7 @@ static void br_multicast_port_group_rexmit(struct timer_list *t)
!br_opt_get(br, BROPT_MULTICAST_QUERIER))
goto out;
- if (pg->addr.proto == htons(ETH_P_IP))
+ if (pg->key.addr.proto == htons(ETH_P_IP))
other_query = &br->ip4_other_query;
#if IS_ENABLED(CONFIG_IPV6)
else
@@ -1166,11 +1566,11 @@ static void br_multicast_port_group_rexmit(struct timer_list *t)
if (pg->grp_query_rexmit_cnt) {
pg->grp_query_rexmit_cnt--;
- __br_multicast_send_query(br, pg->port, pg, &pg->addr,
- &pg->addr, false, 1, NULL);
+ __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ &pg->key.addr, false, 1, NULL);
}
- __br_multicast_send_query(br, pg->port, pg, &pg->addr,
- &pg->addr, true, 0, &need_rexmit);
+ __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ &pg->key.addr, true, 0, &need_rexmit);
if (pg->grp_query_rexmit_cnt || need_rexmit)
mod_timer(&pg->rexmit_timer, jiffies +
@@ -1301,10 +1701,17 @@ static int __grp_src_delete_marked(struct net_bridge_port_group *pg)
return deleted;
}
+static void __grp_src_mod_timer(struct net_bridge_group_src *src,
+ unsigned long expires)
+{
+ mod_timer(&src->timer, expires);
+ br_multicast_fwd_src_handle(src);
+}
+
static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg)
{
struct bridge_mcast_other_query *other_query = NULL;
- struct net_bridge *br = pg->port->br;
+ struct net_bridge *br = pg->key.port->br;
u32 lmqc = br->multicast_last_member_count;
unsigned long lmqt, lmi, now = jiffies;
struct net_bridge_group_src *ent;
@@ -1313,7 +1720,7 @@ static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg)
!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return;
- if (pg->addr.proto == htons(ETH_P_IP))
+ if (pg->key.addr.proto == htons(ETH_P_IP))
other_query = &br->ip4_other_query;
#if IS_ENABLED(CONFIG_IPV6)
else
@@ -1329,7 +1736,7 @@ static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg)
other_query &&
!timer_pending(&other_query->timer))
ent->src_query_rexmit_cnt = lmqc;
- mod_timer(&ent->timer, lmqt);
+ __grp_src_mod_timer(ent, lmqt);
}
}
}
@@ -1338,8 +1745,8 @@ static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg)
!other_query || timer_pending(&other_query->timer))
return;
- __br_multicast_send_query(br, pg->port, pg, &pg->addr,
- &pg->addr, true, 1, NULL);
+ __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ &pg->key.addr, true, 1, NULL);
lmi = now + br->multicast_last_member_interval;
if (!timer_pending(&pg->rexmit_timer) ||
@@ -1350,14 +1757,14 @@ static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg)
static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
{
struct bridge_mcast_other_query *other_query = NULL;
- struct net_bridge *br = pg->port->br;
+ struct net_bridge *br = pg->key.port->br;
unsigned long now = jiffies, lmi;
if (!netif_running(br->dev) ||
!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return;
- if (pg->addr.proto == htons(ETH_P_IP))
+ if (pg->key.addr.proto == htons(ETH_P_IP))
other_query = &br->ip4_other_query;
#if IS_ENABLED(CONFIG_IPV6)
else
@@ -1368,8 +1775,8 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
other_query && !timer_pending(&other_query->timer)) {
lmi = now + br->multicast_last_member_interval;
pg->grp_query_rexmit_cnt = br->multicast_last_member_count - 1;
- __br_multicast_send_query(br, pg->port, pg, &pg->addr,
- &pg->addr, false, 0, NULL);
+ __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ &pg->key.addr, false, 0, NULL);
if (!timer_pending(&pg->rexmit_timer) ||
time_after(pg->rexmit_timer.expires, lmi))
mod_timer(&pg->rexmit_timer, lmi);
@@ -1389,7 +1796,7 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg,
void *srcs, u32 nsrcs, size_t src_size)
{
- struct net_bridge *br = pg->port->br;
+ struct net_bridge *br = pg->key.port->br;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
bool changed = false;
@@ -1397,9 +1804,9 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg,
u32 src_idx;
memset(&src_ip, 0, sizeof(src_ip));
- src_ip.proto = pg->addr.proto;
+ src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.u, srcs, src_size);
+ memcpy(&src_ip.src, srcs, src_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (!ent) {
ent = br_multicast_new_group_src(pg, &src_ip);
@@ -1408,7 +1815,7 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg,
}
if (ent)
- mod_timer(&ent->timer, now + br_multicast_gmi(br));
+ __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
srcs += src_size;
}
@@ -1431,14 +1838,16 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg,
ent->flags |= BR_SGRP_F_DELETE;
memset(&src_ip, 0, sizeof(src_ip));
- src_ip.proto = pg->addr.proto;
+ src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.u, srcs, src_size);
+ memcpy(&src_ip.src, srcs, src_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (ent)
ent->flags &= ~BR_SGRP_F_DELETE;
else
- br_multicast_new_group_src(pg, &src_ip);
+ ent = br_multicast_new_group_src(pg, &src_ip);
+ if (ent)
+ br_multicast_fwd_src_handle(ent);
srcs += src_size;
}
@@ -1454,7 +1863,7 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg,
static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg,
void *srcs, u32 nsrcs, size_t src_size)
{
- struct net_bridge *br = pg->port->br;
+ struct net_bridge *br = pg->key.port->br;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
bool changed = false;
@@ -1465,17 +1874,17 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg,
ent->flags |= BR_SGRP_F_DELETE;
memset(&src_ip, 0, sizeof(src_ip));
- src_ip.proto = pg->addr.proto;
+ src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.u, srcs, src_size);
+ memcpy(&src_ip.src, srcs, src_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (ent) {
ent->flags &= ~BR_SGRP_F_DELETE;
} else {
ent = br_multicast_new_group_src(pg, &src_ip);
if (ent) {
- mod_timer(&ent->timer,
- now + br_multicast_gmi(br));
+ __grp_src_mod_timer(ent,
+ now + br_multicast_gmi(br));
changed = true;
}
}
@@ -1491,12 +1900,13 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg,
static bool br_multicast_isexc(struct net_bridge_port_group *pg,
void *srcs, u32 nsrcs, size_t src_size)
{
- struct net_bridge *br = pg->port->br;
+ struct net_bridge *br = pg->key.port->br;
bool changed = false;
switch (pg->filter_mode) {
case MCAST_INCLUDE:
__grp_src_isexc_incl(pg, srcs, nsrcs, src_size);
+ br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
changed = true;
break;
case MCAST_EXCLUDE:
@@ -1517,7 +1927,7 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg,
static bool __grp_src_toin_incl(struct net_bridge_port_group *pg,
void *srcs, u32 nsrcs, size_t src_size)
{
- struct net_bridge *br = pg->port->br;
+ struct net_bridge *br = pg->key.port->br;
u32 src_idx, to_send = pg->src_ents;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
@@ -1528,9 +1938,9 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg,
ent->flags |= BR_SGRP_F_SEND;
memset(&src_ip, 0, sizeof(src_ip));
- src_ip.proto = pg->addr.proto;
+ src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.u, srcs, src_size);
+ memcpy(&src_ip.src, srcs, src_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (ent) {
ent->flags &= ~BR_SGRP_F_SEND;
@@ -1541,7 +1951,7 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg,
changed = true;
}
if (ent)
- mod_timer(&ent->timer, now + br_multicast_gmi(br));
+ __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
srcs += src_size;
}
@@ -1559,7 +1969,7 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg,
static bool __grp_src_toin_excl(struct net_bridge_port_group *pg,
void *srcs, u32 nsrcs, size_t src_size)
{
- struct net_bridge *br = pg->port->br;
+ struct net_bridge *br = pg->key.port->br;
u32 src_idx, to_send = pg->src_ents;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
@@ -1571,9 +1981,9 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg,
ent->flags |= BR_SGRP_F_SEND;
memset(&src_ip, 0, sizeof(src_ip));
- src_ip.proto = pg->addr.proto;
+ src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.u, srcs, src_size);
+ memcpy(&src_ip.src, srcs, src_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (ent) {
if (timer_pending(&ent->timer)) {
@@ -1586,7 +1996,7 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg,
changed = true;
}
if (ent)
- mod_timer(&ent->timer, now + br_multicast_gmi(br));
+ __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
srcs += src_size;
}
@@ -1632,17 +2042,19 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg,
ent->flags = (ent->flags & ~BR_SGRP_F_SEND) | BR_SGRP_F_DELETE;
memset(&src_ip, 0, sizeof(src_ip));
- src_ip.proto = pg->addr.proto;
+ src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.u, srcs, src_size);
+ memcpy(&src_ip.src, srcs, src_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (ent) {
ent->flags = (ent->flags & ~BR_SGRP_F_DELETE) |
BR_SGRP_F_SEND;
to_send++;
} else {
- br_multicast_new_group_src(pg, &src_ip);
+ ent = br_multicast_new_group_src(pg, &src_ip);
}
+ if (ent)
+ br_multicast_fwd_src_handle(ent);
srcs += src_size;
}
@@ -1670,16 +2082,16 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg,
ent->flags = (ent->flags & ~BR_SGRP_F_SEND) | BR_SGRP_F_DELETE;
memset(&src_ip, 0, sizeof(src_ip));
- src_ip.proto = pg->addr.proto;
+ src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.u, srcs, src_size);
+ memcpy(&src_ip.src, srcs, src_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (ent) {
ent->flags &= ~BR_SGRP_F_DELETE;
} else {
ent = br_multicast_new_group_src(pg, &src_ip);
if (ent) {
- mod_timer(&ent->timer, pg->timer.expires);
+ __grp_src_mod_timer(ent, pg->timer.expires);
changed = true;
}
}
@@ -1701,12 +2113,13 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg,
static bool br_multicast_toex(struct net_bridge_port_group *pg,
void *srcs, u32 nsrcs, size_t src_size)
{
- struct net_bridge *br = pg->port->br;
+ struct net_bridge *br = pg->key.port->br;
bool changed = false;
switch (pg->filter_mode) {
case MCAST_INCLUDE:
__grp_src_toex_incl(pg, srcs, nsrcs, src_size);
+ br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
changed = true;
break;
case MCAST_EXCLUDE:
@@ -1734,9 +2147,9 @@ static void __grp_src_block_incl(struct net_bridge_port_group *pg,
ent->flags &= ~BR_SGRP_F_SEND;
memset(&src_ip, 0, sizeof(src_ip));
- src_ip.proto = pg->addr.proto;
+ src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.u, srcs, src_size);
+ memcpy(&src_ip.src, srcs, src_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (ent) {
ent->flags |= BR_SGRP_F_SEND;
@@ -1749,7 +2162,7 @@ static void __grp_src_block_incl(struct net_bridge_port_group *pg,
__grp_src_query_marked_and_rexmit(pg);
if (pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list))
- br_multicast_find_del_pg(pg->port->br, pg);
+ br_multicast_find_del_pg(pg->key.port->br, pg);
}
/* State Msg type New state Actions
@@ -1768,14 +2181,14 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg,
ent->flags &= ~BR_SGRP_F_SEND;
memset(&src_ip, 0, sizeof(src_ip));
- src_ip.proto = pg->addr.proto;
+ src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.u, srcs, src_size);
+ memcpy(&src_ip.src, srcs, src_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (!ent) {
ent = br_multicast_new_group_src(pg, &src_ip);
if (ent) {
- mod_timer(&ent->timer, pg->timer.expires);
+ __grp_src_mod_timer(ent, pg->timer.expires);
changed = true;
}
}
@@ -2071,16 +2484,16 @@ static bool br_ip4_multicast_select_querier(struct net_bridge *br,
!timer_pending(&br->ip4_other_query.timer))
goto update;
- if (!br->ip4_querier.addr.u.ip4)
+ if (!br->ip4_querier.addr.src.ip4)
goto update;
- if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.u.ip4))
+ if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.src.ip4))
goto update;
return false;
update:
- br->ip4_querier.addr.u.ip4 = saddr;
+ br->ip4_querier.addr.src.ip4 = saddr;
/* update protected by general multicast_lock by caller */
rcu_assign_pointer(br->ip4_querier.port, port);
@@ -2097,13 +2510,13 @@ static bool br_ip6_multicast_select_querier(struct net_bridge *br,
!timer_pending(&br->ip6_other_query.timer))
goto update;
- if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.u.ip6) <= 0)
+ if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.src.ip6) <= 0)
goto update;
return false;
update:
- br->ip6_querier.addr.u.ip6 = *saddr;
+ br->ip6_querier.addr.src.ip6 = *saddr;
/* update protected by general multicast_lock by caller */
rcu_assign_pointer(br->ip6_querier.port, port);
@@ -2118,10 +2531,10 @@ static bool br_multicast_select_querier(struct net_bridge *br,
{
switch (saddr->proto) {
case htons(ETH_P_IP):
- return br_ip4_multicast_select_querier(br, port, saddr->u.ip4);
+ return br_ip4_multicast_select_querier(br, port, saddr->src.ip4);
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- return br_ip6_multicast_select_querier(br, port, &saddr->u.ip6);
+ return br_ip6_multicast_select_querier(br, port, &saddr->src.ip6);
#endif
}
@@ -2263,7 +2676,7 @@ static void br_ip4_multicast_query(struct net_bridge *br,
if (!group) {
saddr.proto = htons(ETH_P_IP);
- saddr.u.ip4 = iph->saddr;
+ saddr.src.ip4 = iph->saddr;
br_multicast_query_received(br, port, &br->ip4_other_query,
&saddr, max_delay);
@@ -2351,7 +2764,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
if (is_general_query) {
saddr.proto = htons(ETH_P_IPV6);
- saddr.u.ip6 = ipv6_hdr(skb)->saddr;
+ saddr.src.ip6 = ipv6_hdr(skb)->saddr;
br_multicast_query_received(br, port, &br->ip6_other_query,
&saddr, max_delay);
@@ -2475,7 +2888,7 @@ br_multicast_leave_group(struct net_bridge *br,
for (p = mlock_dereference(mp->ports, br);
p != NULL;
p = mlock_dereference(p->next, br)) {
- if (p->port != port)
+ if (p->key.port != port)
continue;
if (!hlist_unhashed(&p->mglist) &&
@@ -2506,7 +2919,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
own_query = port ? &port->ip4_own_query : &br->ip4_own_query;
memset(&br_group, 0, sizeof(br_group));
- br_group.u.ip4 = group;
+ br_group.dst.ip4 = group;
br_group.proto = htons(ETH_P_IP);
br_group.vid = vid;
@@ -2530,7 +2943,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
own_query = port ? &port->ip6_own_query : &br->ip6_own_query;
memset(&br_group, 0, sizeof(br_group));
- br_group.u.ip6 = *group;
+ br_group.dst.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
br_group.vid = vid;
@@ -3235,7 +3648,7 @@ int br_multicast_list_adjacent(struct net_device *dev,
if (!entry)
goto unlock;
- entry->addr = group->addr;
+ entry->addr = group->key.addr;
list_add(&entry->list, br_ip_list);
count++;
}
@@ -3492,10 +3905,23 @@ void br_multicast_get_stats(const struct net_bridge *br,
int br_mdb_hash_init(struct net_bridge *br)
{
- return rhashtable_init(&br->mdb_hash_tbl, &br_mdb_rht_params);
+ int err;
+
+ err = rhashtable_init(&br->sg_port_tbl, &br_sg_port_rht_params);
+ if (err)
+ return err;
+
+ err = rhashtable_init(&br->mdb_hash_tbl, &br_mdb_rht_params);
+ if (err) {
+ rhashtable_destroy(&br->sg_port_tbl);
+ return err;
+ }
+
+ return 0;
}
void br_mdb_hash_fini(struct net_bridge *br)
{
+ rhashtable_destroy(&br->sg_port_tbl);
rhashtable_destroy(&br->mdb_hash_tbl);
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a23d2bae56e1..345118e35c42 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -213,11 +213,14 @@ struct net_bridge_fdb_entry {
#define MDB_PG_FLAGS_PERMANENT BIT(0)
#define MDB_PG_FLAGS_OFFLOAD BIT(1)
#define MDB_PG_FLAGS_FAST_LEAVE BIT(2)
+#define MDB_PG_FLAGS_STAR_EXCL BIT(3)
+#define MDB_PG_FLAGS_BLOCKED BIT(4)
#define PG_SRC_ENT_LIMIT 32
#define BR_SGRP_F_DELETE BIT(0)
#define BR_SGRP_F_SEND BIT(1)
+#define BR_SGRP_F_INSTALLED BIT(2)
struct net_bridge_mcast_gc {
struct hlist_node gc_node;
@@ -238,14 +241,19 @@ struct net_bridge_group_src {
struct rcu_head rcu;
};
-struct net_bridge_port_group {
+struct net_bridge_port_group_sg_key {
struct net_bridge_port *port;
- struct net_bridge_port_group __rcu *next;
struct br_ip addr;
+};
+
+struct net_bridge_port_group {
+ struct net_bridge_port_group __rcu *next;
+ struct net_bridge_port_group_sg_key key;
unsigned char eth_addr[ETH_ALEN] __aligned(2);
unsigned char flags;
unsigned char filter_mode;
unsigned char grp_query_rexmit_cnt;
+ unsigned char rt_protocol;
struct hlist_head src_list;
unsigned int src_ents;
@@ -253,6 +261,7 @@ struct net_bridge_port_group {
struct timer_list rexmit_timer;
struct hlist_node mglist;
+ struct rhash_head rhnode;
struct net_bridge_mcast_gc mcast_gc;
struct rcu_head rcu;
};
@@ -440,6 +449,7 @@ struct net_bridge {
unsigned long multicast_startup_query_interval;
struct rhashtable mdb_hash_tbl;
+ struct rhashtable sg_port_tbl;
struct hlist_head mcast_gc_list;
struct hlist_head mdb_list;
@@ -804,7 +814,7 @@ struct net_bridge_port_group *
br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
struct net_bridge_port_group __rcu *next,
unsigned char flags, const unsigned char *src,
- u8 filter_mode);
+ u8 filter_mode, u8 rt_protocol);
int br_mdb_hash_init(struct net_bridge *br);
void br_mdb_hash_fini(struct net_bridge *br);
void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp,
@@ -825,6 +835,10 @@ void br_mdb_init(void);
void br_mdb_uninit(void);
void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify);
void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify);
+void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
+ u8 filter_mode);
+void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
+ struct net_bridge_port_group *sg);
#define mlock_dereference(X, br) \
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
@@ -873,6 +887,35 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br,
}
}
+static inline bool br_multicast_is_star_g(const struct br_ip *ip)
+{
+ switch (ip->proto) {
+ case htons(ETH_P_IP):
+ return ipv4_is_zeronet(ip->src.ip4);
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ return ipv6_addr_any(&ip->src.ip6);
+#endif
+ default:
+ return false;
+ }
+}
+
+static inline bool br_multicast_should_handle_mode(const struct net_bridge *br,
+ __be16 proto)
+{
+ switch (proto) {
+ case htons(ETH_P_IP):
+ return !!(br->multicast_igmp_version == 3);
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ return !!(br->multicast_mld_version == 2);
+#endif
+ default:
+ return false;
+ }
+}
+
static inline int br_multicast_igmp_type(const struct sk_buff *skb)
{
return BR_INPUT_SKB_CB(skb)->igmp;
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index a0d1a3265b71..838efc682cff 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -12,7 +12,6 @@
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
-#include <linux/btf_ids.h>
DEFINE_BPF_STORAGE_CACHE(sk_cache);
@@ -379,19 +378,15 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = {
.arg2_type = ARG_PTR_TO_SOCKET,
};
-BTF_ID_LIST(sk_storage_btf_ids)
-BTF_ID_UNUSED
-BTF_ID(struct, sock)
-
const struct bpf_func_proto sk_storage_get_btf_proto = {
.func = bpf_sk_storage_get,
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
- .btf_id = sk_storage_btf_ids,
};
const struct bpf_func_proto sk_storage_delete_btf_proto = {
@@ -400,7 +395,7 @@ const struct bpf_func_proto sk_storage_delete_btf_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
- .btf_id = sk_storage_btf_ids,
+ .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
};
struct bpf_sk_storage_diag {
@@ -679,6 +674,7 @@ struct bpf_iter_seq_sk_storage_map_info {
static struct bpf_local_storage_elem *
bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
struct bpf_local_storage_elem *prev_selem)
+ __acquires(RCU) __releases(RCU)
{
struct bpf_local_storage *sk_storage;
struct bpf_local_storage_elem *selem;
@@ -697,16 +693,16 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
selem = prev_selem;
count = 0;
while (selem) {
- selem = hlist_entry_safe(selem->map_node.next,
+ selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)),
struct bpf_local_storage_elem, map_node);
if (!selem) {
/* not found, unlock and go to the next bucket */
b = &smap->buckets[bucket_id++];
- raw_spin_unlock_bh(&b->lock);
+ rcu_read_unlock();
skip_elems = 0;
break;
}
- sk_storage = rcu_dereference_raw(selem->local_storage);
+ sk_storage = rcu_dereference(selem->local_storage);
if (sk_storage) {
info->skip_elems = skip_elems + count;
return selem;
@@ -716,10 +712,10 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
b = &smap->buckets[i];
- raw_spin_lock_bh(&b->lock);
+ rcu_read_lock();
count = 0;
- hlist_for_each_entry(selem, &b->list, map_node) {
- sk_storage = rcu_dereference_raw(selem->local_storage);
+ hlist_for_each_entry_rcu(selem, &b->list, map_node) {
+ sk_storage = rcu_dereference(selem->local_storage);
if (sk_storage && count >= skip_elems) {
info->bucket_id = i;
info->skip_elems = count;
@@ -727,7 +723,7 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
}
count++;
}
- raw_spin_unlock_bh(&b->lock);
+ rcu_read_unlock();
skip_elems = 0;
}
@@ -786,7 +782,7 @@ static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
ctx.meta = &meta;
ctx.map = info->map;
if (selem) {
- sk_storage = rcu_dereference_raw(selem->local_storage);
+ sk_storage = rcu_dereference(selem->local_storage);
ctx.sk = sk_storage->owner;
ctx.value = SDATA(selem)->data;
}
@@ -802,18 +798,12 @@ static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
}
static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
{
- struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
- struct bpf_local_storage_map *smap;
- struct bpf_local_storage_map_bucket *b;
-
- if (!v) {
+ if (!v)
(void)__bpf_sk_storage_map_seq_show(seq, v);
- } else {
- smap = (struct bpf_local_storage_map *)info->map;
- b = &smap->buckets[info->bucket_id];
- raw_spin_unlock_bh(&b->lock);
- }
+ else
+ rcu_read_unlock();
}
static int bpf_iter_init_sk_storage_map(void *priv_data,
diff --git a/net/core/dev.c b/net/core/dev.c
index 38a172a63318..873b50ac9668 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5442,15 +5442,20 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
if (new) {
u32 i;
+ mutex_lock(&new->aux->used_maps_mutex);
+
/* generic XDP does not work with DEVMAPs that can
* have a bpf_prog installed on an entry
*/
for (i = 0; i < new->aux->used_map_cnt; i++) {
- if (dev_map_can_have_prog(new->aux->used_maps[i]))
- return -EINVAL;
- if (cpu_map_prog_allowed(new->aux->used_maps[i]))
+ if (dev_map_can_have_prog(new->aux->used_maps[i]) ||
+ cpu_map_prog_allowed(new->aux->used_maps[i])) {
+ mutex_unlock(&new->aux->used_maps_mutex);
return -EINVAL;
+ }
}
+
+ mutex_unlock(&new->aux->used_maps_mutex);
}
switch (xdp->command) {
@@ -10016,6 +10021,8 @@ int netdev_refcnt_read(const struct net_device *dev)
}
EXPORT_SYMBOL(netdev_refcnt_read);
+#define WAIT_REFS_MIN_MSECS 1
+#define WAIT_REFS_MAX_MSECS 250
/**
* netdev_wait_allrefs - wait until all references are gone.
* @dev: target net_device
@@ -10028,8 +10035,6 @@ EXPORT_SYMBOL(netdev_refcnt_read);
* We can get stuck here if buggy protocols don't correctly
* call dev_put.
*/
-#define WAIT_REFS_MIN_MSECS 1
-#define WAIT_REFS_MAX_MSECS 250
static void netdev_wait_allrefs(struct net_device *dev)
{
unsigned long rebroadcast_time, warning_time;
diff --git a/net/core/filter.c b/net/core/filter.c
index 08f577114acc..706f8db0ccf8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3803,19 +3803,18 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
-BTF_ID_LIST(bpf_skb_output_btf_ids)
-BTF_ID(struct, sk_buff)
+BTF_ID_LIST_SINGLE(bpf_skb_output_btf_ids, struct, sk_buff)
const struct bpf_func_proto bpf_skb_output_proto = {
.func = bpf_skb_event_output,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_skb_output_btf_ids[0],
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
- .btf_id = bpf_skb_output_btf_ids,
};
static unsigned short bpf_tunnel_key_af(u64 flags)
@@ -4199,19 +4198,18 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
-BTF_ID_LIST(bpf_xdp_output_btf_ids)
-BTF_ID(struct, xdp_buff)
+BTF_ID_LIST_SINGLE(bpf_xdp_output_btf_ids, struct, xdp_buff)
const struct bpf_func_proto bpf_xdp_output_proto = {
.func = bpf_xdp_event_output,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_xdp_output_btf_ids[0],
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
- .btf_id = bpf_xdp_output_btf_ids,
};
BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
@@ -4313,10 +4311,8 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
-#define SOCKOPT_CC_REINIT (1 << 0)
-
static int _bpf_setsockopt(struct sock *sk, int level, int optname,
- char *optval, int optlen, u32 flags)
+ char *optval, int optlen)
{
char devname[IFNAMSIZ];
int val, valbool;
@@ -4449,13 +4445,11 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
sk->sk_prot->setsockopt == tcp_setsockopt) {
if (optname == TCP_CONGESTION) {
char name[TCP_CA_NAME_MAX];
- bool reinit = flags & SOCKOPT_CC_REINIT;
strncpy(name, optval, min_t(long, optlen,
TCP_CA_NAME_MAX-1));
name[TCP_CA_NAME_MAX-1] = 0;
- ret = tcp_set_congestion_control(sk, name, false,
- reinit, true);
+ ret = tcp_set_congestion_control(sk, name, false, true);
} else {
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -4615,9 +4609,7 @@ err_clear:
BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
int, level, int, optname, char *, optval, int, optlen)
{
- u32 flags = 0;
- return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen,
- flags);
+ return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen);
}
static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = {
@@ -4651,11 +4643,7 @@ static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
int, level, int, optname, char *, optval, int, optlen)
{
- u32 flags = 0;
- if (bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
- flags |= SOCKOPT_CC_REINIT;
- return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen,
- flags);
+ return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
}
static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
@@ -9908,17 +9896,6 @@ BTF_SOCK_TYPE_xxx
u32 btf_sock_ids[MAX_BTF_SOCK_TYPE];
#endif
-static bool check_arg_btf_id(u32 btf_id, u32 arg)
-{
- int i;
-
- /* only one argument, no need to check arg */
- for (i = 0; i < MAX_BTF_SOCK_TYPE; i++)
- if (btf_sock_ids[i] == btf_id)
- return true;
- return false;
-}
-
BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
{
/* tcp6_sock type is not generated in dwarf and hence btf,
@@ -9937,7 +9914,7 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_BTF_ID,
- .check_btf_id = check_arg_btf_id,
+ .arg1_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
};
@@ -9954,7 +9931,7 @@ const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_BTF_ID,
- .check_btf_id = check_arg_btf_id,
+ .arg1_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
};
@@ -9978,7 +9955,7 @@ const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_BTF_ID,
- .check_btf_id = check_arg_btf_id,
+ .arg1_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
};
@@ -10002,7 +9979,7 @@ const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_BTF_ID,
- .check_btf_id = check_arg_btf_id,
+ .arg1_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
};
@@ -10024,6 +10001,6 @@ const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_BTF_ID,
- .check_btf_id = check_arg_btf_id,
+ .arg1_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
};
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 078386d7d9a2..e1f05e3fa1d0 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
#include <linux/bpf.h>
+#include <linux/btf_ids.h>
#include <linux/filter.h>
#include <linux/errno.h>
#include <linux/file.h>
@@ -382,7 +383,7 @@ static void *sock_map_lookup(struct bpf_map *map, void *key)
struct sock *sk;
sk = __sock_map_lookup_elem(map, *(u32 *)key);
- if (!sk || !sk_fullsock(sk))
+ if (!sk)
return NULL;
if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt))
return NULL;
@@ -703,6 +704,109 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = {
.arg4_type = ARG_ANYTHING,
};
+struct sock_map_seq_info {
+ struct bpf_map *map;
+ struct sock *sk;
+ u32 index;
+};
+
+struct bpf_iter__sockmap {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct bpf_map *, map);
+ __bpf_md_ptr(void *, key);
+ __bpf_md_ptr(struct sock *, sk);
+};
+
+DEFINE_BPF_ITER_FUNC(sockmap, struct bpf_iter_meta *meta,
+ struct bpf_map *map, void *key,
+ struct sock *sk)
+
+static void *sock_map_seq_lookup_elem(struct sock_map_seq_info *info)
+{
+ if (unlikely(info->index >= info->map->max_entries))
+ return NULL;
+
+ info->sk = __sock_map_lookup_elem(info->map, info->index);
+
+ /* can't return sk directly, since that might be NULL */
+ return info;
+}
+
+static void *sock_map_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct sock_map_seq_info *info = seq->private;
+
+ if (*pos == 0)
+ ++*pos;
+
+ /* pairs with sock_map_seq_stop */
+ rcu_read_lock();
+ return sock_map_seq_lookup_elem(info);
+}
+
+static void *sock_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct sock_map_seq_info *info = seq->private;
+
+ ++*pos;
+ ++info->index;
+
+ return sock_map_seq_lookup_elem(info);
+}
+
+static int sock_map_seq_show(struct seq_file *seq, void *v)
+{
+ struct sock_map_seq_info *info = seq->private;
+ struct bpf_iter__sockmap ctx = {};
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, !v);
+ if (!prog)
+ return 0;
+
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (v) {
+ ctx.key = &info->index;
+ ctx.sk = info->sk;
+ }
+
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static void sock_map_seq_stop(struct seq_file *seq, void *v)
+{
+ if (!v)
+ (void)sock_map_seq_show(seq, NULL);
+
+ /* pairs with sock_map_seq_start */
+ rcu_read_unlock();
+}
+
+static const struct seq_operations sock_map_seq_ops = {
+ .start = sock_map_seq_start,
+ .next = sock_map_seq_next,
+ .stop = sock_map_seq_stop,
+ .show = sock_map_seq_show,
+};
+
+static int sock_map_init_seq_private(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct sock_map_seq_info *info = priv_data;
+
+ info->map = aux->map;
+ return 0;
+}
+
+static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
+ .seq_ops = &sock_map_seq_ops,
+ .init_seq_private = sock_map_init_seq_private,
+ .seq_priv_size = sizeof(struct sock_map_seq_info),
+};
+
static int sock_map_btf_id;
const struct bpf_map_ops sock_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
@@ -717,6 +821,7 @@ const struct bpf_map_ops sock_map_ops = {
.map_check_btf = map_check_no_btf,
.map_btf_name = "bpf_stab",
.map_btf_id = &sock_map_btf_id,
+ .iter_seq_info = &sock_map_iter_seq_info,
};
struct bpf_shtab_elem {
@@ -953,7 +1058,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key,
if (!elem)
goto find_first_elem;
- elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)),
+ elem_next = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&elem->node)),
struct bpf_shtab_elem, node);
if (elem_next) {
memcpy(key_next, elem_next->key, key_size);
@@ -965,7 +1070,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key,
find_first_elem:
for (; i < htab->buckets_num; i++) {
head = &sock_hash_select_bucket(htab, i)->head;
- elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+ elem_next = hlist_entry_safe(rcu_dereference(hlist_first_rcu(head)),
struct bpf_shtab_elem, node);
if (elem_next) {
memcpy(key_next, elem_next->key, key_size);
@@ -1110,7 +1215,7 @@ static void *sock_hash_lookup(struct bpf_map *map, void *key)
struct sock *sk;
sk = __sock_hash_lookup_elem(map, key);
- if (!sk || !sk_fullsock(sk))
+ if (!sk)
return NULL;
if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt))
return NULL;
@@ -1199,6 +1304,117 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
.arg4_type = ARG_ANYTHING,
};
+struct sock_hash_seq_info {
+ struct bpf_map *map;
+ struct bpf_shtab *htab;
+ u32 bucket_id;
+};
+
+static void *sock_hash_seq_find_next(struct sock_hash_seq_info *info,
+ struct bpf_shtab_elem *prev_elem)
+{
+ const struct bpf_shtab *htab = info->htab;
+ struct bpf_shtab_bucket *bucket;
+ struct bpf_shtab_elem *elem;
+ struct hlist_node *node;
+
+ /* try to find next elem in the same bucket */
+ if (prev_elem) {
+ node = rcu_dereference(hlist_next_rcu(&prev_elem->node));
+ elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
+ if (elem)
+ return elem;
+
+ /* no more elements, continue in the next bucket */
+ info->bucket_id++;
+ }
+
+ for (; info->bucket_id < htab->buckets_num; info->bucket_id++) {
+ bucket = &htab->buckets[info->bucket_id];
+ node = rcu_dereference(hlist_first_rcu(&bucket->head));
+ elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
+ if (elem)
+ return elem;
+ }
+
+ return NULL;
+}
+
+static void *sock_hash_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct sock_hash_seq_info *info = seq->private;
+
+ if (*pos == 0)
+ ++*pos;
+
+ /* pairs with sock_hash_seq_stop */
+ rcu_read_lock();
+ return sock_hash_seq_find_next(info, NULL);
+}
+
+static void *sock_hash_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct sock_hash_seq_info *info = seq->private;
+
+ ++*pos;
+ return sock_hash_seq_find_next(info, v);
+}
+
+static int sock_hash_seq_show(struct seq_file *seq, void *v)
+{
+ struct sock_hash_seq_info *info = seq->private;
+ struct bpf_iter__sockmap ctx = {};
+ struct bpf_shtab_elem *elem = v;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, !elem);
+ if (!prog)
+ return 0;
+
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (elem) {
+ ctx.key = elem->key;
+ ctx.sk = elem->sk;
+ }
+
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static void sock_hash_seq_stop(struct seq_file *seq, void *v)
+{
+ if (!v)
+ (void)sock_hash_seq_show(seq, NULL);
+
+ /* pairs with sock_hash_seq_start */
+ rcu_read_unlock();
+}
+
+static const struct seq_operations sock_hash_seq_ops = {
+ .start = sock_hash_seq_start,
+ .next = sock_hash_seq_next,
+ .stop = sock_hash_seq_stop,
+ .show = sock_hash_seq_show,
+};
+
+static int sock_hash_init_seq_private(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct sock_hash_seq_info *info = priv_data;
+
+ info->map = aux->map;
+ info->htab = container_of(aux->map, struct bpf_shtab, map);
+ return 0;
+}
+
+static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
+ .seq_ops = &sock_hash_seq_ops,
+ .init_seq_private = sock_hash_init_seq_private,
+ .seq_priv_size = sizeof(struct sock_hash_seq_info),
+};
+
static int sock_hash_map_btf_id;
const struct bpf_map_ops sock_hash_ops = {
.map_meta_equal = bpf_map_meta_equal,
@@ -1213,6 +1429,7 @@ const struct bpf_map_ops sock_hash_ops = {
.map_check_btf = map_check_no_btf,
.map_btf_name = "bpf_shtab",
.map_btf_id = &sock_hash_map_btf_id,
+ .iter_seq_info = &sock_hash_iter_seq_info,
};
static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
@@ -1323,3 +1540,62 @@ void sock_map_close(struct sock *sk, long timeout)
release_sock(sk);
saved_close(sk, timeout);
}
+
+static int sock_map_iter_attach_target(struct bpf_prog *prog,
+ union bpf_iter_link_info *linfo,
+ struct bpf_iter_aux_info *aux)
+{
+ struct bpf_map *map;
+ int err = -EINVAL;
+
+ if (!linfo->map.map_fd)
+ return -EBADF;
+
+ map = bpf_map_get_with_uref(linfo->map.map_fd);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ if (map->map_type != BPF_MAP_TYPE_SOCKMAP &&
+ map->map_type != BPF_MAP_TYPE_SOCKHASH)
+ goto put_map;
+
+ if (prog->aux->max_rdonly_access > map->key_size) {
+ err = -EACCES;
+ goto put_map;
+ }
+
+ aux->map = map;
+ return 0;
+
+put_map:
+ bpf_map_put_with_uref(map);
+ return err;
+}
+
+static void sock_map_iter_detach_target(struct bpf_iter_aux_info *aux)
+{
+ bpf_map_put_with_uref(aux->map);
+}
+
+static struct bpf_iter_reg sock_map_iter_reg = {
+ .target = "sockmap",
+ .attach_target = sock_map_iter_attach_target,
+ .detach_target = sock_map_iter_detach_target,
+ .show_fdinfo = bpf_iter_map_show_fdinfo,
+ .fill_link_info = bpf_iter_map_fill_link_info,
+ .ctx_arg_info_size = 2,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__sockmap, key),
+ PTR_TO_RDONLY_BUF_OR_NULL },
+ { offsetof(struct bpf_iter__sockmap, sk),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+};
+
+static int __init bpf_sockmap_iter_init(void)
+{
+ sock_map_iter_reg.ctx_arg_info[1].btf_id =
+ btf_sock_ids[BTF_SOCK_TYPE_SOCK];
+ return bpf_iter_reg_target(&sock_map_iter_reg);
+}
+late_initcall(bpf_sockmap_iter_init);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 2da656d984ef..0348dbab4131 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -7,6 +7,7 @@
#ifndef __DSA_PRIV_H
#define __DSA_PRIV_H
+#include <linux/if_bridge.h>
#include <linux/phy.h>
#include <linux/netdevice.h>
#include <linux/netpoll.h>
@@ -194,6 +195,71 @@ dsa_slave_to_master(const struct net_device *dev)
return dp->cpu_dp->master;
}
+/* If under a bridge with vlan_filtering=0, make sure to send pvid-tagged
+ * frames as untagged, since the bridge will not untag them.
+ */
+static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb)
+{
+ struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+ struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
+ struct net_device *br = dp->bridge_dev;
+ struct net_device *dev = skb->dev;
+ struct net_device *upper_dev;
+ struct list_head *iter;
+ u16 vid, pvid, proto;
+ int err;
+
+ if (!br || br_vlan_enabled(br))
+ return skb;
+
+ err = br_vlan_get_proto(br, &proto);
+ if (err)
+ return skb;
+
+ /* Move VLAN tag from data to hwaccel */
+ if (!skb_vlan_tag_present(skb) && hdr->h_vlan_proto == htons(proto)) {
+ skb = skb_vlan_untag(skb);
+ if (!skb)
+ return NULL;
+ }
+
+ if (!skb_vlan_tag_present(skb))
+ return skb;
+
+ vid = skb_vlan_tag_get_id(skb);
+
+ /* We already run under an RCU read-side critical section since
+ * we are called from netif_receive_skb_list_internal().
+ */
+ err = br_vlan_get_pvid_rcu(dev, &pvid);
+ if (err)
+ return skb;
+
+ if (vid != pvid)
+ return skb;
+
+ /* The sad part about attempting to untag from DSA is that we
+ * don't know, unless we check, if the skb will end up in
+ * the bridge's data path - br_allowed_ingress() - or not.
+ * For example, there might be an 8021q upper for the
+ * default_pvid of the bridge, which will steal VLAN-tagged traffic
+ * from the bridge's data path. This is a configuration that DSA
+ * supports because vlan_filtering is 0. In that case, we should
+ * definitely keep the tag, to make sure it keeps working.
+ */
+ netdev_for_each_upper_dev_rcu(dev, upper_dev, iter) {
+ if (!is_vlan_dev(upper_dev))
+ continue;
+
+ if (vid == vlan_dev_vlan_id(upper_dev))
+ return skb;
+ }
+
+ __vlan_hwaccel_clear_tag(skb);
+
+ return skb;
+}
+
/* switch.c */
int dsa_switch_register_notifier(struct dsa_switch *ds);
void dsa_switch_unregister_notifier(struct dsa_switch *ds);
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index cc8512b5f9e2..1dab212a294f 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -140,6 +140,11 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
/* Remove Broadcom tag and update checksum */
skb_pull_rcsum(skb, BRCM_TAG_LEN);
+ /* Set the MAC header to where it should point for
+ * dsa_untag_bridge_pvid() to parse the correct VLAN header.
+ */
+ skb_set_mac_header(skb, -ETH_HLEN);
+
skb->offload_fwd_mark = 1;
return skb;
@@ -191,7 +196,7 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
nskb->data - ETH_HLEN - BRCM_TAG_LEN,
2 * ETH_ALEN);
- return nskb;
+ return dsa_untag_bridge_pvid(nskb);
}
static const struct dsa_device_ops brcm_netdev_ops = {
@@ -219,8 +224,14 @@ static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb,
struct net_device *dev,
struct packet_type *pt)
{
+ struct sk_buff *nskb;
+
/* tag is prepended to the packet */
- return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
+ nskb = brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
+ if (!nskb)
+ return nskb;
+
+ return dsa_untag_bridge_pvid(nskb);
}
static const struct dsa_device_ops brcm_prepend_netdev_ops = {
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index b4fc05cafaa6..d1a7e224adff 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -137,6 +137,7 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
struct dsa_port *dp = dsa_slave_to_port(netdev);
+ struct sk_buff *clone = DSA_SKB_CB(skb)->clone;
struct dsa_switch *ds = dp->ds;
struct ocelot *ocelot = ds->priv;
struct ocelot_port *ocelot_port;
@@ -159,9 +160,8 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
qos_class = skb->priority;
packing(injection, &qos_class, 19, 17, OCELOT_TAG_LEN, PACK, 0);
- if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
- struct sk_buff *clone = DSA_SKB_CB(skb)->clone;
-
+ /* TX timestamping was requested */
+ if (clone) {
rew_op = ocelot_port->ptp_cmd;
/* Retrieve timestamp ID populated inside skb->cb[0] of the
* clone by ocelot_port_add_txtstamp_skb
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index e3939f76b024..74a2ef598c31 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -28,23 +28,18 @@ static u32 unsupported_ops[] = {
static const struct btf_type *tcp_sock_type;
static u32 tcp_sock_id, sock_id;
-static int btf_sk_storage_get_ids[5];
static struct bpf_func_proto btf_sk_storage_get_proto __read_mostly;
-
-static int btf_sk_storage_delete_ids[5];
static struct bpf_func_proto btf_sk_storage_delete_proto __read_mostly;
-static void convert_sk_func_proto(struct bpf_func_proto *to, int *to_btf_ids,
- const struct bpf_func_proto *from)
+static void convert_sk_func_proto(struct bpf_func_proto *to, const struct bpf_func_proto *from)
{
int i;
*to = *from;
- to->btf_id = to_btf_ids;
for (i = 0; i < ARRAY_SIZE(to->arg_type); i++) {
if (to->arg_type[i] == ARG_PTR_TO_SOCKET) {
to->arg_type[i] = ARG_PTR_TO_BTF_ID;
- to->btf_id[i] = tcp_sock_id;
+ to->arg_btf_id[i] = &tcp_sock_id;
}
}
}
@@ -64,12 +59,8 @@ static int bpf_tcp_ca_init(struct btf *btf)
tcp_sock_id = type_id;
tcp_sock_type = btf_type_by_id(btf, tcp_sock_id);
- convert_sk_func_proto(&btf_sk_storage_get_proto,
- btf_sk_storage_get_ids,
- &bpf_sk_storage_get_proto);
- convert_sk_func_proto(&btf_sk_storage_delete_proto,
- btf_sk_storage_delete_ids,
- &bpf_sk_storage_delete_proto);
+ convert_sk_func_proto(&btf_sk_storage_get_proto, &bpf_sk_storage_get_proto);
+ convert_sk_func_proto(&btf_sk_storage_delete_proto, &bpf_sk_storage_delete_proto);
return 0;
}
@@ -185,8 +176,8 @@ static const struct bpf_func_proto bpf_tcp_send_ack_proto = {
/* In case we want to report error later */
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &tcp_sock_id,
.arg2_type = ARG_ANYTHING,
- .btf_id = &tcp_sock_id,
};
static const struct bpf_func_proto *
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 65057744fac8..2a8bfa89a515 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2696,6 +2696,7 @@ int tcp_disconnect(struct sock *sk, int flags)
if (icsk->icsk_ca_ops->release)
icsk->icsk_ca_ops->release(sk);
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+ icsk->icsk_ca_initialized = 0;
tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0;
tcp_clear_retrans(tp);
@@ -3047,7 +3048,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
name[val] = 0;
lock_sock(sk);
- err = tcp_set_congestion_control(sk, name, true, true,
+ err = tcp_set_congestion_control(sk, name, true,
ns_capable(sock_net(sk)->user_ns,
CAP_NET_ADMIN));
release_sock(sk);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 62878cf26d9c..db47ac24d057 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -176,7 +176,7 @@ void tcp_assign_congestion_control(struct sock *sk)
void tcp_init_congestion_control(struct sock *sk)
{
- const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
tcp_sk(sk)->prior_ssthresh = 0;
if (icsk->icsk_ca_ops->init)
@@ -185,6 +185,7 @@ void tcp_init_congestion_control(struct sock *sk)
INET_ECN_xmit(sk);
else
INET_ECN_dontxmit(sk);
+ icsk->icsk_ca_initialized = 1;
}
static void tcp_reinit_congestion_control(struct sock *sk,
@@ -340,7 +341,7 @@ out:
* already initialized.
*/
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
- bool reinit, bool cap_net_admin)
+ bool cap_net_admin)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_congestion_ops *ca;
@@ -361,28 +362,14 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
goto out;
}
- if (!ca) {
+ if (!ca)
err = -ENOENT;
- } else if (!load) {
- const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops;
-
- if (bpf_try_module_get(ca, ca->owner)) {
- if (reinit) {
- tcp_reinit_congestion_control(sk, ca);
- } else {
- icsk->icsk_ca_ops = ca;
- bpf_module_put(old_ca, old_ca->owner);
- }
- } else {
- err = -EBUSY;
- }
- } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) {
+ else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin))
err = -EPERM;
- } else if (!bpf_try_module_get(ca, ca->owner)) {
+ else if (!bpf_try_module_get(ca, ca->owner))
err = -EBUSY;
- } else {
+ else
tcp_reinit_congestion_control(sk, ca);
- }
out:
rcu_read_unlock();
return err;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 50834e7f958e..02d0e2fb77c0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5885,8 +5885,10 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
tp->snd_cwnd_stamp = tcp_jiffies32;
+ icsk->icsk_ca_initialized = 0;
bpf_skops_established(sk, bpf_op, skb);
- tcp_init_congestion_control(sk);
+ if (!icsk->icsk_ca_initialized)
+ tcp_init_congestion_control(sk);
tcp_init_buffer_space(sk);
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 5eb6662f562a..3895697f8540 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -33,12 +33,6 @@
static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
-bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
-{
- return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) &&
- (xs->pool->fq || READ_ONCE(xs->fq_tmp));
-}
-
void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -717,6 +711,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
dev, qid);
if (err) {
xp_destroy(xs->pool);
+ xs->pool = NULL;
sockfd_put(sock);
goto out_unlock;
}
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
index da1f73e43924..b9e896cee5bb 100644
--- a/net/xdp/xsk.h
+++ b/net/xdp/xsk.h
@@ -39,7 +39,6 @@ static inline struct xdp_sock *xdp_sk(struct sock *sk)
return (struct xdp_sock *)sk;
}
-bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
struct xdp_sock **map_entry);
int xsk_map_inc(struct xsk_map *map);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 795d7c81c0ca..e63fadd000db 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -287,7 +287,7 @@ static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_devi
return NULL;
dma_map->dma_pages = kvcalloc(nr_pages, sizeof(*dma_map->dma_pages), GFP_KERNEL);
- if (!dma_map) {
+ if (!dma_map->dma_pages) {
kfree(dma_map);
return NULL;
}
@@ -296,7 +296,7 @@ static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_devi
dma_map->dev = dev;
dma_map->dma_need_sync = false;
dma_map->dma_pages_cnt = nr_pages;
- refcount_set(&dma_map->users, 0);
+ refcount_set(&dma_map->users, 1);
list_add(&dma_map->list, &umem->xsk_dma_list);
return dma_map;
}
@@ -369,7 +369,6 @@ static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_
pool->dev = dma_map->dev;
pool->dma_pages_cnt = dma_map->dma_pages_cnt;
pool->dma_need_sync = dma_map->dma_need_sync;
- refcount_inc(&dma_map->users);
memcpy(pool->dma_pages, dma_map->dma_pages,
pool->dma_pages_cnt * sizeof(*pool->dma_pages));
@@ -390,6 +389,7 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
if (err)
return err;
+ refcount_inc(&dma_map->users);
return 0;
}
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index 5bd8ea9d206a..c014217f5fa7 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -59,22 +59,20 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
du.num_pages = umem->npgs;
du.chunk_size = umem->chunk_size;
du.headroom = umem->headroom;
- du.ifindex = pool->netdev ? pool->netdev->ifindex : 0;
- du.queue_id = pool->queue_id;
+ du.ifindex = (pool && pool->netdev) ? pool->netdev->ifindex : 0;
+ du.queue_id = pool ? pool->queue_id : 0;
du.flags = 0;
if (umem->zc)
du.flags |= XDP_DU_F_ZEROCOPY;
du.refs = refcount_read(&umem->users);
err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
-
- if (!err && pool->fq)
+ if (!err && pool && pool->fq)
err = xsk_diag_put_ring(pool->fq,
XDP_DIAG_UMEM_FILL_RING, nlskb);
- if (!err && pool->cq) {
- err = xsk_diag_put_ring(pool->cq, XDP_DIAG_UMEM_COMPLETION_RING,
- nlskb);
- }
+ if (!err && pool && pool->cq)
+ err = xsk_diag_put_ring(pool->cq,
+ XDP_DIAG_UMEM_COMPLETION_RING, nlskb);
return err;
}
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 2a4fd6677155..0c5df593bc56 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -185,11 +185,6 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
xs = (struct xdp_sock *)sock->sk;
- if (!xsk_is_setup_for_bpf_map(xs)) {
- sockfd_put(sock);
- return -EOPNOTSUPP;
- }
-
map_entry = &m->xsk_map[i];
node = xsk_map_node_alloc(m, map_entry);
if (IS_ERR(node)) {
diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore
index 034800c4d1e6..b2f29bc8dc43 100644
--- a/samples/bpf/.gitignore
+++ b/samples/bpf/.gitignore
@@ -50,4 +50,5 @@ xdp_rxq_info
xdp_sample_pkts
xdp_tx_iptunnel
xdpsock
+xsk_fwd
testfile.img
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
index 4dbee7427d47..7793f6a6ae7e 100644
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c
@@ -29,8 +29,8 @@ int main(int argc, char **argv)
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
struct bpf_program *prog;
struct bpf_object *obj;
+ const char *section;
char filename[256];
- const char *title;
FILE *f;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
@@ -58,8 +58,8 @@ int main(int argc, char **argv)
bpf_object__for_each_program(prog, obj) {
fd = bpf_program__fd(prog);
- title = bpf_program__title(prog, false);
- if (sscanf(title, "socket/%d", &key) != 1) {
+ section = bpf_program__section_name(prog);
+ if (sscanf(section, "socket/%d", &key) != 1) {
fprintf(stderr, "ERROR: finding prog failed\n");
goto cleanup;
}
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c
index 847da9284fa8..f090d0dc60d6 100644
--- a/samples/bpf/spintest_user.c
+++ b/samples/bpf/spintest_user.c
@@ -17,7 +17,7 @@ int main(int ac, char **argv)
long key, next_key, value;
struct bpf_program *prog;
int map_fd, i, j = 0;
- const char *title;
+ const char *section;
struct ksym *sym;
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
@@ -51,8 +51,8 @@ int main(int ac, char **argv)
}
bpf_object__for_each_program(prog, obj) {
- title = bpf_program__title(prog, false);
- if (sscanf(title, "kprobe/%s", symbol) != 1)
+ section = bpf_program__section_name(prog);
+ if (sscanf(section, "kprobe/%s", symbol) != 1)
continue;
/* Attach prog only when symbol exists */
diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map_kern.c
index 8def45c5b697..b0200c8eac09 100644
--- a/samples/bpf/test_map_in_map_kern.c
+++ b/samples/bpf/test_map_in_map_kern.c
@@ -103,10 +103,9 @@ static __always_inline int do_inline_hash_lookup(void *inner_map, u32 port)
return result ? *result : -ENOENT;
}
-SEC("kprobe/" SYSCALL(sys_connect))
+SEC("kprobe/__sys_connect")
int trace_sys_connect(struct pt_regs *ctx)
{
- struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx);
struct sockaddr_in6 *in6;
u16 test_case, port, dst6[8];
int addrlen, ret, inline_ret, ret_key = 0;
@@ -114,8 +113,8 @@ int trace_sys_connect(struct pt_regs *ctx)
void *outer_map, *inner_map;
bool inline_hash = false;
- in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(real_regs);
- addrlen = (int)PT_REGS_PARM3_CORE(real_regs);
+ in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(ctx);
+ addrlen = (int)PT_REGS_PARM3_CORE(ctx);
if (addrlen != sizeof(*in6))
return 0;
diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c
index 98dad57a96c4..c17d3fb5fd64 100644
--- a/samples/bpf/tracex5_user.c
+++ b/samples/bpf/tracex5_user.c
@@ -39,8 +39,8 @@ int main(int ac, char **argv)
struct bpf_program *prog;
struct bpf_object *obj;
int key, fd, progs_fd;
+ const char *section;
char filename[256];
- const char *title;
FILE *f;
setrlimit(RLIMIT_MEMLOCK, &r);
@@ -78,9 +78,9 @@ int main(int ac, char **argv)
}
bpf_object__for_each_program(prog, obj) {
- title = bpf_program__title(prog, false);
+ section = bpf_program__section_name(prog);
/* register only syscalls to PROG_ARRAY */
- if (sscanf(title, "kprobe/%d", &key) != 1)
+ if (sscanf(section, "kprobe/%d", &key) != 1)
continue;
fd = bpf_program__fd(prog);
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 004c0622c913..3dd366e9474d 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -111,7 +111,7 @@ static void print_avail_progs(struct bpf_object *obj)
bpf_object__for_each_program(pos, obj) {
if (bpf_program__is_xdp(pos))
- printf(" %s\n", bpf_program__title(pos, false));
+ printf(" %s\n", bpf_program__section_name(pos));
}
}
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 4cead341ae57..b220173dbe1e 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -78,6 +78,7 @@ static int opt_pkt_count;
static u16 opt_pkt_size = MIN_PKT_SIZE;
static u32 opt_pkt_fill_pattern = 0x12345678;
static bool opt_extra_stats;
+static bool opt_quiet;
static int opt_poll;
static int opt_interval = 1;
static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
@@ -718,6 +719,7 @@ static struct option long_options[] = {
{"tx-pkt-size", required_argument, 0, 's'},
{"tx-pkt-pattern", required_argument, 0, 'P'},
{"extra-stats", no_argument, 0, 'x'},
+ {"quiet", no_argument, 0, 'Q'},
{0, 0, 0, 0}
};
@@ -753,6 +755,7 @@ static void usage(const char *prog)
" Min size: %d, Max size %d.\n"
" -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n"
" -x, --extra-stats Display extra statistics.\n"
+ " -Q, --quiet Do not display any stats.\n"
"\n";
fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE,
opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE,
@@ -768,7 +771,7 @@ static void parse_command_line(int argc, char **argv)
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:x",
+ c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQ",
long_options, &option_index);
if (c == -1)
break;
@@ -852,6 +855,9 @@ static void parse_command_line(int argc, char **argv)
case 'x':
opt_extra_stats = 1;
break;
+ case 'Q':
+ opt_quiet = 1;
+ break;
default:
usage(basename(argv[0]));
}
@@ -897,6 +903,14 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
if (!xsk->outstanding_tx)
return;
+ /* In copy mode, Tx is driven by a syscall so we need to use e.g. sendto() to
+ * really send the packets. In zero-copy mode we do not have to do this, since Tx
+ * is driven by the NAPI loop. So as an optimization, we do not have to call
+ * sendto() all the time in zero-copy mode for l2fwd.
+ */
+ if (opt_xdp_bind_flags & XDP_COPY)
+ kick_tx(xsk);
+
ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size :
xsk->outstanding_tx;
@@ -1117,6 +1131,7 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
while (ret != rcvd) {
if (ret < 0)
exit_with_error(-ret);
+ complete_tx_l2fwd(xsk, fds);
if (xsk_ring_prod__needs_wakeup(&xsk->tx))
kick_tx(xsk);
ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
@@ -1277,9 +1292,11 @@ int main(int argc, char **argv)
setlocale(LC_ALL, "");
- ret = pthread_create(&pt, NULL, poller, NULL);
- if (ret)
- exit_with_error(ret);
+ if (!opt_quiet) {
+ ret = pthread_create(&pt, NULL, poller, NULL);
+ if (ret)
+ exit_with_error(ret);
+ }
prev_time = get_nsecs();
start_time = prev_time;
@@ -1293,7 +1310,8 @@ int main(int argc, char **argv)
benchmark_done = true;
- pthread_join(pt, NULL);
+ if (!opt_quiet)
+ pthread_join(pt, NULL);
xdpsock_cleanup();
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index e6e2d9e5ff48..dbde59d343b1 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -341,9 +341,9 @@ fi
vmlinux_link vmlinux "${kallsymso}" ${btf_vmlinux_bin_o}
# fill in BTF IDs
-if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then
-info BTFIDS vmlinux
-${RESOLVE_BTFIDS} vmlinux
+if [ -n "${CONFIG_DEBUG_INFO_BTF}" -a -n "${CONFIG_BPF}" ]; then
+ info BTFIDS vmlinux
+ ${RESOLVE_BTFIDS} vmlinux
fi
if [ -n "${CONFIG_BUILDTIME_TABLE_SORT}" ]; then
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index 815ac9804aee..f33cb02de95c 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -19,7 +19,7 @@ man8dir = $(mandir)/man8
# Load targets for building eBPF helpers man page.
include ../../Makefile.helpers
-MAN8_RST = $(filter-out $(HELPERS_RST),$(wildcard *.rst))
+MAN8_RST = $(wildcard bpftool*.rst)
_DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST))
DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))
@@ -28,12 +28,23 @@ man: man8 helpers
man8: $(DOC_MAN8)
RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
+RST2MAN_OPTS += --verbose
+
+list_pages = $(sort $(basename $(filter-out $(1),$(MAN8_RST))))
+see_also = $(subst " ",, \
+ "\n" \
+ "SEE ALSO\n" \
+ "========\n" \
+ "\t**bpf**\ (2),\n" \
+ "\t**bpf-helpers**\\ (7)" \
+ $(foreach page,$(call list_pages,$(1)),",\n\t**$(page)**\\ (8)") \
+ "\n")
$(OUTPUT)%.8: %.rst
ifndef RST2MAN_DEP
$(error "rst2man not found, but required to generate man pages")
endif
- $(QUIET_GEN)rst2man $< > $@
+ $(QUIET_GEN)( cat $< ; printf "%b" $(call see_also,$<) ) | rst2man $(RST2MAN_OPTS) > $@
clean: helpers-clean
$(call QUIET_CLEAN, Documentation)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index 896f4c6c2870..ff4d327a582e 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -71,26 +71,12 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
+ .. include:: common_options.rst
EXAMPLES
========
**# bpftool btf dump id 1226**
+
::
[1] PTR '(anon)' type_id=2
@@ -104,6 +90,7 @@ EXAMPLES
This gives an example of default output for all supported BTF kinds.
**$ cat prog.c**
+
::
struct fwd_struct;
@@ -144,6 +131,7 @@ This gives an example of default output for all supported BTF kinds.
}
**$ bpftool btf dump file prog.o**
+
::
[1] PTR '(anon)' type_id=2
@@ -229,20 +217,3 @@ All the standard ways to specify map or program are supported:
**# bpftool btf dump prog tag b88e0a09b1d9759d**
**# bpftool btf dump prog pinned /sys/fs/bpf/prog_name**
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index a226aee3574f..790944c35602 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -116,26 +116,11 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
+ .. include:: common_options.rst
-f, --bpffs
Show file names of pinned programs.
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
-
EXAMPLES
========
|
@@ -158,19 +143,3 @@ EXAMPLES
::
ID AttachType AttachFlags Name
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index 8609f06e71de..dd3771bdbc57 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -71,35 +71,4 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
+ .. include:: common_options.rst
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index df85dbd962c0..84cf0639696f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -126,26 +126,12 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON,
- this option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
+ .. include:: common_options.rst
EXAMPLES
========
**$ cat example.c**
+
::
#include <stdbool.h>
@@ -187,6 +173,7 @@ This is example BPF application with two BPF programs and a mix of BPF maps
and global variables.
**$ bpftool gen skeleton example.o**
+
::
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
@@ -241,6 +228,7 @@ and global variables.
#endif /* __EXAMPLE_SKEL_H__ */
**$ cat example_user.c**
+
::
#include "example.skel.h"
@@ -283,6 +271,7 @@ and global variables.
}
**# ./example_user**
+
::
my_map name: my_map
@@ -290,19 +279,3 @@ and global variables.
my_static_var: 7
This is a stripped-out version of skeleton generated for above example code.
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
index 070ffacb42b5..51f49bead619 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
@@ -51,16 +51,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -d, --debug
- Print all logs available, even debug-level information. This
- includes logs from libbpf as well as from the verifier, when
- attempting to load programs.
+ .. include:: common_options.rst
EXAMPLES
========
@@ -77,19 +68,3 @@ EXAMPLES
Create a file-based bpf iterator from bpf_iter_hashmap.o and map with
id 20, and pin it to /sys/fs/bpf/my_hashmap
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
index 4a52e7a93339..5f7db2a837cc 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -21,7 +21,7 @@ LINK COMMANDS
| **bpftool** **link { show | list }** [*LINK*]
| **bpftool** **link pin** *LINK* *FILE*
-| **bpftool** **link detach *LINK*
+| **bpftool** **link detach** *LINK*
| **bpftool** **link help**
|
| *LINK* := { **id** *LINK_ID* | **pinned** *FILE* }
@@ -62,18 +62,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
+ .. include:: common_options.rst
-f, --bpffs
When showing BPF links, show file names of pinned
@@ -83,10 +72,6 @@ OPTIONS
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
- -d, --debug
- Print all logs available, even debug-level information. This
- includes logs from libbpf.
-
EXAMPLES
========
**# bpftool link show**
@@ -121,20 +106,3 @@ EXAMPLES
::
-rw------- 1 root root 0 Apr 23 21:39 link
-
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 083db6c2fc67..dade10cdf295 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -23,7 +23,8 @@ MAP COMMANDS
| **bpftool** **map** { **show** | **list** } [*MAP*]
| **bpftool** **map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \
-| **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
+| **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] \
+| [**dev** *NAME*]
| **bpftool** **map dump** *MAP*
| **bpftool** **map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*]
| **bpftool** **map lookup** *MAP* [**key** *DATA*]
@@ -67,7 +68,7 @@ DESCRIPTION
maps. On such kernels bpftool will automatically emit this
information as well.
- **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
+ **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**dev** *NAME*]
Create a new map with given parameters and pin it to *bpffs*
as *FILE*.
@@ -75,6 +76,11 @@ DESCRIPTION
desired flags, e.g. 1024 for **BPF_F_MMAPABLE** (see bpf.h
UAPI header for existing flags).
+ To create maps of type array-of-maps or hash-of-maps, the
+ **inner_map** keyword must be used to pass an inner map. The
+ kernel needs it to collect metadata related to the inner maps
+ that the new map will work with.
+
Keyword **dev** expects a network interface name, and is used
to request hardware offload for the map.
@@ -155,18 +161,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
+ .. include:: common_options.rst
-f, --bpffs
Show file names of pinned maps.
@@ -175,13 +170,10 @@ OPTIONS
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
-
EXAMPLES
========
**# bpftool map show**
+
::
10: hash name some_map flags 0x0
@@ -203,6 +195,7 @@ The following three commands are equivalent:
**# bpftool map dump id 10**
+
::
key: 00 01 02 03 value: 00 01 02 03 04 05 06 07
@@ -210,6 +203,7 @@ The following three commands are equivalent:
Found 2 elements
**# bpftool map getnext id 10 key 0 1 2 3**
+
::
key:
@@ -276,19 +270,3 @@ would be lost as soon as bpftool exits).
key: 00 00 00 00 value: 22 02 00 00
Found 1 element
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index aa7450736179..d8165d530937 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -75,22 +75,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
+ .. include:: common_options.rst
EXAMPLES
========
@@ -187,20 +172,3 @@ EXAMPLES
::
xdp:
-
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
index 9c592b7c6775..e958ce91de72 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -40,22 +40,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available from libbpf, including debug-level
- information.
+ .. include:: common_options.rst
EXAMPLES
========
@@ -78,20 +63,3 @@ EXAMPLES
{"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
{"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
{"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]
-
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 82e356b664e8..358c7309d419 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -210,18 +210,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
+ .. include:: common_options.rst
-f, --bpffs
When showing BPF programs, show file names of pinned
@@ -234,11 +223,6 @@ OPTIONS
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
- -d, --debug
- Print all logs available, even debug-level information. This
- includes logs from libbpf as well as from the verifier, when
- attempting to load programs.
-
EXAMPLES
========
**# bpftool prog show**
@@ -342,19 +326,3 @@ EXAMPLES
40176203 cycles (83.05%)
42518139 instructions # 1.06 insns per cycle (83.39%)
123 llc_misses # 2.89 LLC misses per million insns (83.15%)
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
index d93cd1cb8b0f..506e70ee78e9 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
@@ -60,23 +60,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short generic help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
-
- -d, --debug
- Print all logs available, even debug-level information. This
- includes logs from libbpf as well as from the verifier, when
- attempting to load programs.
+ .. include:: common_options.rst
EXAMPLES
========
@@ -98,20 +82,3 @@ EXAMPLES
::
Registered tcp_congestion_ops cubic id 110
-
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 420d4d5df8b6..e7d949334961 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -46,18 +46,7 @@ DESCRIPTION
OPTIONS
=======
- -h, --help
- Print short help message (similar to **bpftool help**).
-
- -V, --version
- Print version number (similar to **bpftool version**).
-
- -j, --json
- Generate JSON output. For commands that cannot produce JSON, this
- option has no effect.
-
- -p, --pretty
- Generate human-readable JSON output. Implies **-j**.
+ .. include:: common_options.rst
-m, --mapcompat
Allow loading maps with unknown map definitions.
@@ -65,24 +54,3 @@ OPTIONS
-n, --nomount
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
-
- -d, --debug
- Print all logs available, even debug-level information. This
- includes logs from libbpf as well as from the verifier, when
- attempting to load programs.
-
-SEE ALSO
-========
- **bpf**\ (2),
- **bpf-helpers**\ (7),
- **bpftool-btf**\ (8),
- **bpftool-cgroup**\ (8),
- **bpftool-feature**\ (8),
- **bpftool-gen**\ (8),
- **bpftool-iter**\ (8),
- **bpftool-link**\ (8),
- **bpftool-map**\ (8),
- **bpftool-net**\ (8),
- **bpftool-perf**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst
new file mode 100644
index 000000000000..05d06c74dcaa
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/common_options.rst
@@ -0,0 +1,22 @@
+-h, --help
+ Print short help message (similar to **bpftool help**).
+
+-V, --version
+ Print version number (similar to **bpftool version**), and optional
+ features that were included when bpftool was compiled. Optional
+ features include linking against libbfd to provide the disassembler
+ for JIT-ted programs (**bpftool prog dump jited**) and usage of BPF
+ skeletons (some features like **bpftool prog profile** or showing
+ pids associated to BPF objects may rely on it).
+
+-j, --json
+ Generate JSON output. For commands that cannot produce JSON, this
+ option has no effect.
+
+-p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+-d, --debug
+ Print all logs available, even debug-level information. This includes
+ logs from libbpf as well as from the verifier, when attempting to
+ load programs.
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 7b68e3c0a5fb..3f1da30c4da6 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -709,9 +709,26 @@ _bpftool()
"$cur" ) )
return 0
;;
- key|value|flags|name|entries)
+ key|value|flags|entries)
return 0
;;
+ inner_map)
+ COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ id)
+ _bpftool_get_map_ids
+ ;;
+ name)
+ case $pprev in
+ inner_map)
+ _bpftool_get_map_names
+ ;;
+ *)
+ return 0
+ ;;
+ esac
+ ;;
*)
_bpftool_once_attr 'type'
_bpftool_once_attr 'key'
@@ -719,6 +736,9 @@ _bpftool()
_bpftool_once_attr 'entries'
_bpftool_once_attr 'name'
_bpftool_once_attr 'flags'
+ if _bpftool_search_list 'array_of_maps' 'hash_of_maps'; then
+ _bpftool_once_attr 'inner_map'
+ fi
_bpftool_once_attr 'dev'
return 0
;;
diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c
index 86501cd3c763..7fea83bedf48 100644
--- a/tools/bpf/bpftool/json_writer.c
+++ b/tools/bpf/bpftool/json_writer.c
@@ -119,6 +119,12 @@ void jsonw_pretty(json_writer_t *self, bool on)
self->pretty = on;
}
+void jsonw_reset(json_writer_t *self)
+{
+ assert(self->depth == 0);
+ self->sep = '\0';
+}
+
/* Basic blocks */
static void jsonw_begin(json_writer_t *self, int c)
{
diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h
index 35cf1f00f96c..8ace65cdb92f 100644
--- a/tools/bpf/bpftool/json_writer.h
+++ b/tools/bpf/bpftool/json_writer.h
@@ -27,6 +27,9 @@ void jsonw_destroy(json_writer_t **self_p);
/* Cause output to have pretty whitespace */
void jsonw_pretty(json_writer_t *self, bool on);
+/* Reset separator to create new JSON */
+void jsonw_reset(json_writer_t *self);
+
/* Add property name */
void jsonw_name(json_writer_t *self, const char *name);
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 4a191fcbeb82..682daaa49e6a 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -70,13 +70,42 @@ static int do_help(int argc, char **argv)
static int do_version(int argc, char **argv)
{
+#ifdef HAVE_LIBBFD_SUPPORT
+ const bool has_libbfd = true;
+#else
+ const bool has_libbfd = false;
+#endif
+#ifdef BPFTOOL_WITHOUT_SKELETONS
+ const bool has_skeletons = false;
+#else
+ const bool has_skeletons = true;
+#endif
+
if (json_output) {
- jsonw_start_object(json_wtr);
+ jsonw_start_object(json_wtr); /* root object */
+
jsonw_name(json_wtr, "version");
jsonw_printf(json_wtr, "\"%s\"", BPFTOOL_VERSION);
- jsonw_end_object(json_wtr);
+
+ jsonw_name(json_wtr, "features");
+ jsonw_start_object(json_wtr); /* features */
+ jsonw_bool_field(json_wtr, "libbfd", has_libbfd);
+ jsonw_bool_field(json_wtr, "skeletons", has_skeletons);
+ jsonw_end_object(json_wtr); /* features */
+
+ jsonw_end_object(json_wtr); /* root object */
} else {
+ unsigned int nb_features = 0;
+
printf("%s v%s\n", bin_name, BPFTOOL_VERSION);
+ printf("features:");
+ if (has_libbfd) {
+ printf(" libbfd");
+ nb_features++;
+ }
+ if (has_skeletons)
+ printf("%s skeletons", nb_features++ ? "," : "");
+ printf("\n");
}
return 0;
}
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index bc0071228f88..a7efbd84fbcc 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -213,8 +213,9 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
jsonw_end_object(json_wtr);
}
-static void print_entry_error(struct bpf_map_info *info, unsigned char *key,
- const char *error_msg)
+static void
+print_entry_error_msg(struct bpf_map_info *info, unsigned char *key,
+ const char *error_msg)
{
int msg_size = strlen(error_msg);
bool single_line, break_names;
@@ -232,6 +233,40 @@ static void print_entry_error(struct bpf_map_info *info, unsigned char *key,
printf("\n");
}
+static void
+print_entry_error(struct bpf_map_info *map_info, void *key, int lookup_errno)
+{
+ /* For prog_array maps or arrays of maps, failure to lookup the value
+ * means there is no entry for that key. Do not print an error message
+ * in that case.
+ */
+ if ((map_is_map_of_maps(map_info->type) ||
+ map_is_map_of_progs(map_info->type)) && lookup_errno == ENOENT)
+ return;
+
+ if (json_output) {
+ jsonw_start_object(json_wtr); /* entry */
+ jsonw_name(json_wtr, "key");
+ print_hex_data_json(key, map_info->key_size);
+ jsonw_name(json_wtr, "value");
+ jsonw_start_object(json_wtr); /* error */
+ jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
+ jsonw_end_object(json_wtr); /* error */
+ jsonw_end_object(json_wtr); /* entry */
+ } else {
+ const char *msg = NULL;
+
+ if (lookup_errno == ENOENT)
+ msg = "<no entry>";
+ else if (lookup_errno == ENOSPC &&
+ map_info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
+ msg = "<cannot read>";
+
+ print_entry_error_msg(map_info, key,
+ msg ? : strerror(lookup_errno));
+ }
+}
+
static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
unsigned char *value)
{
@@ -713,56 +748,23 @@ static int dump_map_elem(int fd, void *key, void *value,
struct bpf_map_info *map_info, struct btf *btf,
json_writer_t *btf_wtr)
{
- int num_elems = 0;
- int lookup_errno;
-
- if (!bpf_map_lookup_elem(fd, key, value)) {
- if (json_output) {
- print_entry_json(map_info, key, value, btf);
- } else {
- if (btf) {
- struct btf_dumper d = {
- .btf = btf,
- .jw = btf_wtr,
- .is_plain_text = true,
- };
-
- do_dump_btf(&d, map_info, key, value);
- } else {
- print_entry_plain(map_info, key, value);
- }
- num_elems++;
- }
- return num_elems;
+ if (bpf_map_lookup_elem(fd, key, value)) {
+ print_entry_error(map_info, key, errno);
+ return -1;
}
- /* lookup error handling */
- lookup_errno = errno;
-
- if (map_is_map_of_maps(map_info->type) ||
- map_is_map_of_progs(map_info->type))
- return 0;
-
if (json_output) {
- jsonw_start_object(json_wtr);
- jsonw_name(json_wtr, "key");
- print_hex_data_json(key, map_info->key_size);
- jsonw_name(json_wtr, "value");
- jsonw_start_object(json_wtr);
- jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
- jsonw_end_object(json_wtr);
- jsonw_end_object(json_wtr);
- } else {
- const char *msg = NULL;
-
- if (lookup_errno == ENOENT)
- msg = "<no entry>";
- else if (lookup_errno == ENOSPC &&
- map_info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
- msg = "<cannot read>";
+ print_entry_json(map_info, key, value, btf);
+ } else if (btf) {
+ struct btf_dumper d = {
+ .btf = btf,
+ .jw = btf_wtr,
+ .is_plain_text = true,
+ };
- print_entry_error(map_info, key,
- msg ? : strerror(lookup_errno));
+ do_dump_btf(&d, map_info, key, value);
+ } else {
+ print_entry_plain(map_info, key, value);
}
return 0;
@@ -873,7 +875,8 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr,
err = 0;
break;
}
- num_elems += dump_map_elem(fd, key, value, info, btf, wtr);
+ if (!dump_map_elem(fd, key, value, info, btf, wtr))
+ num_elems++;
prev_key = key;
}
@@ -1247,7 +1250,7 @@ static int do_create(int argc, char **argv)
{
struct bpf_create_map_attr attr = { NULL, };
const char *pinfile;
- int err, fd;
+ int err = -1, fd;
if (!REQ_ARGS(7))
return -1;
@@ -1262,13 +1265,13 @@ static int do_create(int argc, char **argv)
if (attr.map_type) {
p_err("map type already specified");
- return -1;
+ goto exit;
}
attr.map_type = map_type_from_str(*argv);
if ((int)attr.map_type < 0) {
p_err("unrecognized map type: %s", *argv);
- return -1;
+ goto exit;
}
NEXT_ARG();
} else if (is_prefix(*argv, "name")) {
@@ -1277,43 +1280,56 @@ static int do_create(int argc, char **argv)
} else if (is_prefix(*argv, "key")) {
if (parse_u32_arg(&argc, &argv, &attr.key_size,
"key size"))
- return -1;
+ goto exit;
} else if (is_prefix(*argv, "value")) {
if (parse_u32_arg(&argc, &argv, &attr.value_size,
"value size"))
- return -1;
+ goto exit;
} else if (is_prefix(*argv, "entries")) {
if (parse_u32_arg(&argc, &argv, &attr.max_entries,
"max entries"))
- return -1;
+ goto exit;
} else if (is_prefix(*argv, "flags")) {
if (parse_u32_arg(&argc, &argv, &attr.map_flags,
"flags"))
- return -1;
+ goto exit;
} else if (is_prefix(*argv, "dev")) {
NEXT_ARG();
if (attr.map_ifindex) {
p_err("offload device already specified");
- return -1;
+ goto exit;
}
attr.map_ifindex = if_nametoindex(*argv);
if (!attr.map_ifindex) {
p_err("unrecognized netdevice '%s': %s",
*argv, strerror(errno));
- return -1;
+ goto exit;
}
NEXT_ARG();
+ } else if (is_prefix(*argv, "inner_map")) {
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ int inner_map_fd;
+
+ NEXT_ARG();
+ if (!REQ_ARGS(2))
+ usage();
+ inner_map_fd = map_parse_fd_and_info(&argc, &argv,
+ &info, &len);
+ if (inner_map_fd < 0)
+ return -1;
+ attr.inner_map_fd = inner_map_fd;
} else {
p_err("unknown arg %s", *argv);
- return -1;
+ goto exit;
}
}
if (!attr.name) {
p_err("map name not specified");
- return -1;
+ goto exit;
}
set_max_rlimit();
@@ -1321,17 +1337,22 @@ static int do_create(int argc, char **argv)
fd = bpf_create_map_xattr(&attr);
if (fd < 0) {
p_err("map create failed: %s", strerror(errno));
- return -1;
+ goto exit;
}
err = do_pin_fd(fd, pinfile);
close(fd);
if (err)
- return err;
+ goto exit;
if (json_output)
jsonw_null(json_wtr);
- return 0;
+
+exit:
+ if (attr.inner_map_fd > 0)
+ close(attr.inner_map_fd);
+
+ return err;
}
static int do_pop_dequeue(int argc, char **argv)
@@ -1417,7 +1438,7 @@ static int do_help(int argc, char **argv)
"Usage: %1$s %2$s { show | list } [MAP]\n"
" %1$s %2$s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n"
" entries MAX_ENTRIES name NAME [flags FLAGS] \\\n"
- " [dev NAME]\n"
+ " [inner_map MAP] [dev NAME]\n"
" %1$s %2$s dump MAP\n"
" %1$s %2$s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n"
" %1$s %2$s lookup MAP [key DATA]\n"
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index d393eb8263a6..d942c1e3372c 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -29,6 +29,9 @@
#include "main.h"
#include "xlated_dumper.h"
+#define BPF_METADATA_PREFIX "bpf_metadata_"
+#define BPF_METADATA_PREFIX_LEN (sizeof(BPF_METADATA_PREFIX) - 1)
+
const char * const prog_type_name[] = {
[BPF_PROG_TYPE_UNSPEC] = "unspec",
[BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
@@ -151,6 +154,198 @@ static void show_prog_maps(int fd, __u32 num_maps)
}
}
+static void *find_metadata(int prog_fd, struct bpf_map_info *map_info)
+{
+ struct bpf_prog_info prog_info;
+ __u32 prog_info_len;
+ __u32 map_info_len;
+ void *value = NULL;
+ __u32 *map_ids;
+ int nr_maps;
+ int key = 0;
+ int map_fd;
+ int ret;
+ __u32 i;
+
+ memset(&prog_info, 0, sizeof(prog_info));
+ prog_info_len = sizeof(prog_info);
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ if (ret)
+ return NULL;
+
+ if (!prog_info.nr_map_ids)
+ return NULL;
+
+ map_ids = calloc(prog_info.nr_map_ids, sizeof(__u32));
+ if (!map_ids)
+ return NULL;
+
+ nr_maps = prog_info.nr_map_ids;
+ memset(&prog_info, 0, sizeof(prog_info));
+ prog_info.nr_map_ids = nr_maps;
+ prog_info.map_ids = ptr_to_u64(map_ids);
+ prog_info_len = sizeof(prog_info);
+
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ if (ret)
+ goto free_map_ids;
+
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ map_fd = bpf_map_get_fd_by_id(map_ids[i]);
+ if (map_fd < 0)
+ goto free_map_ids;
+
+ memset(map_info, 0, sizeof(*map_info));
+ map_info_len = sizeof(*map_info);
+ ret = bpf_obj_get_info_by_fd(map_fd, map_info, &map_info_len);
+ if (ret < 0) {
+ close(map_fd);
+ goto free_map_ids;
+ }
+
+ if (map_info->type != BPF_MAP_TYPE_ARRAY ||
+ map_info->key_size != sizeof(int) ||
+ map_info->max_entries != 1 ||
+ !map_info->btf_value_type_id ||
+ !strstr(map_info->name, ".rodata")) {
+ close(map_fd);
+ continue;
+ }
+
+ value = malloc(map_info->value_size);
+ if (!value) {
+ close(map_fd);
+ goto free_map_ids;
+ }
+
+ if (bpf_map_lookup_elem(map_fd, &key, value)) {
+ close(map_fd);
+ free(value);
+ value = NULL;
+ goto free_map_ids;
+ }
+
+ close(map_fd);
+ break;
+ }
+
+free_map_ids:
+ free(map_ids);
+ return value;
+}
+
+static bool has_metadata_prefix(const char *s)
+{
+ return strncmp(s, BPF_METADATA_PREFIX, BPF_METADATA_PREFIX_LEN) == 0;
+}
+
+static void show_prog_metadata(int fd, __u32 num_maps)
+{
+ const struct btf_type *t_datasec, *t_var;
+ struct bpf_map_info map_info;
+ struct btf_var_secinfo *vsi;
+ bool printed_header = false;
+ struct btf *btf = NULL;
+ unsigned int i, vlen;
+ void *value = NULL;
+ const char *name;
+ int err;
+
+ if (!num_maps)
+ return;
+
+ memset(&map_info, 0, sizeof(map_info));
+ value = find_metadata(fd, &map_info);
+ if (!value)
+ return;
+
+ err = btf__get_from_id(map_info.btf_id, &btf);
+ if (err || !btf)
+ goto out_free;
+
+ t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id);
+ if (!btf_is_datasec(t_datasec))
+ goto out_free;
+
+ vlen = btf_vlen(t_datasec);
+ vsi = btf_var_secinfos(t_datasec);
+
+ /* We don't proceed to check the kinds of the elements of the DATASEC.
+ * The verifier enforces them to be BTF_KIND_VAR.
+ */
+
+ if (json_output) {
+ struct btf_dumper d = {
+ .btf = btf,
+ .jw = json_wtr,
+ .is_plain_text = false,
+ };
+
+ for (i = 0; i < vlen; i++, vsi++) {
+ t_var = btf__type_by_id(btf, vsi->type);
+ name = btf__name_by_offset(btf, t_var->name_off);
+
+ if (!has_metadata_prefix(name))
+ continue;
+
+ if (!printed_header) {
+ jsonw_name(json_wtr, "metadata");
+ jsonw_start_object(json_wtr);
+ printed_header = true;
+ }
+
+ jsonw_name(json_wtr, name + BPF_METADATA_PREFIX_LEN);
+ err = btf_dumper_type(&d, t_var->type, value + vsi->offset);
+ if (err) {
+ p_err("btf dump failed: %d", err);
+ break;
+ }
+ }
+ if (printed_header)
+ jsonw_end_object(json_wtr);
+ } else {
+ json_writer_t *btf_wtr = jsonw_new(stdout);
+ struct btf_dumper d = {
+ .btf = btf,
+ .jw = btf_wtr,
+ .is_plain_text = true,
+ };
+
+ if (!btf_wtr) {
+ p_err("jsonw alloc failed");
+ goto out_free;
+ }
+
+ for (i = 0; i < vlen; i++, vsi++) {
+ t_var = btf__type_by_id(btf, vsi->type);
+ name = btf__name_by_offset(btf, t_var->name_off);
+
+ if (!has_metadata_prefix(name))
+ continue;
+
+ if (!printed_header) {
+ printf("\tmetadata:");
+ printed_header = true;
+ }
+
+ printf("\n\t\t%s = ", name + BPF_METADATA_PREFIX_LEN);
+
+ jsonw_reset(btf_wtr);
+ err = btf_dumper_type(&d, t_var->type, value + vsi->offset);
+ if (err) {
+ p_err("btf dump failed: %d", err);
+ break;
+ }
+ }
+ if (printed_header)
+ jsonw_destroy(&btf_wtr);
+ }
+
+out_free:
+ btf__free(btf);
+ free(value);
+}
+
static void print_prog_header_json(struct bpf_prog_info *info)
{
jsonw_uint_field(json_wtr, "id", info->id);
@@ -228,6 +423,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
emit_obj_refs_json(&refs_table, info->id, json_wtr);
+ show_prog_metadata(fd, info->nr_map_ids);
+
jsonw_end_object(json_wtr);
}
@@ -297,6 +494,8 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
printf("\n");
+
+ show_prog_metadata(fd, info->nr_map_ids);
}
static int show_prog(int fd)
@@ -1304,7 +1503,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
enum bpf_prog_type prog_type = common_prog_type;
if (prog_type == BPF_PROG_TYPE_UNSPEC) {
- const char *sec_name = bpf_program__title(pos, false);
+ const char *sec_name = bpf_program__section_name(pos);
err = get_prog_type_by_name(sec_name, &prog_type,
&expected_attach_type);
@@ -1398,7 +1597,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
err = bpf_obj_pin(bpf_program__fd(prog), pinfile);
if (err) {
p_err("failed to pin program %s",
- bpf_program__title(prog, false));
+ bpf_program__section_name(prog));
goto err_close_obj;
}
} else {
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index fe8eb537688b..66cb92136de4 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
include ../../scripts/Makefile.include
+include ../../scripts/Makefile.arch
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
@@ -29,6 +30,7 @@ endif
AR = $(HOSTAR)
CC = $(HOSTCC)
LD = $(HOSTLD)
+ARCH = $(HOSTARCH)
OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/
diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index 210b086188a3..57890b357f85 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -76,6 +76,13 @@ extern u32 name[];
#define BTF_ID_LIST_GLOBAL(name) \
__BTF_ID_LIST(name, globl)
+/* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with
+ * a single entry.
+ */
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) \
+ BTF_ID_LIST(name) \
+ BTF_ID(prefix, typename)
+
/*
* The BTF_ID_UNUSED macro defines 4 zero bytes.
* It's used when we want to define 'unused' entry
@@ -140,6 +147,7 @@ extern struct btf_id_set name;
#define BTF_ID(prefix, name)
#define BTF_ID_UNUSED
#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
#define BTF_SET_END(name)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 8dda13880957..a22812561064 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -124,6 +124,7 @@ enum bpf_cmd {
BPF_ENABLE_STATS,
BPF_ITER_CREATE,
BPF_LINK_DETACH,
+ BPF_PROG_BIND_MAP,
};
enum bpf_map_type {
@@ -658,6 +659,12 @@ union bpf_attr {
__u32 flags;
} iter_create;
+ struct { /* struct used by BPF_PROG_BIND_MAP command */
+ __u32 prog_fd;
+ __u32 map_fd;
+ __u32 flags; /* extra flags */
+ } prog_bind_map;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -1447,8 +1454,8 @@ union bpf_attr {
* Return
* The return value depends on the result of the test, and can be:
*
- * * 0, if the *skb* task belongs to the cgroup2.
- * * 1, if the *skb* task does not belong to the cgroup2.
+ * * 0, if current task belongs to the cgroup2.
+ * * 1, if current task does not belong to the cgroup2.
* * A negative error code, if an error occurred.
*
* long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
@@ -3349,38 +3356,38 @@ union bpf_attr {
* Description
* Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
* Return
- * *sk* if casting is valid, or NULL otherwise.
+ * *sk* if casting is valid, or **NULL** otherwise.
*
* struct tcp_sock *bpf_skc_to_tcp_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp_sock* pointer.
* Return
- * *sk* if casting is valid, or NULL otherwise.
+ * *sk* if casting is valid, or **NULL** otherwise.
*
* struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer.
* Return
- * *sk* if casting is valid, or NULL otherwise.
+ * *sk* if casting is valid, or **NULL** otherwise.
*
* struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
* Return
- * *sk* if casting is valid, or NULL otherwise.
+ * *sk* if casting is valid, or **NULL** otherwise.
*
* struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
* Description
* Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
* Return
- * *sk* if casting is valid, or NULL otherwise.
+ * *sk* if casting is valid, or **NULL** otherwise.
*
* long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
* Description
* Return a user or a kernel stack in bpf program provided buffer.
* To achieve this, the helper needs *task*, which is a valid
- * pointer to struct task_struct. To store the stacktrace, the
- * bpf program provides *buf* with a nonnegative *size*.
+ * pointer to **struct task_struct**. To store the stacktrace, the
+ * bpf program provides *buf* with a nonnegative *size*.
*
* The last argument, *flags*, holds the number of stack frames to
* skip (from 0 to 255), masked with
@@ -3410,12 +3417,12 @@ union bpf_attr {
* long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
* Description
* Load header option. Support reading a particular TCP header
- * option for bpf program (BPF_PROG_TYPE_SOCK_OPS).
+ * option for bpf program (**BPF_PROG_TYPE_SOCK_OPS**).
*
* If *flags* is 0, it will search the option from the
- * sock_ops->skb_data. The comment in "struct bpf_sock_ops"
+ * *skops*\ **->skb_data**. The comment in **struct bpf_sock_ops**
* has details on what skb_data contains under different
- * sock_ops->op.
+ * *skops*\ **->op**.
*
* The first byte of the *searchby_res* specifies the
* kind that it wants to search.
@@ -3435,7 +3442,7 @@ union bpf_attr {
* [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ].
*
* To search for the standard window scale option (3),
- * the searchby_res should be [ 3, 0, 0, .... 0 ].
+ * the *searchby_res* should be [ 3, 0, 0, .... 0 ].
* Note, kind-length must be 0 for regular option.
*
* Searching for No-Op (0) and End-of-Option-List (1) are
@@ -3445,27 +3452,30 @@ union bpf_attr {
* of a header option.
*
* Supported flags:
+ *
* * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the
* saved_syn packet or the just-received syn packet.
*
* Return
- * >0 when found, the header option is copied to *searchby_res*.
- * The return value is the total length copied.
+ * > 0 when found, the header option is copied to *searchby_res*.
+ * The return value is the total length copied. On failure, a
+ * negative error code is returned:
*
- * **-EINVAL** If param is invalid
+ * **-EINVAL** if a parameter is invalid.
*
- * **-ENOMSG** The option is not found
+ * **-ENOMSG** if the option is not found.
*
- * **-ENOENT** No syn packet available when
- * **BPF_LOAD_HDR_OPT_TCP_SYN** is used
+ * **-ENOENT** if no syn packet is available when
+ * **BPF_LOAD_HDR_OPT_TCP_SYN** is used.
*
- * **-ENOSPC** Not enough space. Only *len* number of
- * bytes are copied.
+ * **-ENOSPC** if there is not enough space. Only *len* number of
+ * bytes are copied.
*
- * **-EFAULT** Cannot parse the header options in the packet
+ * **-EFAULT** on failure to parse the header options in the
+ * packet.
*
- * **-EPERM** This helper cannot be used under the
- * current sock_ops->op.
+ * **-EPERM** if the helper cannot be used under the current
+ * *skops*\ **->op**.
*
* long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags)
* Description
@@ -3483,44 +3493,44 @@ union bpf_attr {
* by searching the same option in the outgoing skb.
*
* This helper can only be called during
- * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**.
*
* Return
* 0 on success, or negative error in case of failure:
*
- * **-EINVAL** If param is invalid
+ * **-EINVAL** If param is invalid.
*
- * **-ENOSPC** Not enough space in the header.
- * Nothing has been written
+ * **-ENOSPC** if there is not enough space in the header.
+ * Nothing has been written
*
- * **-EEXIST** The option has already existed
+ * **-EEXIST** if the option already exists.
*
- * **-EFAULT** Cannot parse the existing header options
+ * **-EFAULT** on failrue to parse the existing header options.
*
- * **-EPERM** This helper cannot be used under the
- * current sock_ops->op.
+ * **-EPERM** if the helper cannot be used under the current
+ * *skops*\ **->op**.
*
* long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags)
* Description
* Reserve *len* bytes for the bpf header option. The
- * space will be used by bpf_store_hdr_opt() later in
- * BPF_SOCK_OPS_WRITE_HDR_OPT_CB.
+ * space will be used by **bpf_store_hdr_opt**\ () later in
+ * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**.
*
- * If bpf_reserve_hdr_opt() is called multiple times,
+ * If **bpf_reserve_hdr_opt**\ () is called multiple times,
* the total number of bytes will be reserved.
*
* This helper can only be called during
- * BPF_SOCK_OPS_HDR_OPT_LEN_CB.
+ * **BPF_SOCK_OPS_HDR_OPT_LEN_CB**.
*
* Return
* 0 on success, or negative error in case of failure:
*
- * **-EINVAL** if param is invalid
+ * **-EINVAL** if a parameter is invalid.
*
- * **-ENOSPC** Not enough space in the header.
+ * **-ENOSPC** if there is not enough space in the header.
*
- * **-EPERM** This helper cannot be used under the
- * current sock_ops->op.
+ * **-EPERM** if the helper cannot be used under the current
+ * *skops*\ **->op**.
*
* void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags)
* Description
@@ -3560,9 +3570,9 @@ union bpf_attr {
*
* long bpf_d_path(struct path *path, char *buf, u32 sz)
* Description
- * Return full path for given 'struct path' object, which
- * needs to be the kernel BTF 'path' object. The path is
- * returned in the provided buffer 'buf' of size 'sz' and
+ * Return full path for given **struct path** object, which
+ * needs to be the kernel BTF *path* object. The path is
+ * returned in the provided buffer *buf* of size *sz* and
* is zero terminated.
*
* Return
@@ -3573,7 +3583,7 @@ union bpf_attr {
* long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr)
* Description
* Read *size* bytes from user space address *user_ptr* and store
- * the data in *dst*. This is a wrapper of copy_from_user().
+ * the data in *dst*. This is a wrapper of **copy_from_user**\ ().
* Return
* 0 on success, or a negative error in case of failure.
*/
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 82b983ff6569..2baa1308737c 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -872,3 +872,19 @@ int bpf_enable_stats(enum bpf_stats_type type)
return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
}
+
+int bpf_prog_bind_map(int prog_fd, int map_fd,
+ const struct bpf_prog_bind_opts *opts)
+{
+ union bpf_attr attr;
+
+ if (!OPTS_VALID(opts, bpf_prog_bind_opts))
+ return -EINVAL;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_bind_map.prog_fd = prog_fd;
+ attr.prog_bind_map.map_fd = map_fd;
+ attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0);
+
+ return sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 015d13f25fcc..8c1ac4b42f90 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -243,6 +243,14 @@ LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */
LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
+struct bpf_prog_bind_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags;
+};
+#define bpf_prog_bind_opts__last_field flags
+
+LIBBPF_API int bpf_prog_bind_map(int prog_fd, int map_fd,
+ const struct bpf_prog_bind_opts *opts);
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 91f0ad0e0325..2a55320d87d0 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -57,14 +57,16 @@ LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,
__u32 *size);
-LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const char *sec_name, __u32 insns_cnt,
- void **func_info, __u32 *cnt);
-LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf,
- const struct btf_ext *btf_ext,
- const char *sec_name, __u32 insns_cnt,
- void **line_info, __u32 *cnt);
+LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
+int btf_ext__reloc_func_info(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const char *sec_name, __u32 insns_cnt,
+ void **func_info, __u32 *cnt);
+LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
+int btf_ext__reloc_line_info(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const char *sec_name, __u32 insns_cnt,
+ void **line_info, __u32 *cnt);
LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 46d727b45c81..32dc444224d8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -73,8 +73,6 @@
#define __printf(a, b) __attribute__((format(printf, a, b)))
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
-static struct bpf_program *bpf_object__find_prog_by_idx(struct bpf_object *obj,
- int idx);
static const struct btf_type *
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
@@ -176,6 +174,8 @@ enum kern_feature_id {
FEAT_EXP_ATTACH_TYPE,
/* bpf_probe_read_{kernel,user}[_str] helpers */
FEAT_PROBE_READ_KERN,
+ /* BPF_PROG_BIND_MAP is supported */
+ FEAT_PROG_BIND_MAP,
__FEAT_CNT,
};
@@ -193,6 +193,7 @@ struct reloc_desc {
int insn_idx;
int map_idx;
int sym_off;
+ bool processed;
};
struct bpf_sec_def;
@@ -217,20 +218,45 @@ struct bpf_sec_def {
* linux/filter.h.
*/
struct bpf_program {
- /* Index in elf obj file, for relocation use. */
- int idx;
- char *name;
- int prog_ifindex;
- char *section_name;
const struct bpf_sec_def *sec_def;
- /* section_name with / replaced by _; makes recursive pinning
+ char *sec_name;
+ size_t sec_idx;
+ /* this program's instruction offset (in number of instructions)
+ * within its containing ELF section
+ */
+ size_t sec_insn_off;
+ /* number of original instructions in ELF section belonging to this
+ * program, not taking into account subprogram instructions possible
+ * appended later during relocation
+ */
+ size_t sec_insn_cnt;
+ /* Offset (in number of instructions) of the start of instruction
+ * belonging to this BPF program within its containing main BPF
+ * program. For the entry-point (main) BPF program, this is always
+ * zero. For a sub-program, this gets reset before each of main BPF
+ * programs are processed and relocated and is used to determined
+ * whether sub-program was already appended to the main program, and
+ * if yes, at which instruction offset.
+ */
+ size_t sub_insn_off;
+
+ char *name;
+ /* sec_name with / replaced by _; makes recursive pinning
* in bpf_object__pin_programs easier
*/
char *pin_name;
+
+ /* instructions that belong to BPF program; insns[0] is located at
+ * sec_insn_off instruction within its ELF section in ELF file, so
+ * when mapping ELF file instruction index to the local instruction,
+ * one needs to subtract sec_insn_off; and vice versa.
+ */
struct bpf_insn *insns;
- size_t insns_cnt, main_prog_cnt;
- enum bpf_prog_type type;
- bool load;
+ /* actual number of instruction in this BPF program's image; for
+ * entry-point BPF programs this includes the size of main program
+ * itself plus all the used sub-programs, appended at the end
+ */
+ size_t insns_cnt;
struct reloc_desc *reloc_desc;
int nr_reloc;
@@ -246,7 +272,10 @@ struct bpf_program {
void *priv;
bpf_program_clear_priv_t clear_priv;
+ bool load;
+ enum bpf_prog_type type;
enum bpf_attach_type expected_attach_type;
+ int prog_ifindex;
__u32 attach_btf_id;
__u32 attach_prog_fd;
void *func_info;
@@ -382,9 +411,10 @@ struct bpf_object {
struct extern_desc *externs;
int nr_extern;
int kconfig_map_idx;
+ int rodata_map_idx;
bool loaded;
- bool has_pseudo_calls;
+ bool has_subcalls;
/*
* Information when doing elf related work. Only valid if fd
@@ -446,6 +476,8 @@ static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
+static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
+ size_t off, __u32 sym_type, GElf_Sym *sym);
void bpf_program__unload(struct bpf_program *prog)
{
@@ -486,152 +518,160 @@ static void bpf_program__exit(struct bpf_program *prog)
bpf_program__unload(prog);
zfree(&prog->name);
- zfree(&prog->section_name);
+ zfree(&prog->sec_name);
zfree(&prog->pin_name);
zfree(&prog->insns);
zfree(&prog->reloc_desc);
prog->nr_reloc = 0;
prog->insns_cnt = 0;
- prog->idx = -1;
+ prog->sec_idx = -1;
}
static char *__bpf_program__pin_name(struct bpf_program *prog)
{
char *name, *p;
- name = p = strdup(prog->section_name);
+ name = p = strdup(prog->sec_name);
while ((p = strchr(p, '/')))
*p = '_';
return name;
}
+static bool insn_is_subprog_call(const struct bpf_insn *insn)
+{
+ return BPF_CLASS(insn->code) == BPF_JMP &&
+ BPF_OP(insn->code) == BPF_CALL &&
+ BPF_SRC(insn->code) == BPF_K &&
+ insn->src_reg == BPF_PSEUDO_CALL &&
+ insn->dst_reg == 0 &&
+ insn->off == 0;
+}
+
static int
-bpf_program__init(void *data, size_t size, const char *section_name, int idx,
- struct bpf_program *prog)
+bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
+ const char *name, size_t sec_idx, const char *sec_name,
+ size_t sec_off, void *insn_data, size_t insn_data_sz)
{
- const size_t bpf_insn_sz = sizeof(struct bpf_insn);
+ int i;
- if (size == 0 || size % bpf_insn_sz) {
- pr_warn("corrupted section '%s', size: %zu\n",
- section_name, size);
+ if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
+ pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
+ sec_name, name, sec_off, insn_data_sz);
return -EINVAL;
}
memset(prog, 0, sizeof(*prog));
+ prog->obj = obj;
+
+ prog->sec_idx = sec_idx;
+ prog->sec_insn_off = sec_off / BPF_INSN_SZ;
+ prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
+ /* insns_cnt can later be increased by appending used subprograms */
+ prog->insns_cnt = prog->sec_insn_cnt;
+
+ prog->type = BPF_PROG_TYPE_UNSPEC;
+ prog->load = true;
- prog->section_name = strdup(section_name);
- if (!prog->section_name) {
- pr_warn("failed to alloc name for prog under section(%d) %s\n",
- idx, section_name);
+ prog->instances.fds = NULL;
+ prog->instances.nr = -1;
+
+ prog->sec_name = strdup(sec_name);
+ if (!prog->sec_name)
+ goto errout;
+
+ prog->name = strdup(name);
+ if (!prog->name)
goto errout;
- }
prog->pin_name = __bpf_program__pin_name(prog);
- if (!prog->pin_name) {
- pr_warn("failed to alloc pin name for prog under section(%d) %s\n",
- idx, section_name);
+ if (!prog->pin_name)
goto errout;
- }
- prog->insns = malloc(size);
- if (!prog->insns) {
- pr_warn("failed to alloc insns for prog under section %s\n",
- section_name);
+ prog->insns = malloc(insn_data_sz);
+ if (!prog->insns)
goto errout;
+ memcpy(prog->insns, insn_data, insn_data_sz);
+
+ for (i = 0; i < prog->insns_cnt; i++) {
+ if (insn_is_subprog_call(&prog->insns[i])) {
+ obj->has_subcalls = true;
+ break;
+ }
}
- prog->insns_cnt = size / bpf_insn_sz;
- memcpy(prog->insns, data, size);
- prog->idx = idx;
- prog->instances.fds = NULL;
- prog->instances.nr = -1;
- prog->type = BPF_PROG_TYPE_UNSPEC;
- prog->load = true;
return 0;
errout:
+ pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
bpf_program__exit(prog);
return -ENOMEM;
}
static int
-bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
- const char *section_name, int idx)
+bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
+ const char *sec_name, int sec_idx)
{
- struct bpf_program prog, *progs;
+ struct bpf_program *prog, *progs;
+ void *data = sec_data->d_buf;
+ size_t sec_sz = sec_data->d_size, sec_off, prog_sz;
int nr_progs, err;
-
- err = bpf_program__init(data, size, section_name, idx, &prog);
- if (err)
- return err;
+ const char *name;
+ GElf_Sym sym;
progs = obj->programs;
nr_progs = obj->nr_programs;
+ sec_off = 0;
- progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(progs[0]));
- if (!progs) {
- /*
- * In this case the original obj->programs
- * is still valid, so don't need special treat for
- * bpf_close_object().
- */
- pr_warn("failed to alloc a new program under section '%s'\n",
- section_name);
- bpf_program__exit(&prog);
- return -ENOMEM;
- }
-
- pr_debug("elf: found program '%s'\n", prog.section_name);
- obj->programs = progs;
- obj->nr_programs = nr_progs + 1;
- prog.obj = obj;
- progs[nr_progs] = prog;
- return 0;
-}
-
-static int
-bpf_object__init_prog_names(struct bpf_object *obj)
-{
- Elf_Data *symbols = obj->efile.symbols;
- struct bpf_program *prog;
- size_t pi, si;
+ while (sec_off < sec_sz) {
+ if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) {
+ pr_warn("sec '%s': failed to find program symbol at offset %zu\n",
+ sec_name, sec_off);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
- for (pi = 0; pi < obj->nr_programs; pi++) {
- const char *name = NULL;
+ prog_sz = sym.st_size;
- prog = &obj->programs[pi];
+ name = elf_sym_str(obj, sym.st_name);
+ if (!name) {
+ pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
+ sec_name, sec_off);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
- for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; si++) {
- GElf_Sym sym;
+ if (sec_off + prog_sz > sec_sz) {
+ pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
+ sec_name, sec_off);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
- if (!gelf_getsym(symbols, si, &sym))
- continue;
- if (sym.st_shndx != prog->idx)
- continue;
- if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL)
- continue;
+ pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
+ sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
- name = elf_sym_str(obj, sym.st_name);
- if (!name) {
- pr_warn("prog '%s': failed to get symbol name\n",
- prog->section_name);
- return -LIBBPF_ERRNO__LIBELF;
- }
+ progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
+ if (!progs) {
+ /*
+ * In this case the original obj->programs
+ * is still valid, so don't need special treat for
+ * bpf_close_object().
+ */
+ pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
+ sec_name, name);
+ return -ENOMEM;
}
+ obj->programs = progs;
- if (!name && prog->idx == obj->efile.text_shndx)
- name = ".text";
+ prog = &progs[nr_progs];
- if (!name) {
- pr_warn("prog '%s': failed to find program symbol\n",
- prog->section_name);
- return -EINVAL;
- }
+ err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
+ sec_off, data + sec_off, prog_sz);
+ if (err)
+ return err;
- prog->name = strdup(name);
- if (!prog->name)
- return -ENOMEM;
+ nr_progs++;
+ obj->nr_programs = nr_progs;
+
+ sec_off += prog_sz;
}
return 0;
@@ -1033,6 +1073,7 @@ static struct bpf_object *bpf_object__new(const char *path,
obj->efile.bss_shndx = -1;
obj->efile.st_ops_shndx = -1;
obj->kconfig_map_idx = -1;
+ obj->rodata_map_idx = -1;
obj->kern_version = get_kernel_version();
obj->loaded = false;
@@ -1391,6 +1432,8 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
obj->efile.rodata->d_size);
if (err)
return err;
+
+ obj->rodata_map_idx = obj->nr_maps - 1;
}
if (obj->efile.bss_shndx >= 0) {
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
@@ -2675,6 +2718,26 @@ static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
return data;
}
+static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
+ size_t off, __u32 sym_type, GElf_Sym *sym)
+{
+ Elf_Data *symbols = obj->efile.symbols;
+ size_t n = symbols->d_size / sizeof(GElf_Sym);
+ int i;
+
+ for (i = 0; i < n; i++) {
+ if (!gelf_getsym(symbols, i, sym))
+ continue;
+ if (sym->st_shndx != sec_idx || sym->st_value != off)
+ continue;
+ if (GELF_ST_TYPE(sym->st_info) != sym_type)
+ continue;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
static bool is_sec_name_dwarf(const char *name)
{
/* approximation, but the actual list is too long */
@@ -2715,19 +2778,55 @@ static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
return false;
}
+static int cmp_progs(const void *_a, const void *_b)
+{
+ const struct bpf_program *a = _a;
+ const struct bpf_program *b = _b;
+
+ if (a->sec_idx != b->sec_idx)
+ return a->sec_idx < b->sec_idx ? -1 : 1;
+
+ /* sec_insn_off can't be the same within the section */
+ return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
+}
+
static int bpf_object__elf_collect(struct bpf_object *obj)
{
Elf *elf = obj->efile.elf;
Elf_Data *btf_ext_data = NULL;
Elf_Data *btf_data = NULL;
- Elf_Scn *scn = NULL;
int idx = 0, err = 0;
+ const char *name;
+ Elf_Data *data;
+ Elf_Scn *scn;
+ GElf_Shdr sh;
+ /* a bunch of ELF parsing functionality depends on processing symbols,
+ * so do the first pass and find the symbol table
+ */
+ scn = NULL;
while ((scn = elf_nextscn(elf, scn)) != NULL) {
- const char *name;
- GElf_Shdr sh;
- Elf_Data *data;
+ if (elf_sec_hdr(obj, scn, &sh))
+ return -LIBBPF_ERRNO__FORMAT;
+
+ if (sh.sh_type == SHT_SYMTAB) {
+ if (obj->efile.symbols) {
+ pr_warn("elf: multiple symbol tables in %s\n", obj->path);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ data = elf_sec_data(obj, scn);
+ if (!data)
+ return -LIBBPF_ERRNO__FORMAT;
+
+ obj->efile.symbols = data;
+ obj->efile.symbols_shndx = elf_ndxscn(scn);
+ obj->efile.strtabidx = sh.sh_link;
+ }
+ }
+
+ scn = NULL;
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
idx++;
if (elf_sec_hdr(obj, scn, &sh))
@@ -2766,20 +2865,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
btf_ext_data = data;
} else if (sh.sh_type == SHT_SYMTAB) {
- if (obj->efile.symbols) {
- pr_warn("elf: multiple symbol tables in %s\n", obj->path);
- return -LIBBPF_ERRNO__FORMAT;
- }
- obj->efile.symbols = data;
- obj->efile.symbols_shndx = idx;
- obj->efile.strtabidx = sh.sh_link;
+ /* already processed during the first pass above */
} else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
if (sh.sh_flags & SHF_EXECINSTR) {
if (strcmp(name, ".text") == 0)
obj->efile.text_shndx = idx;
- err = bpf_object__add_program(obj, data->d_buf,
- data->d_size,
- name, idx);
+ err = bpf_object__add_programs(obj, data, name, idx);
if (err)
return err;
} else if (strcmp(name, DATA_SEC) == 0) {
@@ -2833,6 +2924,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
return -LIBBPF_ERRNO__FORMAT;
}
+
+ /* sort BPF programs by section name and in-section instruction offset
+ * for faster search */
+ qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
+
return bpf_object__init_btf(obj, btf_data, btf_ext_data);
}
@@ -3157,20 +3253,6 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return 0;
}
-static struct bpf_program *
-bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
-{
- struct bpf_program *prog;
- size_t i;
-
- for (i = 0; i < obj->nr_programs; i++) {
- prog = &obj->programs[i];
- if (prog->idx == idx)
- return prog;
- }
- return NULL;
-}
-
struct bpf_program *
bpf_object__find_program_by_title(const struct bpf_object *obj,
const char *title)
@@ -3178,12 +3260,18 @@ bpf_object__find_program_by_title(const struct bpf_object *obj,
struct bpf_program *pos;
bpf_object__for_each_program(pos, obj) {
- if (pos->section_name && !strcmp(pos->section_name, title))
+ if (pos->sec_name && !strcmp(pos->sec_name, title))
return pos;
}
return NULL;
}
+static bool prog_is_subprog(const struct bpf_object *obj,
+ const struct bpf_program *prog)
+{
+ return prog->sec_idx == obj->efile.text_shndx && obj->has_subcalls;
+}
+
struct bpf_program *
bpf_object__find_program_by_name(const struct bpf_object *obj,
const char *name)
@@ -3191,6 +3279,8 @@ bpf_object__find_program_by_name(const struct bpf_object *obj,
struct bpf_program *prog;
bpf_object__for_each_program(prog, obj) {
+ if (prog_is_subprog(obj, prog))
+ continue;
if (!strcmp(prog->name, name))
return prog;
}
@@ -3240,6 +3330,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
const char *sym_sec_name;
struct bpf_map *map;
+ reloc_desc->processed = false;
+
/* sub-program call relocation */
if (insn->code == (BPF_JMP | BPF_CALL)) {
if (insn->src_reg != BPF_PSEUDO_CALL) {
@@ -3261,7 +3353,6 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
reloc_desc->type = RELO_CALL;
reloc_desc->insn_idx = insn_idx;
reloc_desc->sym_off = sym->st_value;
- obj->has_pseudo_calls = true;
return 0;
}
@@ -3361,14 +3452,50 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
return 0;
}
+static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
+{
+ return insn_idx >= prog->sec_insn_off &&
+ insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
+}
+
+static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
+ size_t sec_idx, size_t insn_idx)
+{
+ int l = 0, r = obj->nr_programs - 1, m;
+ struct bpf_program *prog;
+
+ while (l < r) {
+ m = l + (r - l + 1) / 2;
+ prog = &obj->programs[m];
+
+ if (prog->sec_idx < sec_idx ||
+ (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
+ l = m;
+ else
+ r = m - 1;
+ }
+ /* matching program could be at index l, but it still might be the
+ * wrong one, so we need to double check conditions for the last time
+ */
+ prog = &obj->programs[l];
+ if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
+ return prog;
+ return NULL;
+}
+
static int
-bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
- Elf_Data *data, struct bpf_object *obj)
+bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data)
{
Elf_Data *symbols = obj->efile.symbols;
const char *relo_sec_name, *sec_name;
size_t sec_idx = shdr->sh_info;
+ struct bpf_program *prog;
+ struct reloc_desc *relos;
int err, i, nrels;
+ const char *sym_name;
+ __u32 insn_idx;
+ GElf_Sym sym;
+ GElf_Rel rel;
relo_sec_name = elf_sec_str(obj, shdr->sh_name);
sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
@@ -3379,19 +3506,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
relo_sec_name, sec_idx, sec_name);
nrels = shdr->sh_size / shdr->sh_entsize;
- prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
- if (!prog->reloc_desc) {
- pr_warn("failed to alloc memory in relocation\n");
- return -ENOMEM;
- }
- prog->nr_reloc = nrels;
-
for (i = 0; i < nrels; i++) {
- const char *sym_name;
- __u32 insn_idx;
- GElf_Sym sym;
- GElf_Rel rel;
-
if (!gelf_getrel(data, i, &rel)) {
pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
return -LIBBPF_ERRNO__FORMAT;
@@ -3408,15 +3523,42 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
}
insn_idx = rel.r_offset / BPF_INSN_SZ;
- sym_name = elf_sym_str(obj, sym.st_name) ?: "<?>";
+ /* relocations against static functions are recorded as
+ * relocations against the section that contains a function;
+ * in such case, symbol will be STT_SECTION and sym.st_name
+ * will point to empty string (0), so fetch section name
+ * instead
+ */
+ if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0)
+ sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx));
+ else
+ sym_name = elf_sym_str(obj, sym.st_name);
+ sym_name = sym_name ?: "<?";
pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
relo_sec_name, i, insn_idx, sym_name);
- err = bpf_program__record_reloc(prog, &prog->reloc_desc[i],
+ prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
+ if (!prog) {
+ pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n",
+ relo_sec_name, i, sec_name, insn_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ relos = libbpf_reallocarray(prog->reloc_desc,
+ prog->nr_reloc + 1, sizeof(*relos));
+ if (!relos)
+ return -ENOMEM;
+ prog->reloc_desc = relos;
+
+ /* adjust insn_idx to local BPF program frame of reference */
+ insn_idx -= prog->sec_insn_off;
+ err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
insn_idx, sym_name, &sym, &rel);
if (err)
return err;
+
+ prog->nr_reloc++;
}
return 0;
}
@@ -3758,6 +3900,52 @@ static int probe_kern_probe_read_kernel(void)
return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
}
+static int probe_prog_bind_map(void)
+{
+ struct bpf_load_program_attr prg_attr;
+ struct bpf_create_map_attr map_attr;
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int ret, map, prog;
+
+ memset(&map_attr, 0, sizeof(map_attr));
+ map_attr.map_type = BPF_MAP_TYPE_ARRAY;
+ map_attr.key_size = sizeof(int);
+ map_attr.value_size = 32;
+ map_attr.max_entries = 1;
+
+ map = bpf_create_map_xattr(&map_attr);
+ if (map < 0) {
+ ret = -errno;
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+ __func__, cp, -ret);
+ return ret;
+ }
+
+ memset(&prg_attr, 0, sizeof(prg_attr));
+ prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ prg_attr.insns = insns;
+ prg_attr.insns_cnt = ARRAY_SIZE(insns);
+ prg_attr.license = "GPL";
+
+ prog = bpf_load_program_xattr(&prg_attr, NULL, 0);
+ if (prog < 0) {
+ close(map);
+ return 0;
+ }
+
+ ret = bpf_prog_bind_map(prog, map, NULL);
+
+ close(map);
+ close(prog);
+
+ return ret >= 0;
+}
+
enum kern_feature_result {
FEAT_UNKNOWN = 0,
FEAT_SUPPORTED = 1,
@@ -3798,6 +3986,9 @@ static struct kern_feature_desc {
},
[FEAT_PROBE_READ_KERN] = {
"bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
+ },
+ [FEAT_PROG_BIND_MAP] = {
+ "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
}
};
@@ -4089,75 +4280,6 @@ err_out:
return err;
}
-static int
-check_btf_ext_reloc_err(struct bpf_program *prog, int err,
- void *btf_prog_info, const char *info_name)
-{
- if (err != -ENOENT) {
- pr_warn("Error in loading %s for sec %s.\n",
- info_name, prog->section_name);
- return err;
- }
-
- /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */
-
- if (btf_prog_info) {
- /*
- * Some info has already been found but has problem
- * in the last btf_ext reloc. Must have to error out.
- */
- pr_warn("Error in relocating %s for sec %s.\n",
- info_name, prog->section_name);
- return err;
- }
-
- /* Have problem loading the very first info. Ignore the rest. */
- pr_warn("Cannot find %s for main program sec %s. Ignore all %s.\n",
- info_name, prog->section_name, info_name);
- return 0;
-}
-
-static int
-bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj,
- const char *section_name, __u32 insn_offset)
-{
- int err;
-
- if (!insn_offset || prog->func_info) {
- /*
- * !insn_offset => main program
- *
- * For sub prog, the main program's func_info has to
- * be loaded first (i.e. prog->func_info != NULL)
- */
- err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext,
- section_name, insn_offset,
- &prog->func_info,
- &prog->func_info_cnt);
- if (err)
- return check_btf_ext_reloc_err(prog, err,
- prog->func_info,
- "bpf_func_info");
-
- prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext);
- }
-
- if (!insn_offset || prog->line_info) {
- err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext,
- section_name, insn_offset,
- &prog->line_info,
- &prog->line_info_cnt);
- if (err)
- return check_btf_ext_reloc_err(prog, err,
- prog->line_info,
- "bpf_line_info");
-
- prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext);
- }
-
- return 0;
-}
-
#define BPF_CORE_SPEC_MAX_LEN 64
/* represents BPF CO-RE field or array element accessor */
@@ -4927,8 +5049,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
*val = sz;
} else {
pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
- bpf_program__title(prog, false),
- relo->kind, relo->insn_off / 8);
+ prog->name, relo->kind, relo->insn_off / 8);
return -EINVAL;
}
if (validate)
@@ -4950,8 +5071,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
if (byte_sz >= 8) {
/* bitfield can't be read with 64-bit read */
pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
- bpf_program__title(prog, false),
- relo->kind, relo->insn_off / 8);
+ prog->name, relo->kind, relo->insn_off / 8);
return -E2BIG;
}
byte_sz *= 2;
@@ -5116,8 +5236,8 @@ static int bpf_core_calc_relo(const struct bpf_program *prog,
} else if (err == -EOPNOTSUPP) {
/* EOPNOTSUPP means unknown/unsupported relocation */
pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
- bpf_program__title(prog, false), relo_idx,
- core_relo_kind_str(relo->kind), relo->kind, relo->insn_off / 8);
+ prog->name, relo_idx, core_relo_kind_str(relo->kind),
+ relo->kind, relo->insn_off / 8);
}
return err;
@@ -5131,7 +5251,7 @@ static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
int insn_idx, struct bpf_insn *insn)
{
pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
- bpf_program__title(prog, false), relo_idx, insn_idx);
+ prog->name, relo_idx, insn_idx);
insn->code = BPF_JMP | BPF_CALL;
insn->dst_reg = 0;
insn->src_reg = 0;
@@ -5175,6 +5295,11 @@ static int bpf_core_patch_insn(struct bpf_program *prog,
if (relo->insn_off % BPF_INSN_SZ)
return -EINVAL;
insn_idx = relo->insn_off / BPF_INSN_SZ;
+ /* adjust insn_idx from section frame of reference to the local
+ * program's frame of reference; (sub-)program code is not yet
+ * relocated, so it's enough to just subtract in-section offset
+ */
+ insn_idx = insn_idx - prog->sec_insn_off;
insn = &prog->insns[insn_idx];
class = BPF_CLASS(insn->code);
@@ -5198,14 +5323,14 @@ static int bpf_core_patch_insn(struct bpf_program *prog,
return -EINVAL;
if (res->validate && insn->imm != orig_val) {
pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
- bpf_program__title(prog, false), relo_idx,
+ prog->name, relo_idx,
insn_idx, insn->imm, orig_val, new_val);
return -EINVAL;
}
orig_val = insn->imm;
insn->imm = new_val;
pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
- bpf_program__title(prog, false), relo_idx, insn_idx,
+ prog->name, relo_idx, insn_idx,
orig_val, new_val);
break;
case BPF_LDX:
@@ -5213,21 +5338,18 @@ static int bpf_core_patch_insn(struct bpf_program *prog,
case BPF_STX:
if (res->validate && insn->off != orig_val) {
pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
- bpf_program__title(prog, false), relo_idx,
- insn_idx, insn->off, orig_val, new_val);
+ prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
return -EINVAL;
}
if (new_val > SHRT_MAX) {
pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
- bpf_program__title(prog, false), relo_idx,
- insn_idx, new_val);
+ prog->name, relo_idx, insn_idx, new_val);
return -ERANGE;
}
orig_val = insn->off;
insn->off = new_val;
pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
- bpf_program__title(prog, false), relo_idx, insn_idx,
- orig_val, new_val);
+ prog->name, relo_idx, insn_idx, orig_val, new_val);
break;
case BPF_LD: {
__u64 imm;
@@ -5238,14 +5360,14 @@ static int bpf_core_patch_insn(struct bpf_program *prog,
insn[1].code != 0 || insn[1].dst_reg != 0 ||
insn[1].src_reg != 0 || insn[1].off != 0) {
pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
- bpf_program__title(prog, false), relo_idx, insn_idx);
+ prog->name, relo_idx, insn_idx);
return -EINVAL;
}
imm = insn[0].imm + ((__u64)insn[1].imm << 32);
if (res->validate && imm != orig_val) {
pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
- bpf_program__title(prog, false), relo_idx,
+ prog->name, relo_idx,
insn_idx, (unsigned long long)imm,
orig_val, new_val);
return -EINVAL;
@@ -5254,15 +5376,14 @@ static int bpf_core_patch_insn(struct bpf_program *prog,
insn[0].imm = new_val;
insn[1].imm = 0; /* currently only 32-bit values are supported */
pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
- bpf_program__title(prog, false), relo_idx, insn_idx,
+ prog->name, relo_idx, insn_idx,
(unsigned long long)imm, new_val);
break;
}
default:
pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
- bpf_program__title(prog, false), relo_idx,
- insn_idx, insn->code, insn->src_reg, insn->dst_reg,
- insn->off, insn->imm);
+ prog->name, relo_idx, insn_idx, insn->code,
+ insn->src_reg, insn->dst_reg, insn->off, insn->imm);
return -EINVAL;
}
@@ -5392,7 +5513,6 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
const struct btf *targ_btf,
struct hashmap *cand_cache)
{
- const char *prog_name = bpf_program__title(prog, false);
struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
const void *type_key = u32_as_hash_key(relo->type_id);
struct bpf_core_relo_res cand_res, targ_res;
@@ -5419,13 +5539,13 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
if (err) {
pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
- prog_name, relo_idx, local_id, btf_kind_str(local_type),
+ prog->name, relo_idx, local_id, btf_kind_str(local_type),
str_is_empty(local_name) ? "<anon>" : local_name,
spec_str, err);
return -EINVAL;
}
- pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,
+ pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
relo_idx, core_relo_kind_str(relo->kind), relo->kind);
bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
libbpf_print(LIBBPF_DEBUG, "\n");
@@ -5442,7 +5562,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
/* libbpf doesn't support candidate search for anonymous types */
if (str_is_empty(spec_str)) {
pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
- prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+ prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
return -EOPNOTSUPP;
}
@@ -5450,7 +5570,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
if (IS_ERR(cand_ids)) {
pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld",
- prog_name, relo_idx, local_id, btf_kind_str(local_type),
+ prog->name, relo_idx, local_id, btf_kind_str(local_type),
local_name, PTR_ERR(cand_ids));
return PTR_ERR(cand_ids);
}
@@ -5466,13 +5586,13 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec);
if (err < 0) {
pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
- prog_name, relo_idx, i);
+ prog->name, relo_idx, i);
bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
libbpf_print(LIBBPF_WARN, ": %d\n", err);
return err;
}
- pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,
+ pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
relo_idx, err == 0 ? "non-matching" : "matching", i);
bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
libbpf_print(LIBBPF_DEBUG, "\n");
@@ -5492,7 +5612,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
* should all resolve to the same bit offset
*/
pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
- prog_name, relo_idx, cand_spec.bit_offset,
+ prog->name, relo_idx, cand_spec.bit_offset,
targ_spec.bit_offset);
return -EINVAL;
} else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
@@ -5501,7 +5621,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
* proceed due to ambiguity
*/
pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
- prog_name, relo_idx,
+ prog->name, relo_idx,
cand_res.poison ? "failure" : "success", cand_res.new_val,
targ_res.poison ? "failure" : "success", targ_res.new_val);
return -EINVAL;
@@ -5534,7 +5654,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
*/
if (j == 0) {
pr_debug("prog '%s': relo #%d: no matching targets found\n",
- prog_name, relo_idx);
+ prog->name, relo_idx);
/* calculate single target relo result explicitly */
err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
@@ -5547,7 +5667,7 @@ patch_insn:
err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
if (err) {
pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
- prog_name, relo_idx, relo->insn_off, err);
+ prog->name, relo_idx, relo->insn_off, err);
return -EINVAL;
}
@@ -5565,7 +5685,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
struct bpf_program *prog;
struct btf *targ_btf;
const char *sec_name;
- int i, err = 0;
+ int i, err = 0, insn_idx, sec_idx;
if (obj->btf_ext->core_relo_info.len == 0)
return 0;
@@ -5592,24 +5712,37 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
err = -EINVAL;
goto out;
}
+ /* bpf_object's ELF is gone by now so it's not easy to find
+ * section index by section name, but we can find *any*
+ * bpf_program within desired section name and use it's
+ * prog->sec_idx to do a proper search by section index and
+ * instruction offset
+ */
prog = NULL;
for (i = 0; i < obj->nr_programs; i++) {
- if (!strcmp(obj->programs[i].section_name, sec_name)) {
- prog = &obj->programs[i];
+ prog = &obj->programs[i];
+ if (strcmp(prog->sec_name, sec_name) == 0)
break;
- }
}
if (!prog) {
- pr_warn("failed to find program '%s' for CO-RE offset relocation\n",
- sec_name);
- err = -EINVAL;
- goto out;
+ pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
+ return -ENOENT;
}
+ sec_idx = prog->sec_idx;
pr_debug("sec '%s': found %d CO-RE relocations\n",
sec_name, sec->num_info);
for_each_btf_ext_rec(seg, sec, i, rec) {
+ insn_idx = rec->insn_off / BPF_INSN_SZ;
+ prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
+ if (!prog) {
+ pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n",
+ sec_name, insn_idx, i);
+ err = -EINVAL;
+ goto out;
+ }
+
err = bpf_core_apply_relo(prog, rec, i, obj->btf,
targ_btf, cand_cache);
if (err) {
@@ -5633,89 +5766,32 @@ out:
return err;
}
+/* Relocate data references within program code:
+ * - map references;
+ * - global variable references;
+ * - extern references.
+ */
static int
-bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
- struct reloc_desc *relo)
-{
- struct bpf_insn *insn, *new_insn;
- struct bpf_program *text;
- size_t new_cnt;
- int err;
-
- if (prog->idx != obj->efile.text_shndx && prog->main_prog_cnt == 0) {
- text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
- if (!text) {
- pr_warn("no .text section found yet relo into text exist\n");
- return -LIBBPF_ERRNO__RELOC;
- }
- new_cnt = prog->insns_cnt + text->insns_cnt;
- new_insn = libbpf_reallocarray(prog->insns, new_cnt, sizeof(*insn));
- if (!new_insn) {
- pr_warn("oom in prog realloc\n");
- return -ENOMEM;
- }
- prog->insns = new_insn;
-
- if (obj->btf_ext) {
- err = bpf_program_reloc_btf_ext(prog, obj,
- text->section_name,
- prog->insns_cnt);
- if (err)
- return err;
- }
-
- memcpy(new_insn + prog->insns_cnt, text->insns,
- text->insns_cnt * sizeof(*insn));
- prog->main_prog_cnt = prog->insns_cnt;
- prog->insns_cnt = new_cnt;
- pr_debug("added %zd insn from %s to prog %s\n",
- text->insns_cnt, text->section_name,
- prog->section_name);
- }
-
- insn = &prog->insns[relo->insn_idx];
- insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx;
- return 0;
-}
-
-static int
-bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
+bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
{
- int i, err;
-
- if (!prog)
- return 0;
-
- if (obj->btf_ext) {
- err = bpf_program_reloc_btf_ext(prog, obj,
- prog->section_name, 0);
- if (err)
- return err;
- }
-
- if (!prog->reloc_desc)
- return 0;
+ int i;
for (i = 0; i < prog->nr_reloc; i++) {
struct reloc_desc *relo = &prog->reloc_desc[i];
struct bpf_insn *insn = &prog->insns[relo->insn_idx];
struct extern_desc *ext;
- if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
- pr_warn("relocation out of range: '%s'\n",
- prog->section_name);
- return -LIBBPF_ERRNO__RELOC;
- }
-
switch (relo->type) {
case RELO_LD64:
insn[0].src_reg = BPF_PSEUDO_MAP_FD;
insn[0].imm = obj->maps[relo->map_idx].fd;
+ relo->processed = true;
break;
case RELO_DATA:
insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
insn[1].imm = insn[0].imm + relo->sym_off;
insn[0].imm = obj->maps[relo->map_idx].fd;
+ relo->processed = true;
break;
case RELO_EXTERN:
ext = &obj->externs[relo->sym_off];
@@ -5727,11 +5803,10 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
insn[0].imm = (__u32)ext->ksym.addr;
insn[1].imm = ext->ksym.addr >> 32;
}
+ relo->processed = true;
break;
case RELO_CALL:
- err = bpf_program__reloc_text(prog, obj, relo);
- if (err)
- return err;
+ /* will be handled as a follow up pass */
break;
default:
pr_warn("prog '%s': relo #%d: bad relo type %d\n",
@@ -5740,8 +5815,378 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
}
}
- zfree(&prog->reloc_desc);
- prog->nr_reloc = 0;
+ return 0;
+}
+
+static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
+ const struct bpf_program *prog,
+ const struct btf_ext_info *ext_info,
+ void **prog_info, __u32 *prog_rec_cnt,
+ __u32 *prog_rec_sz)
+{
+ void *copy_start = NULL, *copy_end = NULL;
+ void *rec, *rec_end, *new_prog_info;
+ const struct btf_ext_info_sec *sec;
+ size_t old_sz, new_sz;
+ const char *sec_name;
+ int i, off_adj;
+
+ for_each_btf_ext_sec(ext_info, sec) {
+ sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
+ if (!sec_name)
+ return -EINVAL;
+ if (strcmp(sec_name, prog->sec_name) != 0)
+ continue;
+
+ for_each_btf_ext_rec(ext_info, sec, i, rec) {
+ __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
+
+ if (insn_off < prog->sec_insn_off)
+ continue;
+ if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
+ break;
+
+ if (!copy_start)
+ copy_start = rec;
+ copy_end = rec + ext_info->rec_size;
+ }
+
+ if (!copy_start)
+ return -ENOENT;
+
+ /* append func/line info of a given (sub-)program to the main
+ * program func/line info
+ */
+ old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
+ new_sz = old_sz + (copy_end - copy_start);
+ new_prog_info = realloc(*prog_info, new_sz);
+ if (!new_prog_info)
+ return -ENOMEM;
+ *prog_info = new_prog_info;
+ *prog_rec_cnt = new_sz / ext_info->rec_size;
+ memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
+
+ /* Kernel instruction offsets are in units of 8-byte
+ * instructions, while .BTF.ext instruction offsets generated
+ * by Clang are in units of bytes. So convert Clang offsets
+ * into kernel offsets and adjust offset according to program
+ * relocated position.
+ */
+ off_adj = prog->sub_insn_off - prog->sec_insn_off;
+ rec = new_prog_info + old_sz;
+ rec_end = new_prog_info + new_sz;
+ for (; rec < rec_end; rec += ext_info->rec_size) {
+ __u32 *insn_off = rec;
+
+ *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
+ }
+ *prog_rec_sz = ext_info->rec_size;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int
+reloc_prog_func_and_line_info(const struct bpf_object *obj,
+ struct bpf_program *main_prog,
+ const struct bpf_program *prog)
+{
+ int err;
+
+ /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
+ * supprot func/line info
+ */
+ if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC))
+ return 0;
+
+ /* only attempt func info relocation if main program's func_info
+ * relocation was successful
+ */
+ if (main_prog != prog && !main_prog->func_info)
+ goto line_info;
+
+ err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
+ &main_prog->func_info,
+ &main_prog->func_info_cnt,
+ &main_prog->func_info_rec_size);
+ if (err) {
+ if (err != -ENOENT) {
+ pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
+ prog->name, err);
+ return err;
+ }
+ if (main_prog->func_info) {
+ /*
+ * Some info has already been found but has problem
+ * in the last btf_ext reloc. Must have to error out.
+ */
+ pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
+ return err;
+ }
+ /* Have problem loading the very first info. Ignore the rest. */
+ pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
+ prog->name);
+ }
+
+line_info:
+ /* don't relocate line info if main program's relocation failed */
+ if (main_prog != prog && !main_prog->line_info)
+ return 0;
+
+ err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
+ &main_prog->line_info,
+ &main_prog->line_info_cnt,
+ &main_prog->line_info_rec_size);
+ if (err) {
+ if (err != -ENOENT) {
+ pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
+ prog->name, err);
+ return err;
+ }
+ if (main_prog->line_info) {
+ /*
+ * Some info has already been found but has problem
+ * in the last btf_ext reloc. Must have to error out.
+ */
+ pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
+ return err;
+ }
+ /* Have problem loading the very first info. Ignore the rest. */
+ pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
+ prog->name);
+ }
+ return 0;
+}
+
+static int cmp_relo_by_insn_idx(const void *key, const void *elem)
+{
+ size_t insn_idx = *(const size_t *)key;
+ const struct reloc_desc *relo = elem;
+
+ if (insn_idx == relo->insn_idx)
+ return 0;
+ return insn_idx < relo->insn_idx ? -1 : 1;
+}
+
+static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
+{
+ return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
+ sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
+}
+
+static int
+bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
+ struct bpf_program *prog)
+{
+ size_t sub_insn_idx, insn_idx, new_cnt;
+ struct bpf_program *subprog;
+ struct bpf_insn *insns, *insn;
+ struct reloc_desc *relo;
+ int err;
+
+ err = reloc_prog_func_and_line_info(obj, main_prog, prog);
+ if (err)
+ return err;
+
+ for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
+ insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
+ if (!insn_is_subprog_call(insn))
+ continue;
+
+ relo = find_prog_insn_relo(prog, insn_idx);
+ if (relo && relo->type != RELO_CALL) {
+ pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
+ prog->name, insn_idx, relo->type);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ if (relo) {
+ /* sub-program instruction index is a combination of
+ * an offset of a symbol pointed to by relocation and
+ * call instruction's imm field; for global functions,
+ * call always has imm = -1, but for static functions
+ * relocation is against STT_SECTION and insn->imm
+ * points to a start of a static function
+ */
+ sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
+ } else {
+ /* if subprogram call is to a static function within
+ * the same ELF section, there won't be any relocation
+ * emitted, but it also means there is no additional
+ * offset necessary, insns->imm is relative to
+ * instruction's original position within the section
+ */
+ sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
+ }
+
+ /* we enforce that sub-programs should be in .text section */
+ subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
+ if (!subprog) {
+ pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
+ prog->name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ /* if it's the first call instruction calling into this
+ * subprogram (meaning this subprog hasn't been processed
+ * yet) within the context of current main program:
+ * - append it at the end of main program's instructions blog;
+ * - process is recursively, while current program is put on hold;
+ * - if that subprogram calls some other not yet processes
+ * subprogram, same thing will happen recursively until
+ * there are no more unprocesses subprograms left to append
+ * and relocate.
+ */
+ if (subprog->sub_insn_off == 0) {
+ subprog->sub_insn_off = main_prog->insns_cnt;
+
+ new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
+ insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
+ if (!insns) {
+ pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
+ return -ENOMEM;
+ }
+ main_prog->insns = insns;
+ main_prog->insns_cnt = new_cnt;
+
+ memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
+ subprog->insns_cnt * sizeof(*insns));
+
+ pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
+ main_prog->name, subprog->insns_cnt, subprog->name);
+
+ err = bpf_object__reloc_code(obj, main_prog, subprog);
+ if (err)
+ return err;
+ }
+
+ /* main_prog->insns memory could have been re-allocated, so
+ * calculate pointer again
+ */
+ insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
+ /* calculate correct instruction position within current main
+ * prog; each main prog can have a different set of
+ * subprograms appended (potentially in different order as
+ * well), so position of any subprog can be different for
+ * different main programs */
+ insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
+
+ if (relo)
+ relo->processed = true;
+
+ pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
+ prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
+ }
+
+ return 0;
+}
+
+/*
+ * Relocate sub-program calls.
+ *
+ * Algorithm operates as follows. Each entry-point BPF program (referred to as
+ * main prog) is processed separately. For each subprog (non-entry functions,
+ * that can be called from either entry progs or other subprogs) gets their
+ * sub_insn_off reset to zero. This serves as indicator that this subprogram
+ * hasn't been yet appended and relocated within current main prog. Once its
+ * relocated, sub_insn_off will point at the position within current main prog
+ * where given subprog was appended. This will further be used to relocate all
+ * the call instructions jumping into this subprog.
+ *
+ * We start with main program and process all call instructions. If the call
+ * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
+ * is zero), subprog instructions are appended at the end of main program's
+ * instruction array. Then main program is "put on hold" while we recursively
+ * process newly appended subprogram. If that subprogram calls into another
+ * subprogram that hasn't been appended, new subprogram is appended again to
+ * the *main* prog's instructions (subprog's instructions are always left
+ * untouched, as they need to be in unmodified state for subsequent main progs
+ * and subprog instructions are always sent only as part of a main prog) and
+ * the process continues recursively. Once all the subprogs called from a main
+ * prog or any of its subprogs are appended (and relocated), all their
+ * positions within finalized instructions array are known, so it's easy to
+ * rewrite call instructions with correct relative offsets, corresponding to
+ * desired target subprog.
+ *
+ * Its important to realize that some subprogs might not be called from some
+ * main prog and any of its called/used subprogs. Those will keep their
+ * subprog->sub_insn_off as zero at all times and won't be appended to current
+ * main prog and won't be relocated within the context of current main prog.
+ * They might still be used from other main progs later.
+ *
+ * Visually this process can be shown as below. Suppose we have two main
+ * programs mainA and mainB and BPF object contains three subprogs: subA,
+ * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
+ * subC both call subB:
+ *
+ * +--------+ +-------+
+ * | v v |
+ * +--+---+ +--+-+-+ +---+--+
+ * | subA | | subB | | subC |
+ * +--+---+ +------+ +---+--+
+ * ^ ^
+ * | |
+ * +---+-------+ +------+----+
+ * | mainA | | mainB |
+ * +-----------+ +-----------+
+ *
+ * We'll start relocating mainA, will find subA, append it and start
+ * processing sub A recursively:
+ *
+ * +-----------+------+
+ * | mainA | subA |
+ * +-----------+------+
+ *
+ * At this point we notice that subB is used from subA, so we append it and
+ * relocate (there are no further subcalls from subB):
+ *
+ * +-----------+------+------+
+ * | mainA | subA | subB |
+ * +-----------+------+------+
+ *
+ * At this point, we relocate subA calls, then go one level up and finish with
+ * relocatin mainA calls. mainA is done.
+ *
+ * For mainB process is similar but results in different order. We start with
+ * mainB and skip subA and subB, as mainB never calls them (at least
+ * directly), but we see subC is needed, so we append and start processing it:
+ *
+ * +-----------+------+
+ * | mainB | subC |
+ * +-----------+------+
+ * Now we see subC needs subB, so we go back to it, append and relocate it:
+ *
+ * +-----------+------+------+
+ * | mainB | subC | subB |
+ * +-----------+------+------+
+ *
+ * At this point we unwind recursion, relocate calls in subC, then in mainB.
+ */
+static int
+bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
+{
+ struct bpf_program *subprog;
+ int i, j, err;
+
+ /* mark all subprogs as not relocated (yet) within the context of
+ * current main program
+ */
+ for (i = 0; i < obj->nr_programs; i++) {
+ subprog = &obj->programs[i];
+ if (!prog_is_subprog(obj, subprog))
+ continue;
+
+ subprog->sub_insn_off = 0;
+ for (j = 0; j < subprog->nr_reloc; j++)
+ if (subprog->reloc_desc[j].type == RELO_CALL)
+ subprog->reloc_desc[j].processed = false;
+ }
+
+ err = bpf_object__reloc_code(obj, prog, prog);
+ if (err)
+ return err;
+
+
return 0;
}
@@ -5760,37 +6205,45 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
return err;
}
}
- /* ensure .text is relocated first, as it's going to be copied as-is
- * later for sub-program calls
+ /* relocate data references first for all programs and sub-programs,
+ * as they don't change relative to code locations, so subsequent
+ * subprogram processing won't need to re-calculate any of them
*/
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- if (prog->idx != obj->efile.text_shndx)
- continue;
-
- err = bpf_program__relocate(prog, obj);
+ err = bpf_object__relocate_data(obj, prog);
if (err) {
pr_warn("prog '%s': failed to relocate data references: %d\n",
prog->name, err);
return err;
}
- break;
}
- /* now relocate everything but .text, which by now is relocated
- * properly, so we can copy raw sub-program instructions as is safely
+ /* now relocate subprogram calls and append used subprograms to main
+ * programs; each copy of subprogram code needs to be relocated
+ * differently for each main program, because its code location might
+ * have changed
*/
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- if (prog->idx == obj->efile.text_shndx)
+ /* sub-program's sub-calls are relocated within the context of
+ * its main program only
+ */
+ if (prog_is_subprog(obj, prog))
continue;
- err = bpf_program__relocate(prog, obj);
+ err = bpf_object__relocate_calls(obj, prog);
if (err) {
pr_warn("prog '%s': failed to relocate calls: %d\n",
prog->name, err);
return err;
}
}
+ /* free up relocation descriptors */
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+ zfree(&prog->reloc_desc);
+ prog->nr_reloc = 0;
+ }
return 0;
}
@@ -5910,41 +6363,53 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
return 0;
}
-static int bpf_object__collect_reloc(struct bpf_object *obj)
+static int cmp_relocs(const void *_a, const void *_b)
{
- int i, err;
+ const struct reloc_desc *a = _a;
+ const struct reloc_desc *b = _b;
- if (!obj_elf_valid(obj)) {
- pr_warn("Internal error: elf object is closed\n");
- return -LIBBPF_ERRNO__INTERNAL;
- }
+ if (a->insn_idx != b->insn_idx)
+ return a->insn_idx < b->insn_idx ? -1 : 1;
+
+ /* no two relocations should have the same insn_idx, but ... */
+ if (a->type != b->type)
+ return a->type < b->type ? -1 : 1;
+
+ return 0;
+}
+
+static int bpf_object__collect_relos(struct bpf_object *obj)
+{
+ int i, err;
for (i = 0; i < obj->efile.nr_reloc_sects; i++) {
GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;
Elf_Data *data = obj->efile.reloc_sects[i].data;
int idx = shdr->sh_info;
- struct bpf_program *prog;
if (shdr->sh_type != SHT_REL) {
pr_warn("internal error at %d\n", __LINE__);
return -LIBBPF_ERRNO__INTERNAL;
}
- if (idx == obj->efile.st_ops_shndx) {
+ if (idx == obj->efile.st_ops_shndx)
err = bpf_object__collect_st_ops_relos(obj, shdr, data);
- } else if (idx == obj->efile.btf_maps_shndx) {
+ else if (idx == obj->efile.btf_maps_shndx)
err = bpf_object__collect_map_relos(obj, shdr, data);
- } else {
- prog = bpf_object__find_prog_by_idx(obj, idx);
- if (!prog) {
- pr_warn("relocation failed: no prog in section(%d)\n", idx);
- return -LIBBPF_ERRNO__RELOC;
- }
- err = bpf_program__collect_reloc(prog, shdr, data, obj);
- }
+ else
+ err = bpf_object__collect_prog_relos(obj, shdr, data);
if (err)
return err;
}
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ struct bpf_program *p = &obj->programs[i];
+
+ if (!p->nr_reloc)
+ continue;
+
+ qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
+ }
return 0;
}
@@ -6058,6 +6523,20 @@ retry_load:
if (ret >= 0) {
if (log_buf && load_attr.log_level)
pr_debug("verifier log:\n%s", log_buf);
+
+ if (prog->obj->rodata_map_idx >= 0 &&
+ kernel_supports(FEAT_PROG_BIND_MAP)) {
+ struct bpf_map *rodata_map =
+ &prog->obj->maps[prog->obj->rodata_map_idx];
+
+ if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) {
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warn("prog '%s': failed to bind .rodata map: %s\n",
+ prog->name, cp);
+ /* Don't fail hard if can't bind rodata. */
+ }
+ }
+
*pfd = ret;
ret = 0;
goto out;
@@ -6110,8 +6589,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
int err = 0, fd, i, btf_id;
if (prog->obj->loaded) {
- pr_warn("prog '%s'('%s'): can't load after object was loaded\n",
- prog->name, prog->section_name);
+ pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
return -EINVAL;
}
@@ -6127,7 +6605,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
if (prog->instances.nr < 0 || !prog->instances.fds) {
if (prog->preprocessor) {
pr_warn("Internal error: can't load program '%s'\n",
- prog->section_name);
+ prog->name);
return -LIBBPF_ERRNO__INTERNAL;
}
@@ -6142,8 +6620,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
if (!prog->preprocessor) {
if (prog->instances.nr != 1) {
- pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n",
- prog->section_name, prog->instances.nr);
+ pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
+ prog->name, prog->instances.nr);
}
err = load_program(prog, prog->insns, prog->insns_cnt,
license, kern_ver, &fd);
@@ -6161,13 +6639,13 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
prog->insns_cnt, &result);
if (err) {
pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
- i, prog->section_name);
+ i, prog->name);
goto out;
}
if (!result.new_insn_ptr || !result.new_insn_cnt) {
pr_debug("Skip loading the %dth instance of program '%s'\n",
- i, prog->section_name);
+ i, prog->name);
prog->instances.fds[i] = -1;
if (result.pfd)
*result.pfd = -1;
@@ -6178,7 +6656,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
result.new_insn_cnt, license, kern_ver, &fd);
if (err) {
pr_warn("Loading the %dth instance of program '%s' failed\n",
- i, prog->section_name);
+ i, prog->name);
goto out;
}
@@ -6188,18 +6666,12 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
}
out:
if (err)
- pr_warn("failed to load program '%s'\n", prog->section_name);
+ pr_warn("failed to load program '%s'\n", prog->name);
zfree(&prog->insns);
prog->insns_cnt = 0;
return err;
}
-static bool bpf_program__is_function_storage(const struct bpf_program *prog,
- const struct bpf_object *obj)
-{
- return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
-}
-
static int
bpf_object__load_progs(struct bpf_object *obj, int log_level)
{
@@ -6216,7 +6688,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- if (bpf_program__is_function_storage(prog, obj))
+ if (prog_is_subprog(obj, prog))
continue;
if (!prog->load) {
pr_debug("prog '%s': skipped loading\n", prog->name);
@@ -6280,14 +6752,13 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
err = err ? : bpf_object__collect_externs(obj);
err = err ? : bpf_object__finalize_btf(obj);
err = err ? : bpf_object__init_maps(obj, opts);
- err = err ? : bpf_object__init_prog_names(obj);
- err = err ? : bpf_object__collect_reloc(obj);
+ err = err ? : bpf_object__collect_relos(obj);
if (err)
goto out;
bpf_object__elf_finish(obj);
bpf_object__for_each_program(prog, obj) {
- prog->sec_def = find_sec_def(prog->section_name);
+ prog->sec_def = find_sec_def(prog->sec_name);
if (!prog->sec_def)
/* couldn't guess, but user might manually specify */
continue;
@@ -6668,7 +7139,7 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
if (instance < 0 || instance >= prog->instances.nr) {
pr_warn("invalid prog instance %d of prog %s (max %d)\n",
- instance, prog->section_name, prog->instances.nr);
+ instance, prog->name, prog->instances.nr);
return -EINVAL;
}
@@ -6699,7 +7170,7 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
if (instance < 0 || instance >= prog->instances.nr) {
pr_warn("invalid prog instance %d of prog %s (max %d)\n",
- instance, prog->section_name, prog->instances.nr);
+ instance, prog->name, prog->instances.nr);
return -EINVAL;
}
@@ -6729,8 +7200,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
}
if (prog->instances.nr <= 0) {
- pr_warn("no instances of prog %s to pin\n",
- prog->section_name);
+ pr_warn("no instances of prog %s to pin\n", prog->name);
return -EINVAL;
}
@@ -6792,8 +7262,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
}
if (prog->instances.nr <= 0) {
- pr_warn("no instances of prog %s to pin\n",
- prog->section_name);
+ pr_warn("no instances of prog %s to pin\n", prog->name);
return -EINVAL;
}
@@ -7285,7 +7754,7 @@ bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
do {
prog = __bpf_program__iter(prog, obj, true);
- } while (prog && bpf_program__is_function_storage(prog, obj));
+ } while (prog && prog_is_subprog(obj, prog));
return prog;
}
@@ -7297,7 +7766,7 @@ bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
do {
prog = __bpf_program__iter(prog, obj, false);
- } while (prog && bpf_program__is_function_storage(prog, obj));
+ } while (prog && prog_is_subprog(obj, prog));
return prog;
}
@@ -7328,11 +7797,16 @@ const char *bpf_program__name(const struct bpf_program *prog)
return prog->name;
}
+const char *bpf_program__section_name(const struct bpf_program *prog)
+{
+ return prog->sec_name;
+}
+
const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
{
const char *title;
- title = prog->section_name;
+ title = prog->sec_name;
if (needs_copy) {
title = strdup(title);
if (!title) {
@@ -7405,14 +7879,14 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n)
if (n >= prog->instances.nr || n < 0) {
pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
- n, prog->section_name, prog->instances.nr);
+ n, prog->name, prog->instances.nr);
return -EINVAL;
}
fd = prog->instances.fds[n];
if (fd < 0) {
pr_warn("%dth instance of program '%s' is invalid\n",
- n, prog->section_name);
+ n, prog->name);
return -ENOENT;
}
@@ -7772,7 +8246,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
const struct btf *btf;
struct bpf_map *map;
Elf_Data *symbols;
- unsigned int moff;
+ unsigned int moff, insn_idx;
const char *name;
__u32 member_idx;
GElf_Sym sym;
@@ -7817,6 +8291,12 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
map->name, (size_t)rel.r_offset, shdr_idx);
return -LIBBPF_ERRNO__RELOC;
}
+ if (sym.st_value % BPF_INSN_SZ) {
+ pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
+ map->name, (unsigned long long)sym.st_value);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ insn_idx = sym.st_value / BPF_INSN_SZ;
member = find_member_by_offset(st_ops->type, moff * 8);
if (!member) {
@@ -7833,7 +8313,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
return -EINVAL;
}
- prog = bpf_object__find_prog_by_idx(obj, shdr_idx);
+ prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
if (!prog) {
pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
map->name, shdr_idx, name);
@@ -7843,7 +8323,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
if (prog->type == BPF_PROG_TYPE_UNSPEC) {
const struct bpf_sec_def *sec_def;
- sec_def = find_sec_def(prog->section_name);
+ sec_def = find_sec_def(prog->sec_name);
if (sec_def &&
sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {
/* for pr_warn */
@@ -7866,7 +8346,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
invalid_prog:
pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
- map->name, prog->name, prog->section_name, prog->type,
+ map->name, prog->name, prog->sec_name, prog->type,
prog->attach_btf_id, prog->expected_attach_type, name);
return -EINVAL;
}
@@ -7970,7 +8450,7 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog)
{
enum bpf_attach_type attach_type = prog->expected_attach_type;
__u32 attach_prog_fd = prog->attach_prog_fd;
- const char *name = prog->section_name;
+ const char *name = prog->sec_name;
int i, err;
if (!name)
@@ -8497,14 +8977,14 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
int prog_fd, err;
if (pfd < 0) {
- pr_warn("program '%s': invalid perf event FD %d\n",
- bpf_program__title(prog, false), pfd);
+ pr_warn("prog '%s': invalid perf event FD %d\n",
+ prog->name, pfd);
return ERR_PTR(-EINVAL);
}
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
- pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n",
- bpf_program__title(prog, false));
+ pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
+ prog->name);
return ERR_PTR(-EINVAL);
}
@@ -8517,20 +8997,18 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
err = -errno;
free(link);
- pr_warn("program '%s': failed to attach to pfd %d: %s\n",
- bpf_program__title(prog, false), pfd,
- libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
+ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
if (err == -EPROTO)
- pr_warn("program '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
- bpf_program__title(prog, false), pfd);
+ pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
+ prog->name, pfd);
return ERR_PTR(err);
}
if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
err = -errno;
free(link);
- pr_warn("program '%s': failed to enable pfd %d: %s\n",
- bpf_program__title(prog, false), pfd,
- libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ pr_warn("prog '%s': failed to enable pfd %d: %s\n",
+ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return ERR_PTR(err);
}
return link;
@@ -8652,9 +9130,8 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
0 /* offset */, -1 /* pid */);
if (pfd < 0) {
- pr_warn("program '%s': failed to create %s '%s' perf event: %s\n",
- bpf_program__title(prog, false),
- retprobe ? "kretprobe" : "kprobe", func_name,
+ pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
+ prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
}
@@ -8662,9 +9139,8 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
if (IS_ERR(link)) {
close(pfd);
err = PTR_ERR(link);
- pr_warn("program '%s': failed to attach to %s '%s': %s\n",
- bpf_program__title(prog, false),
- retprobe ? "kretprobe" : "kprobe", func_name,
+ pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
+ prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return link;
}
@@ -8677,7 +9153,7 @@ static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
const char *func_name;
bool retprobe;
- func_name = bpf_program__title(prog, false) + sec->len;
+ func_name = prog->sec_name + sec->len;
retprobe = strcmp(sec->sec, "kretprobe/") == 0;
return bpf_program__attach_kprobe(prog, retprobe, func_name);
@@ -8695,9 +9171,8 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
pfd = perf_event_open_probe(true /* uprobe */, retprobe,
binary_path, func_offset, pid);
if (pfd < 0) {
- pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
- bpf_program__title(prog, false),
- retprobe ? "uretprobe" : "uprobe",
+ pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
+ prog->name, retprobe ? "uretprobe" : "uprobe",
binary_path, func_offset,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
@@ -8706,9 +9181,8 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
if (IS_ERR(link)) {
close(pfd);
err = PTR_ERR(link);
- pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
- bpf_program__title(prog, false),
- retprobe ? "uretprobe" : "uprobe",
+ pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
+ prog->name, retprobe ? "uretprobe" : "uprobe",
binary_path, func_offset,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return link;
@@ -8776,9 +9250,8 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
pfd = perf_event_open_tracepoint(tp_category, tp_name);
if (pfd < 0) {
- pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
- bpf_program__title(prog, false),
- tp_category, tp_name,
+ pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
+ prog->name, tp_category, tp_name,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
}
@@ -8786,9 +9259,8 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
if (IS_ERR(link)) {
close(pfd);
err = PTR_ERR(link);
- pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n",
- bpf_program__title(prog, false),
- tp_category, tp_name,
+ pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
+ prog->name, tp_category, tp_name,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return link;
}
@@ -8801,7 +9273,7 @@ static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
char *sec_name, *tp_cat, *tp_name;
struct bpf_link *link;
- sec_name = strdup(bpf_program__title(prog, false));
+ sec_name = strdup(prog->sec_name);
if (!sec_name)
return ERR_PTR(-ENOMEM);
@@ -8830,8 +9302,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
- pr_warn("program '%s': can't attach before loaded\n",
- bpf_program__title(prog, false));
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
return ERR_PTR(-EINVAL);
}
@@ -8844,9 +9315,8 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
if (pfd < 0) {
pfd = -errno;
free(link);
- pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n",
- bpf_program__title(prog, false), tp_name,
- libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+ pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
+ prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
}
link->fd = pfd;
@@ -8856,7 +9326,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
struct bpf_program *prog)
{
- const char *tp_name = bpf_program__title(prog, false) + sec->len;
+ const char *tp_name = prog->sec_name + sec->len;
return bpf_program__attach_raw_tracepoint(prog, tp_name);
}
@@ -8870,8 +9340,7 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
- pr_warn("program '%s': can't attach before loaded\n",
- bpf_program__title(prog, false));
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
return ERR_PTR(-EINVAL);
}
@@ -8884,9 +9353,8 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
if (pfd < 0) {
pfd = -errno;
free(link);
- pr_warn("program '%s': failed to attach: %s\n",
- bpf_program__title(prog, false),
- libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+ pr_warn("prog '%s': failed to attach: %s\n",
+ prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
}
link->fd = pfd;
@@ -8932,8 +9400,7 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
- pr_warn("program '%s': can't attach before loaded\n",
- bpf_program__title(prog, false));
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
return ERR_PTR(-EINVAL);
}
@@ -8947,8 +9414,8 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd,
if (link_fd < 0) {
link_fd = -errno;
free(link);
- pr_warn("program '%s': failed to attach to %s: %s\n",
- bpf_program__title(prog, false), target_name,
+ pr_warn("prog '%s': failed to attach to %s: %s\n",
+ prog->name, target_name,
libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
return ERR_PTR(link_fd);
}
@@ -8992,8 +9459,7 @@ bpf_program__attach_iter(struct bpf_program *prog,
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
- pr_warn("program '%s': can't attach before loaded\n",
- bpf_program__title(prog, false));
+ pr_warn("prog '%s': can't attach before loaded\n", prog->name);
return ERR_PTR(-EINVAL);
}
@@ -9007,9 +9473,8 @@ bpf_program__attach_iter(struct bpf_program *prog,
if (link_fd < 0) {
link_fd = -errno;
free(link);
- pr_warn("program '%s': failed to attach to iterator: %s\n",
- bpf_program__title(prog, false),
- libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
+ pr_warn("prog '%s': failed to attach to iterator: %s\n",
+ prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
return ERR_PTR(link_fd);
}
link->fd = link_fd;
@@ -9020,7 +9485,7 @@ struct bpf_link *bpf_program__attach(struct bpf_program *prog)
{
const struct bpf_sec_def *sec_def;
- sec_def = find_sec_def(bpf_program__title(prog, false));
+ sec_def = find_sec_def(prog->sec_name);
if (!sec_def || !sec_def->attach_fn)
return ERR_PTR(-ESRCH);
@@ -10090,12 +10555,11 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
struct bpf_program *prog = *s->progs[i].prog;
struct bpf_link **link = s->progs[i].link;
const struct bpf_sec_def *sec_def;
- const char *sec_name = bpf_program__title(prog, false);
if (!prog->load)
continue;
- sec_def = find_sec_def(sec_name);
+ sec_def = find_sec_def(prog->sec_name);
if (!sec_def || !sec_def->attach_fn)
continue;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 308e0ded8f14..a750f67a23f6 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -198,8 +198,9 @@ LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
__u32 ifindex);
LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
-LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
- bool needs_copy);
+LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog);
+LIBBPF_API LIBBPF_DEPRECATED("BPF program title is confusing term; please use bpf_program__section_name() instead")
+const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy);
LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);
LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 3fedcdc4ae2f..5f054dadf082 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -302,6 +302,8 @@ LIBBPF_0.1.0 {
LIBBPF_0.2.0 {
global:
+ bpf_prog_bind_map;
+ bpf_program__section_name;
perf_buffer__buffer_cnt;
perf_buffer__buffer_fd;
perf_buffer__epoll_fd;
diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
index a23ae1ac27eb..947d8bd8a7bb 100644
--- a/tools/lib/bpf/libbpf_common.h
+++ b/tools/lib/bpf/libbpf_common.h
@@ -15,6 +15,8 @@
#define LIBBPF_API __attribute__((visibility("default")))
#endif
+#define LIBBPF_DEPRECATED(msg) __attribute__((deprecated(msg)))
+
/* Helper macro to declare and initialize libbpf options struct
*
* This dance with uninitialized declaration, followed by memset to zero,
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 49c324594792..30b4ca5d2ac7 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -20,6 +20,7 @@
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_xdp.h>
+#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/sockios.h>
#include <net/if.h>
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 2feb751516ab..0374adcb223c 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -328,12 +328,6 @@ config_bpf_program(struct bpf_program *prog)
probe_conf.no_inlines = false;
probe_conf.force_add = false;
- config_str = bpf_program__title(prog, false);
- if (IS_ERR(config_str)) {
- pr_debug("bpf: unable to get title for program\n");
- return PTR_ERR(config_str);
- }
-
priv = calloc(sizeof(*priv), 1);
if (!priv) {
pr_debug("bpf: failed to alloc priv\n");
@@ -341,6 +335,7 @@ config_bpf_program(struct bpf_program *prog)
}
pev = &priv->pev;
+ config_str = bpf_program__section_name(prog);
pr_debug("bpf: config program '%s'\n", config_str);
err = parse_prog_config(config_str, &main_str, &is_tp, pev);
if (err)
@@ -454,10 +449,7 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
if (err) {
const char *title;
- title = bpf_program__title(prog, false);
- if (!title)
- title = "[unknown]";
-
+ title = bpf_program__section_name(prog);
pr_debug("Failed to generate prologue for program %s\n",
title);
return err;
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 9a0946ddb705..e8fed558b8b8 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -15,7 +15,6 @@ test_sock
test_sock_addr
test_sock_fields
urandom_read
-test_btf
test_sockmap
test_lirc_mode2_user
get_cgroup_id_user
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 65d3d9aaeb31..59a5fa5fe837 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -33,7 +33,7 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_verifier_log test_dev_cgroup test_tcpbpf_user \
- test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
+ test_sock test_sockmap get_cgroup_id_user test_socket_cookie \
test_cgroup_storage \
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \
test_progs-no_alu32 \
@@ -68,7 +68,8 @@ TEST_PROGS := test_kmod.sh \
test_tc_edt.sh \
test_xdping.sh \
test_bpftool_build.sh \
- test_bpftool.sh
+ test_bpftool.sh \
+ test_bpftool_metadata.sh \
TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \
@@ -176,6 +177,11 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
OUTPUT=$(BUILD_DIR)/bpftool/ \
prefix= DESTDIR=$(SCRATCH_DIR)/ install
+ $(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation
+ $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
+ -C $(BPFTOOLDIR)/Documentation \
+ OUTPUT=$(BUILD_DIR)/bpftool/Documentation/ \
+ prefix= DESTDIR=$(SCRATCH_DIR)/ install
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
../../../include/uapi/linux/bpf.h \
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h
index daeaeb518894..7290401ec172 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.h
+++ b/tools/testing/selftests/bpf/flow_dissector_load.h
@@ -23,7 +23,13 @@ static inline int bpf_flow_load(struct bpf_object **obj,
if (ret)
return ret;
- main_prog = bpf_object__find_program_by_title(*obj, section_name);
+ main_prog = NULL;
+ bpf_object__for_each_program(prog, *obj) {
+ if (strcmp(section_name, bpf_program__section_name(prog)) == 0) {
+ main_prog = prog;
+ break;
+ }
+ }
if (!main_prog)
return -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 7375d9a6d242..fe1a83b9875c 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -132,17 +132,38 @@ static void test_task_stack(void)
bpf_iter_task_stack__destroy(skel);
}
+static void *do_nothing(void *arg)
+{
+ pthread_exit(arg);
+}
+
static void test_task_file(void)
{
struct bpf_iter_task_file *skel;
+ pthread_t thread_id;
+ void *ret;
skel = bpf_iter_task_file__open_and_load();
if (CHECK(!skel, "bpf_iter_task_file__open_and_load",
"skeleton open_and_load failed\n"))
return;
+ skel->bss->tgid = getpid();
+
+ if (CHECK(pthread_create(&thread_id, NULL, &do_nothing, NULL),
+ "pthread_create", "pthread_create failed\n"))
+ goto done;
+
do_dummy_read(skel->progs.dump_task_file);
+ if (CHECK(pthread_join(thread_id, &ret) || ret != NULL,
+ "pthread_join", "pthread_join failed\n"))
+ goto done;
+
+ CHECK(skel->bss->count != 0, "check_count",
+ "invalid non pthread file visit count %d\n", skel->bss->count);
+
+done:
bpf_iter_task_file__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index e9f2f12ba06b..e698ee6bb6c2 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -49,6 +49,7 @@ void test_bpf_verif_scale(void)
{ "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS },
{ "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+ { "pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
/* full unroll by llvm */
{ "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
@@ -86,6 +87,9 @@ void test_bpf_verif_scale(void)
{ "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
{ "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+ /* non-inlined subprogs */
+ { "strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+
{ "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
{ "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index c75fc6447186..93162484c2ca 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -24,40 +24,17 @@
#include "bpf_rlimit.h"
#include "bpf_util.h"
-#include "test_btf.h"
+#include "../test_btf.h"
+#include "test_progs.h"
#define MAX_INSNS 512
#define MAX_SUBPROGS 16
-static uint32_t pass_cnt;
-static uint32_t error_cnt;
-static uint32_t skip_cnt;
+static int duration = 0;
+static bool always_log;
-#define CHECK(condition, format...) ({ \
- int __ret = !!(condition); \
- if (__ret) { \
- fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \
- fprintf(stderr, format); \
- } \
- __ret; \
-})
-
-static int count_result(int err)
-{
- if (err)
- error_cnt++;
- else
- pass_cnt++;
-
- fprintf(stderr, "\n");
- return err;
-}
-
-static int __base_pr(enum libbpf_print_level level __attribute__((unused)),
- const char *format, va_list args)
-{
- return vfprintf(stderr, format, args);
-}
+#undef CHECK
+#define CHECK(condition, format...) _CHECK(condition, "check", duration, format)
#define BTF_END_RAW 0xdeadbeef
#define NAME_TBD 0xdeadb33f
@@ -69,21 +46,6 @@ static int __base_pr(enum libbpf_print_level level __attribute__((unused)),
#define MAX_NR_RAW_U32 1024
#define BTF_LOG_BUF_SIZE 65535
-static struct args {
- unsigned int raw_test_num;
- unsigned int file_test_num;
- unsigned int get_info_test_num;
- unsigned int info_raw_test_num;
- unsigned int dedup_test_num;
- bool raw_test;
- bool file_test;
- bool get_info_test;
- bool pprint_test;
- bool always_log;
- bool info_raw_test;
- bool dedup_test;
-} args;
-
static char btf_log_buf[BTF_LOG_BUF_SIZE];
static struct btf_header hdr_tmpl = {
@@ -3664,7 +3626,7 @@ done:
return raw_btf;
}
-static int do_test_raw(unsigned int test_num)
+static void do_test_raw(unsigned int test_num)
{
struct btf_raw_test *test = &raw_tests[test_num - 1];
struct bpf_create_map_attr create_attr = {};
@@ -3674,15 +3636,16 @@ static int do_test_raw(unsigned int test_num)
void *raw_btf;
int err;
- fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
+
raw_btf = btf_raw_create(&hdr_tmpl,
test->raw_types,
test->str_sec,
test->str_sec_size,
&raw_btf_size, NULL);
-
if (!raw_btf)
- return -1;
+ return;
hdr = raw_btf;
@@ -3694,7 +3657,7 @@ static int do_test_raw(unsigned int test_num)
*btf_log_buf = '\0';
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
free(raw_btf);
err = ((btf_fd == -1) != test->btf_load_err);
@@ -3725,32 +3688,12 @@ static int do_test_raw(unsigned int test_num)
map_fd, test->map_create_err);
done:
- if (!err)
- fprintf(stderr, "OK");
-
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
-
if (btf_fd != -1)
close(btf_fd);
if (map_fd != -1)
close(map_fd);
-
- return err;
-}
-
-static int test_raw(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.raw_test_num)
- return count_result(do_test_raw(args.raw_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
- err |= count_result(do_test_raw(i));
-
- return err;
}
struct btf_get_info_test {
@@ -3814,11 +3757,6 @@ const struct btf_get_info_test get_info_tests[] = {
},
};
-static inline __u64 ptr_to_u64(const void *ptr)
-{
- return (__u64)(unsigned long)ptr;
-}
-
static int test_big_btf_info(unsigned int test_num)
{
const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
@@ -3851,7 +3789,7 @@ static int test_big_btf_info(unsigned int test_num)
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
if (CHECK(btf_fd == -1, "errno:%d", errno)) {
err = -1;
goto done;
@@ -3892,7 +3830,7 @@ static int test_big_btf_info(unsigned int test_num)
fprintf(stderr, "OK");
done:
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
@@ -3939,7 +3877,7 @@ static int test_btf_id(unsigned int test_num)
btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
err = -1;
goto done;
@@ -4024,7 +3962,7 @@ static int test_btf_id(unsigned int test_num)
fprintf(stderr, "OK");
done:
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
@@ -4039,7 +3977,7 @@ done:
return err;
}
-static int do_test_get_info(unsigned int test_num)
+static void do_test_get_info(unsigned int test_num)
{
const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
unsigned int raw_btf_size, user_btf_size, expected_nbytes;
@@ -4048,11 +3986,14 @@ static int do_test_get_info(unsigned int test_num)
int btf_fd = -1, err, ret;
uint32_t info_len;
- fprintf(stderr, "BTF GET_INFO test[%u] (%s): ",
- test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
- if (test->special_test)
- return test->special_test(test_num);
+ if (test->special_test) {
+ err = test->special_test(test_num);
+ if (CHECK(err, "failed: %d\n", err))
+ return;
+ }
raw_btf = btf_raw_create(&hdr_tmpl,
test->raw_types,
@@ -4061,7 +4002,7 @@ static int do_test_get_info(unsigned int test_num)
&raw_btf_size, NULL);
if (!raw_btf)
- return -1;
+ return;
*btf_log_buf = '\0';
@@ -4073,7 +4014,7 @@ static int do_test_get_info(unsigned int test_num)
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
if (CHECK(btf_fd == -1, "errno:%d", errno)) {
err = -1;
goto done;
@@ -4114,7 +4055,7 @@ static int do_test_get_info(unsigned int test_num)
fprintf(stderr, "OK");
done:
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
@@ -4122,22 +4063,6 @@ done:
if (btf_fd != -1)
close(btf_fd);
-
- return err;
-}
-
-static int test_get_info(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.get_info_test_num)
- return count_result(do_test_get_info(args.get_info_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
- err |= count_result(do_test_get_info(i));
-
- return err;
}
struct btf_file_test {
@@ -4151,7 +4076,7 @@ static struct btf_file_test file_tests[] = {
{ .file = "test_btf_nokv.o", .btf_kv_notfound = true, },
};
-static int do_test_file(unsigned int test_num)
+static void do_test_file(unsigned int test_num)
{
const struct btf_file_test *test = &file_tests[test_num - 1];
const char *expected_fnames[] = {"_dummy_tracepoint",
@@ -4169,17 +4094,17 @@ static int do_test_file(unsigned int test_num)
struct bpf_map *map;
int i, err, prog_fd;
- fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
- test->file);
+ if (!test__start_subtest(test->file))
+ return;
btf = btf__parse_elf(test->file, &btf_ext);
if (IS_ERR(btf)) {
if (PTR_ERR(btf) == -ENOENT) {
- fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
- skip_cnt++;
- return 0;
+ printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC);
+ test__skip();
+ return;
}
- return PTR_ERR(btf);
+ return;
}
btf__free(btf);
@@ -4188,7 +4113,7 @@ static int do_test_file(unsigned int test_num)
obj = bpf_object__open(test->file);
if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
- return PTR_ERR(obj);
+ return;
prog = bpf_program__next(NULL, obj);
if (CHECK(!prog, "Cannot find bpf_prog")) {
@@ -4310,21 +4235,6 @@ skip:
done:
free(func_info);
bpf_object__close(obj);
- return err;
-}
-
-static int test_file(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.file_test_num)
- return count_result(do_test_file(args.file_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
- err |= count_result(do_test_file(i));
-
- return err;
}
const char *pprint_enum_str[] = {
@@ -4428,7 +4338,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
.value_type_id = 16, /* struct pprint_mapv */
- .max_entries = 128 * 1024,
+ .max_entries = 128,
},
{
@@ -4493,7 +4403,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
.value_type_id = 16, /* struct pprint_mapv */
- .max_entries = 128 * 1024,
+ .max_entries = 128,
},
{
@@ -4564,7 +4474,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
.value_type_id = 16, /* struct pprint_mapv */
- .max_entries = 128 * 1024,
+ .max_entries = 128,
},
#ifdef __SIZEOF_INT128__
@@ -4591,7 +4501,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv_int128),
.key_type_id = 1,
.value_type_id = 4,
- .max_entries = 128 * 1024,
+ .max_entries = 128,
.mapv_kind = PPRINT_MAPV_KIND_INT128,
},
#endif
@@ -4790,7 +4700,7 @@ static int check_line(const char *expected_line, int nexpected_line,
}
-static int do_test_pprint(int test_num)
+static void do_test_pprint(int test_num)
{
const struct btf_raw_test *test = &pprint_test_template[test_num];
enum pprint_mapv_kind_t mapv_kind = test->mapv_kind;
@@ -4809,18 +4719,20 @@ static int do_test_pprint(int test_num)
uint8_t *raw_btf;
ssize_t nread;
- fprintf(stderr, "%s(#%d)......", test->descr, test_num);
+ if (!test__start_subtest(test->descr))
+ return;
+
raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
test->str_sec, test->str_sec_size,
&raw_btf_size, NULL);
if (!raw_btf)
- return -1;
+ return;
*btf_log_buf = '\0';
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
free(raw_btf);
if (CHECK(btf_fd == -1, "errno:%d", errno)) {
@@ -4971,7 +4883,7 @@ done:
free(mapv);
if (!err)
fprintf(stderr, "OK");
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
if (btf_fd != -1)
close(btf_fd);
@@ -4981,14 +4893,11 @@ done:
fclose(pin_file);
unlink(pin_path);
free(line);
-
- return err;
}
-static int test_pprint(void)
+static void test_pprint(void)
{
unsigned int i;
- int err = 0;
/* test various maps with the first test template */
for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) {
@@ -4999,7 +4908,7 @@ static int test_pprint(void)
pprint_test_template[0].lossless_map = pprint_tests_meta[i].lossless_map;
pprint_test_template[0].percpu_map = pprint_tests_meta[i].percpu_map;
- err |= count_result(do_test_pprint(0));
+ do_test_pprint(0);
}
/* test rest test templates with the first map */
@@ -5010,10 +4919,8 @@ static int test_pprint(void)
pprint_test_template[i].ordered_map = pprint_tests_meta[0].ordered_map;
pprint_test_template[i].lossless_map = pprint_tests_meta[0].lossless_map;
pprint_test_template[i].percpu_map = pprint_tests_meta[0].percpu_map;
- err |= count_result(do_test_pprint(i));
+ do_test_pprint(i);
}
-
- return err;
}
#define BPF_LINE_INFO_ENC(insn_off, file_off, line_off, line_num, line_col) \
@@ -6178,7 +6085,7 @@ done:
return err;
}
-static int do_test_info_raw(unsigned int test_num)
+static void do_test_info_raw(unsigned int test_num)
{
const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1];
unsigned int raw_btf_size, linfo_str_off, linfo_size;
@@ -6187,18 +6094,19 @@ static int do_test_info_raw(unsigned int test_num)
const char *ret_next_str;
union bpf_attr attr = {};
- fprintf(stderr, "BTF prog info raw test[%u] (%s): ", test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
+
raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
test->str_sec, test->str_sec_size,
&raw_btf_size, &ret_next_str);
-
if (!raw_btf)
- return -1;
+ return;
*btf_log_buf = '\0';
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ always_log);
free(raw_btf);
if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) {
@@ -6206,7 +6114,7 @@ static int do_test_info_raw(unsigned int test_num)
goto done;
}
- if (*btf_log_buf && args.always_log)
+ if (*btf_log_buf && always_log)
fprintf(stderr, "\n%s", btf_log_buf);
*btf_log_buf = '\0';
@@ -6261,10 +6169,7 @@ static int do_test_info_raw(unsigned int test_num)
goto done;
done:
- if (!err)
- fprintf(stderr, "OK");
-
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
if (btf_fd != -1)
@@ -6274,22 +6179,6 @@ done:
if (!IS_ERR(patched_linfo))
free(patched_linfo);
-
- return err;
-}
-
-static int test_info_raw(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.info_raw_test_num)
- return count_result(do_test_info_raw(args.info_raw_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++)
- err |= count_result(do_test_info_raw(i));
-
- return err;
}
struct btf_raw_data {
@@ -6754,7 +6643,7 @@ static void dump_btf_strings(const char *strs, __u32 len)
}
}
-static int do_test_dedup(unsigned int test_num)
+static void do_test_dedup(unsigned int test_num)
{
const struct btf_dedup_test *test = &dedup_tests[test_num - 1];
__u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size;
@@ -6769,13 +6658,15 @@ static int do_test_dedup(unsigned int test_num)
void *raw_btf;
int err = 0, i;
- fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
raw_btf = btf_raw_create(&hdr_tmpl, test->input.raw_types,
test->input.str_sec, test->input.str_sec_size,
&raw_btf_size, &ret_test_next_str);
if (!raw_btf)
- return -1;
+ return;
+
test_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
free(raw_btf);
if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld",
@@ -6789,7 +6680,7 @@ static int do_test_dedup(unsigned int test_num)
test->expect.str_sec_size,
&raw_btf_size, &ret_expect_next_str);
if (!raw_btf)
- return -1;
+ return;
expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
free(raw_btf);
if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld",
@@ -6894,174 +6785,27 @@ static int do_test_dedup(unsigned int test_num)
}
done:
- if (!err)
- fprintf(stderr, "OK");
if (!IS_ERR(test_btf))
btf__free(test_btf);
if (!IS_ERR(expect_btf))
btf__free(expect_btf);
-
- return err;
}
-static int test_dedup(void)
+void test_btf(void)
{
- unsigned int i;
- int err = 0;
+ int i;
- if (args.dedup_test_num)
- return count_result(do_test_dedup(args.dedup_test_num));
+ always_log = env.verbosity > VERBOSE_NONE;
+ for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
+ do_test_raw(i);
+ for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
+ do_test_get_info(i);
+ for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
+ do_test_file(i);
+ for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++)
+ do_test_info_raw(i);
for (i = 1; i <= ARRAY_SIZE(dedup_tests); i++)
- err |= count_result(do_test_dedup(i));
-
- return err;
-}
-
-static void usage(const char *cmd)
-{
- fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n"
- "\t[-g btf_get_info_test_num (1 - %zu)] |\n"
- "\t[-f btf_file_test_num (1 - %zu)] |\n"
- "\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n"
- "\t[-p (pretty print test)] |\n"
- "\t[-d btf_dedup_test_num (1 - %zu)]]\n",
- cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests),
- ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests),
- ARRAY_SIZE(dedup_tests));
-}
-
-static int parse_args(int argc, char **argv)
-{
- const char *optstr = "hlpk:f:r:g:d:";
- int opt;
-
- while ((opt = getopt(argc, argv, optstr)) != -1) {
- switch (opt) {
- case 'l':
- args.always_log = true;
- break;
- case 'f':
- args.file_test_num = atoi(optarg);
- args.file_test = true;
- break;
- case 'r':
- args.raw_test_num = atoi(optarg);
- args.raw_test = true;
- break;
- case 'g':
- args.get_info_test_num = atoi(optarg);
- args.get_info_test = true;
- break;
- case 'p':
- args.pprint_test = true;
- break;
- case 'k':
- args.info_raw_test_num = atoi(optarg);
- args.info_raw_test = true;
- break;
- case 'd':
- args.dedup_test_num = atoi(optarg);
- args.dedup_test = true;
- break;
- case 'h':
- usage(argv[0]);
- exit(0);
- default:
- usage(argv[0]);
- return -1;
- }
- }
-
- if (args.raw_test_num &&
- (args.raw_test_num < 1 ||
- args.raw_test_num > ARRAY_SIZE(raw_tests))) {
- fprintf(stderr, "BTF raw test number must be [1 - %zu]\n",
- ARRAY_SIZE(raw_tests));
- return -1;
- }
-
- if (args.file_test_num &&
- (args.file_test_num < 1 ||
- args.file_test_num > ARRAY_SIZE(file_tests))) {
- fprintf(stderr, "BTF file test number must be [1 - %zu]\n",
- ARRAY_SIZE(file_tests));
- return -1;
- }
-
- if (args.get_info_test_num &&
- (args.get_info_test_num < 1 ||
- args.get_info_test_num > ARRAY_SIZE(get_info_tests))) {
- fprintf(stderr, "BTF get info test number must be [1 - %zu]\n",
- ARRAY_SIZE(get_info_tests));
- return -1;
- }
-
- if (args.info_raw_test_num &&
- (args.info_raw_test_num < 1 ||
- args.info_raw_test_num > ARRAY_SIZE(info_raw_tests))) {
- fprintf(stderr, "BTF prog info raw test number must be [1 - %zu]\n",
- ARRAY_SIZE(info_raw_tests));
- return -1;
- }
-
- if (args.dedup_test_num &&
- (args.dedup_test_num < 1 ||
- args.dedup_test_num > ARRAY_SIZE(dedup_tests))) {
- fprintf(stderr, "BTF dedup test number must be [1 - %zu]\n",
- ARRAY_SIZE(dedup_tests));
- return -1;
- }
-
- return 0;
-}
-
-static void print_summary(void)
-{
- fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n",
- pass_cnt - skip_cnt, skip_cnt, error_cnt);
-}
-
-int main(int argc, char **argv)
-{
- int err = 0;
-
- err = parse_args(argc, argv);
- if (err)
- return err;
-
- if (args.always_log)
- libbpf_set_print(__base_pr);
-
- if (args.raw_test)
- err |= test_raw();
-
- if (args.get_info_test)
- err |= test_get_info();
-
- if (args.file_test)
- err |= test_file();
-
- if (args.pprint_test)
- err |= test_pprint();
-
- if (args.info_raw_test)
- err |= test_info_raw();
-
- if (args.dedup_test)
- err |= test_dedup();
-
- if (args.raw_test || args.get_info_test || args.file_test ||
- args.pprint_test || args.info_raw_test || args.dedup_test)
- goto done;
-
- err |= test_raw();
- err |= test_get_info();
- err |= test_file();
- err |= test_info_raw();
- err |= test_dedup();
-
-done:
- print_summary();
- return err;
+ do_test_dedup(i);
+ test_pprint();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
index f259085cca6a..9781d85cb223 100644
--- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -12,10 +12,13 @@
#include "progs/test_cls_redirect.h"
#include "test_cls_redirect.skel.h"
+#include "test_cls_redirect_subprogs.skel.h"
#define ENCAP_IP INADDR_LOOPBACK
#define ENCAP_PORT (1234)
+static int duration = 0;
+
struct addr_port {
in_port_t port;
union {
@@ -361,30 +364,18 @@ static void close_fds(int *fds, int n)
close(fds[i]);
}
-void test_cls_redirect(void)
+static void test_cls_redirect_common(struct bpf_program *prog)
{
- struct test_cls_redirect *skel = NULL;
struct bpf_prog_test_run_attr tattr = {};
int families[] = { AF_INET, AF_INET6 };
struct sockaddr_storage ss;
struct sockaddr *addr;
socklen_t slen;
int i, j, err;
-
int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
- skel = test_cls_redirect__open();
- if (CHECK_FAIL(!skel))
- return;
-
- skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
- skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
-
- if (CHECK_FAIL(test_cls_redirect__load(skel)))
- goto cleanup;
-
addr = (struct sockaddr *)&ss;
for (i = 0; i < ARRAY_SIZE(families); i++) {
slen = prepare_addr(&ss, families[i]);
@@ -402,7 +393,7 @@ void test_cls_redirect(void)
goto cleanup;
}
- tattr.prog_fd = bpf_program__fd(skel->progs.cls_redirect);
+ tattr.prog_fd = bpf_program__fd(prog);
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct test_cfg *test = &tests[i];
@@ -450,7 +441,58 @@ void test_cls_redirect(void)
}
cleanup:
- test_cls_redirect__destroy(skel);
close_fds((int *)servers, sizeof(servers) / sizeof(servers[0][0]));
close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0]));
}
+
+static void test_cls_redirect_inlined(void)
+{
+ struct test_cls_redirect *skel;
+ int err;
+
+ skel = test_cls_redirect__open();
+ if (CHECK(!skel, "skel_open", "failed\n"))
+ return;
+
+ skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+ skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+ err = test_cls_redirect__load(skel);
+ if (CHECK(err, "skel_load", "failed: %d\n", err))
+ goto cleanup;
+
+ test_cls_redirect_common(skel->progs.cls_redirect);
+
+cleanup:
+ test_cls_redirect__destroy(skel);
+}
+
+static void test_cls_redirect_subprogs(void)
+{
+ struct test_cls_redirect_subprogs *skel;
+ int err;
+
+ skel = test_cls_redirect_subprogs__open();
+ if (CHECK(!skel, "skel_open", "failed\n"))
+ return;
+
+ skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+ skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+ err = test_cls_redirect_subprogs__load(skel);
+ if (CHECK(err, "skel_load", "failed: %d\n", err))
+ goto cleanup;
+
+ test_cls_redirect_common(skel->progs.cls_redirect);
+
+cleanup:
+ test_cls_redirect_subprogs__destroy(skel);
+}
+
+void test_cls_redirect(void)
+{
+ if (test__start_subtest("cls_redirect_inlined"))
+ test_cls_redirect_inlined();
+ if (test__start_subtest("cls_redirect_subprogs"))
+ test_cls_redirect_subprogs();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
index fc12e0d445ff..0a577a248d34 100644
--- a/tools/testing/selftests/bpf/prog_tests/d_path.c
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -120,6 +120,16 @@ void test_d_path(void)
if (err < 0)
goto cleanup;
+ if (CHECK(!bss->called_stat,
+ "stat",
+ "trampoline for security_inode_getattr was not called\n"))
+ goto cleanup;
+
+ if (CHECK(!bss->called_close,
+ "close",
+ "trampoline for filp_close was not called\n"))
+ goto cleanup;
+
for (int i = 0; i < MAX_FILES; i++) {
CHECK(strncmp(src.paths[i], bss->paths_stat[i], MAX_PATH_LEN),
"check",
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index a550dab9ba7a..eda682727787 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -208,11 +208,18 @@ static void test_func_map_prog_compatibility(void)
void test_fexit_bpf2bpf(void)
{
- test_target_no_callees();
- test_target_yes_callees();
- test_func_replace();
- test_func_replace_verify();
- test_func_sockmap_update();
- test_func_replace_return_code();
- test_func_map_prog_compatibility();
+ if (test__start_subtest("target_no_callees"))
+ test_target_no_callees();
+ if (test__start_subtest("target_yes_callees"))
+ test_target_yes_callees();
+ if (test__start_subtest("func_replace"))
+ test_func_replace();
+ if (test__start_subtest("func_replace_verify"))
+ test_func_replace_verify();
+ if (test__start_subtest("func_sockmap_update"))
+ test_func_sockmap_update();
+ if (test__start_subtest("func_replace_return_code"))
+ test_func_replace_return_code();
+ if (test__start_subtest("func_map_prog_compatibility"))
+ test_func_map_prog_compatibility();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
index 3bdaa5a40744..ee46b11f1f9a 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -12,7 +12,8 @@ void test_global_data_init(void)
size_t sz;
obj = bpf_object__open_file(file, NULL);
- if (CHECK_FAIL(!obj))
+ err = libbpf_get_error(obj);
+ if (CHECK_FAIL(err))
return;
map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c
index e3d6777226a8..b771804b2342 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c
@@ -32,6 +32,7 @@ out:
void test_ksyms(void)
{
+ __u64 per_cpu_start_addr = kallsyms_find("__per_cpu_start");
__u64 link_fops_addr = kallsyms_find("bpf_link_fops");
const char *btf_path = "/sys/kernel/btf/vmlinux";
struct test_ksyms *skel;
@@ -63,8 +64,9 @@ void test_ksyms(void)
"got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0);
CHECK(data->out__btf_size != btf_size, "btf_size",
"got %llu, exp %llu\n", data->out__btf_size, btf_size);
- CHECK(data->out__per_cpu_start != 0, "__per_cpu_start",
- "got %llu, exp %llu\n", data->out__per_cpu_start, (__u64)0);
+ CHECK(data->out__per_cpu_start != per_cpu_start_addr, "__per_cpu_start",
+ "got %llu, exp %llu\n", data->out__per_cpu_start,
+ per_cpu_start_addr);
cleanup:
test_ksyms__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index c2d373e294bb..8073105548ff 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -80,9 +80,8 @@ out:
void test_l4lb_all(void)
{
- const char *file1 = "./test_l4lb.o";
- const char *file2 = "./test_l4lb_noinline.o";
-
- test_l4lb(file1);
- test_l4lb(file2);
+ if (test__start_subtest("l4lb_inline"))
+ test_l4lb("test_l4lb.o");
+ if (test__start_subtest("l4lb_noinline"))
+ test_l4lb("test_l4lb_noinline.o");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/metadata.c b/tools/testing/selftests/bpf/prog_tests/metadata.c
new file mode 100644
index 000000000000..2c53eade88e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/metadata.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "metadata_unused.skel.h"
+#include "metadata_used.skel.h"
+
+static int duration;
+
+static int prog_holds_map(int prog_fd, int map_fd)
+{
+ struct bpf_prog_info prog_info = {};
+ struct bpf_prog_info map_info = {};
+ __u32 prog_info_len;
+ __u32 map_info_len;
+ __u32 *map_ids;
+ int nr_maps;
+ int ret;
+ int i;
+
+ map_info_len = sizeof(map_info);
+ ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
+ if (ret)
+ return -errno;
+
+ prog_info_len = sizeof(prog_info);
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ if (ret)
+ return -errno;
+
+ map_ids = calloc(prog_info.nr_map_ids, sizeof(__u32));
+ if (!map_ids)
+ return -ENOMEM;
+
+ nr_maps = prog_info.nr_map_ids;
+ memset(&prog_info, 0, sizeof(prog_info));
+ prog_info.nr_map_ids = nr_maps;
+ prog_info.map_ids = ptr_to_u64(map_ids);
+ prog_info_len = sizeof(prog_info);
+
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ if (ret) {
+ ret = -errno;
+ goto free_map_ids;
+ }
+
+ ret = -ENOENT;
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ if (map_ids[i] == map_info.id) {
+ ret = 0;
+ break;
+ }
+ }
+
+free_map_ids:
+ free(map_ids);
+ return ret;
+}
+
+static void test_metadata_unused(void)
+{
+ struct metadata_unused *obj;
+ int err;
+
+ obj = metadata_unused__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ err = prog_holds_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata));
+ if (CHECK(err, "prog-holds-rodata", "errno: %d", err))
+ return;
+
+ /* Assert that we can access the metadata in skel and the values are
+ * what we expect.
+ */
+ if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "foo",
+ sizeof(obj->rodata->bpf_metadata_a)),
+ "bpf_metadata_a", "expected \"foo\", value differ"))
+ goto close_bpf_object;
+ if (CHECK(obj->rodata->bpf_metadata_b != 1, "bpf_metadata_b",
+ "expected 1, got %d", obj->rodata->bpf_metadata_b))
+ goto close_bpf_object;
+
+ /* Assert that binding metadata map to prog again succeeds. */
+ err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata), NULL);
+ CHECK(err, "rebind_map", "errno %d, expected 0", errno);
+
+close_bpf_object:
+ metadata_unused__destroy(obj);
+}
+
+static void test_metadata_used(void)
+{
+ struct metadata_used *obj;
+ int err;
+
+ obj = metadata_used__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ err = prog_holds_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata));
+ if (CHECK(err, "prog-holds-rodata", "errno: %d", err))
+ return;
+
+ /* Assert that we can access the metadata in skel and the values are
+ * what we expect.
+ */
+ if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "bar",
+ sizeof(obj->rodata->bpf_metadata_a)),
+ "metadata_a", "expected \"bar\", value differ"))
+ goto close_bpf_object;
+ if (CHECK(obj->rodata->bpf_metadata_b != 2, "metadata_b",
+ "expected 2, got %d", obj->rodata->bpf_metadata_b))
+ goto close_bpf_object;
+
+ /* Assert that binding metadata map to prog again succeeds. */
+ err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata), NULL);
+ CHECK(err, "rebind_map", "errno %d, expected 0", errno);
+
+close_bpf_object:
+ metadata_used__destroy(obj);
+}
+
+void test_metadata(void)
+{
+ if (test__start_subtest("unused"))
+ test_metadata_unused();
+
+ if (test__start_subtest("used"))
+ test_metadata_used();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index fc0d7f4f02cf..ac1ee10cffd8 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -27,7 +27,7 @@ void test_reference_tracking(void)
const char *title;
/* Ignore .text sections */
- title = bpf_program__title(prog, false);
+ title = bpf_program__section_name(prog);
if (strstr(title, ".text") != NULL)
continue;
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index a49a26f95a8b..3a469099f30d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -265,7 +265,7 @@ void test_sk_assign(void)
TEST("ipv6 udp port redir", AF_INET6, SOCK_DGRAM, false),
TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true),
};
- int server = -1;
+ __s64 server = -1;
int server_map;
int self_net;
int i;
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 0b79d78b98db..4b7a527e7e82 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -6,6 +6,9 @@
#include "test_skmsg_load_helpers.skel.h"
#include "test_sockmap_update.skel.h"
#include "test_sockmap_invalid_update.skel.h"
+#include "bpf_iter_sockmap.skel.h"
+
+#include "progs/bpf_iter_sockmap.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
@@ -171,6 +174,88 @@ static void test_sockmap_invalid_update(void)
test_sockmap_invalid_update__destroy(skel);
}
+static void test_sockmap_iter(enum bpf_map_type map_type)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ int err, len, src_fd, iter_fd, duration = 0;
+ union bpf_iter_link_info linfo = {0};
+ __s64 sock_fd[SOCKMAP_MAX_ENTRIES];
+ __u32 i, num_sockets, max_elems;
+ struct bpf_iter_sockmap *skel;
+ struct bpf_link *link;
+ struct bpf_map *src;
+ char buf[64];
+
+ skel = bpf_iter_sockmap__open_and_load();
+ if (CHECK(!skel, "bpf_iter_sockmap__open_and_load", "skeleton open_and_load failed\n"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(sock_fd); i++)
+ sock_fd[i] = -1;
+
+ /* Make sure we have at least one "empty" entry to test iteration of
+ * an empty slot.
+ */
+ num_sockets = ARRAY_SIZE(sock_fd) - 1;
+
+ if (map_type == BPF_MAP_TYPE_SOCKMAP) {
+ src = skel->maps.sockmap;
+ max_elems = bpf_map__max_entries(src);
+ } else {
+ src = skel->maps.sockhash;
+ max_elems = num_sockets;
+ }
+
+ src_fd = bpf_map__fd(src);
+
+ for (i = 0; i < num_sockets; i++) {
+ sock_fd[i] = connected_socket_v4();
+ if (CHECK(sock_fd[i] == -1, "connected_socket_v4", "cannot connect\n"))
+ goto out;
+
+ err = bpf_map_update_elem(src_fd, &i, &sock_fd[i], BPF_NOEXIST);
+ if (CHECK(err, "map_update", "failed: %s\n", strerror(errno)))
+ goto out;
+ }
+
+ linfo.map.map_fd = src_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.count_elems, &opts);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ /* do some tests */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ if (CHECK(len < 0, "read", "failed: %s\n", strerror(errno)))
+ goto close_iter;
+
+ /* test results */
+ if (CHECK(skel->bss->elems != max_elems, "elems", "got %u expected %u\n",
+ skel->bss->elems, max_elems))
+ goto close_iter;
+
+ if (CHECK(skel->bss->socks != num_sockets, "socks", "got %u expected %u\n",
+ skel->bss->socks, num_sockets))
+ goto close_iter;
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ for (i = 0; i < num_sockets; i++) {
+ if (sock_fd[i] >= 0)
+ close(sock_fd[i]);
+ }
+ bpf_iter_sockmap__destroy(skel);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -187,4 +272,8 @@ void test_sockmap_basic(void)
test_sockmap_update(BPF_MAP_TYPE_SOCKHASH);
if (test__start_subtest("sockmap update in unsafe context"))
test_sockmap_invalid_update();
+ if (test__start_subtest("sockmap iter"))
+ test_sockmap_iter(BPF_MAP_TYPE_SOCKMAP);
+ if (test__start_subtest("sockhash iter"))
+ test_sockmap_iter(BPF_MAP_TYPE_SOCKHASH);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index 5f54c6aec7f0..b25c9c45c148 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -45,9 +45,9 @@ static int getsetsockopt(void)
goto err;
}
- if (*(int *)big_buf != 0x08) {
+ if (*big_buf != 0x08) {
log_err("Unexpected getsockopt(IP_TOS) optval 0x%x != 0x08",
- *(int *)big_buf);
+ (int)*big_buf);
goto err;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs.c b/tools/testing/selftests/bpf/prog_tests/subprogs.c
new file mode 100644
index 000000000000..a00abf58c037
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/subprogs.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <time.h>
+#include "test_subprogs.skel.h"
+
+static int duration;
+
+void test_subprogs(void)
+{
+ struct test_subprogs *skel;
+ int err;
+
+ skel = test_subprogs__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ err = test_subprogs__attach(skel);
+ if (CHECK(err, "skel_attach", "failed to attach skeleton: %d\n", err))
+ goto cleanup;
+
+ usleep(1);
+
+ CHECK(skel->bss->res1 != 12, "res1", "got %d, exp %d\n", skel->bss->res1, 12);
+ CHECK(skel->bss->res2 != 17, "res2", "got %d, exp %d\n", skel->bss->res2, 17);
+ CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19);
+ CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36);
+
+cleanup:
+ test_subprogs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index bb8fe646dd9f..ee27d68d2a1c 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
/* test_tailcall_1 checks basic functionality by patching multiple locations
* in a single program for a single tail call slot with nop->jmp, jmp->nop
@@ -472,6 +473,329 @@ out:
bpf_object__close(obj);
}
+/* test_tailcall_bpf2bpf_1 purpose is to make sure that tailcalls are working
+ * correctly in correlation with BPF subprograms
+ */
+static void test_tailcall_bpf2bpf_1(void)
+{
+ int err, map_fd, prog_fd, main_fd, i;
+ struct bpf_map *prog_array;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char prog_name[32];
+
+ err = bpf_prog_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ /* nop -> jmp */
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ 0, &retval, &duration);
+ CHECK(err || retval != 1, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+
+ /* jmp -> nop, call subprog that will do tailcall */
+ i = 1;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ 0, &retval, &duration);
+ CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ /* make sure that subprog can access ctx and entry prog that
+ * called this subprog can properly return
+ */
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ 0, &retval, &duration);
+ CHECK(err || retval != sizeof(pkt_v4) * 2,
+ "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_2 checks that the count value of the tail call limit
+ * enforcement matches with expectations when tailcall is preceded with
+ * bpf2bpf call.
+ */
+static void test_tailcall_bpf2bpf_2(void)
+{
+ int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char buff[128] = {};
+
+ err = bpf_prog_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier/0");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+ if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+ return;
+
+ data_fd = bpf_map__fd(data_map);
+ if (CHECK_FAIL(map_fd < 0))
+ return;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n",
+ err, errno, val);
+
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_3 checks that non-trivial amount of stack (up to
+ * 256 bytes) can be used within bpf subprograms that have the tailcalls
+ * in them
+ */
+static void test_tailcall_bpf2bpf_3(void)
+{
+ int err, map_fd, prog_fd, main_fd, i;
+ struct bpf_map *prog_array;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char prog_name[32];
+
+ err = bpf_prog_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != sizeof(pkt_v4) * 3,
+ "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ i = 1;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != sizeof(pkt_v4),
+ "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != sizeof(pkt_v4) * 2,
+ "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_4 checks that tailcall counter is correctly preserved
+ * across tailcalls combined with bpf2bpf calls. for making sure that tailcall
+ * counter behaves correctly, bpf program will go through following flow:
+ *
+ * entry -> entry_subprog -> tailcall0 -> bpf_func0 -> subprog0 ->
+ * -> tailcall1 -> bpf_func1 -> subprog1 -> tailcall2 -> bpf_func2 ->
+ * subprog2 [here bump global counter] --------^
+ *
+ * We go through first two tailcalls and start counting from the subprog2 where
+ * the loop begins. At the end of the test make sure that the global counter is
+ * equal to 31, because tailcall counter includes the first two tailcalls
+ * whereas global counter is incremented only on loop presented on flow above.
+ */
+static void test_tailcall_bpf2bpf_4(void)
+{
+ int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char prog_name[32];
+
+ err = bpf_prog_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+ if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+ return;
+
+ data_fd = bpf_map__fd(data_map);
+ if (CHECK_FAIL(map_fd < 0))
+ return;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ CHECK(err || val != 31, "tailcall count", "err %d errno %d count %d\n",
+ err, errno, val);
+
+out:
+ bpf_object__close(obj);
+}
+
void test_tailcalls(void)
{
if (test__start_subtest("tailcall_1"))
@@ -484,4 +808,12 @@ void test_tailcalls(void)
test_tailcall_4();
if (test__start_subtest("tailcall_5"))
test_tailcall_5();
+ if (test__start_subtest("tailcall_bpf2bpf_1"))
+ test_tailcall_bpf2bpf_1();
+ if (test__start_subtest("tailcall_bpf2bpf_2"))
+ test_tailcall_bpf2bpf_2();
+ if (test__start_subtest("tailcall_bpf2bpf_3"))
+ test_tailcall_bpf2bpf_3();
+ if (test__start_subtest("tailcall_bpf2bpf_4"))
+ test_tailcall_bpf2bpf_4();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
index f284f72158ef..a1f06424cf83 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
@@ -1,11 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
+#include "test_xdp_noinline.skel.h"
void test_xdp_noinline(void)
{
- const char *file = "./test_xdp_noinline.o";
unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct test_xdp_noinline *skel;
struct vip key = {.protocol = 6};
struct vip_meta {
__u32 flags;
@@ -25,58 +26,42 @@ void test_xdp_noinline(void)
} real_def = {.dst = MAGIC_VAL};
__u32 ch_key = 11, real_num = 3;
__u32 duration, retval, size;
- int err, i, prog_fd, map_fd;
+ int err, i;
__u64 bytes = 0, pkts = 0;
- struct bpf_object *obj;
char buf[128];
u32 *magic = (u32 *)buf;
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (CHECK_FAIL(err))
+ skel = test_xdp_noinline__open_and_load();
+ if (CHECK(!skel, "skel_open_and_load", "failed\n"))
return;
- map_fd = bpf_find_map(__func__, obj, "vip_map");
- if (map_fd < 0)
- goto out;
- bpf_map_update_elem(map_fd, &key, &value, 0);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.vip_map), &key, &value, 0);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.ch_rings), &ch_key, &real_num, 0);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &real_num, &real_def, 0);
- map_fd = bpf_find_map(__func__, obj, "ch_rings");
- if (map_fd < 0)
- goto out;
- bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
-
- map_fd = bpf_find_map(__func__, obj, "reals");
- if (map_fd < 0)
- goto out;
- bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
-
- err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+ err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v4),
+ NUM_ITER, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
CHECK(err || retval != 1 || size != 54 ||
*magic != MAGIC_VAL, "ipv4",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
- err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+ err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v6),
+ NUM_ITER, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
CHECK(err || retval != 1 || size != 74 ||
*magic != MAGIC_VAL, "ipv6",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
- map_fd = bpf_find_map(__func__, obj, "stats");
- if (map_fd < 0)
- goto out;
- bpf_map_lookup_elem(map_fd, &stats_key, stats);
+ bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), &stats_key, stats);
for (i = 0; i < nr_cpus; i++) {
bytes += stats[i].bytes;
pkts += stats[i].pkts;
}
- if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 ||
- pkts != NUM_ITER * 2)) {
- printf("test_xdp_noinline:FAIL:stats %lld %lld\n",
- bytes, pkts);
- }
-out:
- bpf_object__close(obj);
+ CHECK(bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2,
+ "stats", "bytes %lld pkts %lld\n",
+ (unsigned long long)bytes, (unsigned long long)pkts);
+ test_xdp_noinline__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index c196280df90d..df682af75510 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -13,6 +13,7 @@
#define udp6_sock udp6_sock___not_used
#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
+#define bpf_iter__sockmap bpf_iter__sockmap___not_used
#include "vmlinux.h"
#undef bpf_iter_meta
#undef bpf_iter__bpf_map
@@ -26,6 +27,7 @@
#undef udp6_sock
#undef bpf_iter__bpf_map_elem
#undef bpf_iter__bpf_sk_storage_map
+#undef bpf_iter__sockmap
struct bpf_iter_meta {
struct seq_file *seq;
@@ -96,3 +98,10 @@ struct bpf_iter__bpf_sk_storage_map {
struct sock *sk;
void *value;
};
+
+struct bpf_iter__sockmap {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+ void *key;
+ struct sock *sk;
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c
new file mode 100644
index 000000000000..0e27f73dd803
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Cloudflare */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include "bpf_iter_sockmap.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, SOCKMAP_MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u64);
+} sockmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, SOCKMAP_MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u64);
+} sockhash SEC(".maps");
+
+__u32 elems = 0;
+__u32 socks = 0;
+
+SEC("iter/sockmap")
+int count_elems(struct bpf_iter__sockmap *ctx)
+{
+ struct sock *sk = ctx->sk;
+ __u32 tmp, *key = ctx->key;
+ int ret;
+
+ if (key)
+ elems++;
+
+ if (sk)
+ socks++;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.h b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.h
new file mode 100644
index 000000000000..35a675d13c0f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.h
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define SOCKMAP_MAX_ENTRIES (64)
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index 8b787baa2654..b2f7c7c5f952 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -6,6 +6,9 @@
char _license[] SEC("license") = "GPL";
+int count = 0;
+int tgid = 0;
+
SEC("iter/task_file")
int dump_task_file(struct bpf_iter__task_file *ctx)
{
@@ -17,8 +20,13 @@ int dump_task_file(struct bpf_iter__task_file *ctx)
if (task == (void *)0 || file == (void *)0)
return 0;
- if (ctx->meta->seq_num == 0)
+ if (ctx->meta->seq_num == 0) {
+ count = 0;
BPF_SEQ_PRINTF(seq, " tgid gid fd file\n");
+ }
+
+ if (tgid == task->tgid && task->tgid != task->pid)
+ count++;
BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
(long)file->f_op);
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index 982a2d8aa844..c325405751e2 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -82,6 +82,14 @@ static inline int check_default(struct bpf_map *indirect,
return 1;
}
+static __noinline int
+check_default_noinline(struct bpf_map *indirect, struct bpf_map *direct)
+{
+ VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32),
+ MAX_ENTRIES));
+ return 1;
+}
+
typedef struct {
int counter;
} atomic_t;
@@ -107,7 +115,7 @@ static inline int check_hash(void)
struct bpf_map *map = (struct bpf_map *)&m_hash;
int i;
- VERIFY(check_default(&hash->map, map));
+ VERIFY(check_default_noinline(&hash->map, map));
VERIFY(hash->n_buckets == MAX_ENTRIES);
VERIFY(hash->elem_size == 64);
diff --git a/tools/testing/selftests/bpf/progs/metadata_unused.c b/tools/testing/selftests/bpf/progs/metadata_unused.c
new file mode 100644
index 000000000000..672a0d19f8d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/metadata_unused.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+volatile const char bpf_metadata_a[] SEC(".rodata") = "foo";
+volatile const int bpf_metadata_b SEC(".rodata") = 1;
+
+SEC("cgroup_skb/egress")
+int prog(struct xdp_md *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/metadata_used.c b/tools/testing/selftests/bpf/progs/metadata_used.c
new file mode 100644
index 000000000000..b7198e65383d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/metadata_used.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+volatile const char bpf_metadata_a[] SEC(".rodata") = "bar";
+volatile const int bpf_metadata_b SEC(".rodata") = 2;
+
+SEC("cgroup_skb/egress")
+int prog(struct xdp_md *ctx)
+{
+ return bpf_metadata_b ? 1 : 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index cc615b82b56e..2fb7adafb6b6 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -67,7 +67,12 @@ typedef struct {
void* co_name; // PyCodeObject.co_name
} FrameData;
-static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void *get_thread_state(void *tls_base, PidData *pidData)
{
void* thread_state;
int key;
@@ -155,7 +160,9 @@ struct {
} stackmap SEC(".maps");
#ifdef GLOBAL_FUNC
-__attribute__((noinline))
+__noinline
+#elif defined(SUBPROGS)
+static __noinline
#else
static __always_inline
#endif
diff --git a/tools/testing/selftests/bpf/progs/pyperf_subprogs.c b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c
new file mode 100644
index 000000000000..60e27a7f0cca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define STACK_MAX_LEN 50
+#define SUBPROGS
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index ad61b722a9de..7de534f38c3f 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -266,8 +266,12 @@ struct tls_index {
uint64_t offset;
};
-static __always_inline void *calc_location(struct strobe_value_loc *loc,
- void *tls_base)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void *calc_location(struct strobe_value_loc *loc, void *tls_base)
{
/*
* tls_mode value is:
@@ -327,10 +331,15 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc,
: NULL;
}
-static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
- size_t idx, void *tls_base,
- struct strobe_value_generic *value,
- struct strobemeta_payload *data)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void read_int_var(struct strobemeta_cfg *cfg,
+ size_t idx, void *tls_base,
+ struct strobe_value_generic *value,
+ struct strobemeta_payload *data)
{
void *location = calc_location(&cfg->int_locs[idx], tls_base);
if (!location)
@@ -440,8 +449,13 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
* read_strobe_meta returns NULL, if no metadata was read; otherwise returns
* pointer to *right after* payload ends
*/
-static __always_inline void *read_strobe_meta(struct task_struct *task,
- struct strobemeta_payload *data)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void *read_strobe_meta(struct task_struct *task,
+ struct strobemeta_payload *data)
{
pid_t pid = bpf_get_current_pid_tgid() >> 32;
struct strobe_value_generic value = {0};
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c
new file mode 100644
index 000000000000..b6c01f8fc559
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2019 Facebook
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 13
+#define STROBE_MAX_MAP_ENTRIES 20
+#define NO_UNROLL
+#define SUBPROGS
+#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
new file mode 100644
index 000000000000..b5d9c8e778ae
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+#define TAIL_FUNC(x) \
+ SEC("classifier/" #x) \
+ int bpf_func_##x(struct __sk_buff *skb) \
+ { \
+ return x; \
+ }
+TAIL_FUNC(0)
+TAIL_FUNC(1)
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 0);
+
+ return skb->len * 2;
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 1);
+
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
new file mode 100644
index 000000000000..a004ab28ce28
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ if (load_byte(skb, 0))
+ bpf_tail_call(skb, &jmp_table, 1);
+ else
+ bpf_tail_call(skb, &jmp_table, 0);
+ return 1;
+}
+
+static volatile int count;
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+ count++;
+ return subprog_tail(skb);
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 0);
+
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
new file mode 100644
index 000000000000..96dbef2b6b7c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+__noinline
+int subprog_tail2(struct __sk_buff *skb)
+{
+ volatile char arr[64] = {};
+
+ if (load_word(skb, 0) || load_half(skb, 0))
+ bpf_tail_call(skb, &jmp_table, 10);
+ else
+ bpf_tail_call(skb, &jmp_table, 1);
+
+ return skb->len;
+}
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ volatile char arr[64] = {};
+
+ bpf_tail_call(skb, &jmp_table, 0);
+
+ return skb->len * 2;
+}
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+ volatile char arr[128] = {};
+
+ return subprog_tail2(skb);
+}
+
+SEC("classifier/1")
+int bpf_func_1(struct __sk_buff *skb)
+{
+ volatile char arr[128] = {};
+
+ return skb->len * 3;
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ volatile char arr[128] = {};
+
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
new file mode 100644
index 000000000000..98b40a95bc67
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 3);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static volatile int count;
+
+__noinline
+int subprog_tail_2(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 2);
+ return skb->len * 3;
+}
+
+__noinline
+int subprog_tail_1(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 1);
+ return skb->len * 2;
+}
+
+__noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 0);
+ return skb->len;
+}
+
+SEC("classifier/1")
+int bpf_func_1(struct __sk_buff *skb)
+{
+ return subprog_tail_2(skb);
+}
+
+SEC("classifier/2")
+int bpf_func_2(struct __sk_buff *skb)
+{
+ count++;
+ return subprog_tail_2(skb);
+}
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+ return subprog_tail_1(skb);
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index f0b72e86bee5..c9f8464996ea 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -22,6 +22,12 @@
#include "test_cls_redirect.h"
+#ifdef SUBPROGS
+#define INLINING __noinline
+#else
+#define INLINING __always_inline
+#endif
+
#define offsetofend(TYPE, MEMBER) \
(offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
@@ -125,7 +131,7 @@ typedef struct buf {
uint8_t *const tail;
} buf_t;
-static size_t buf_off(const buf_t *buf)
+static __always_inline size_t buf_off(const buf_t *buf)
{
/* Clang seems to optimize constructs like
* a - b + c
@@ -145,7 +151,7 @@ static size_t buf_off(const buf_t *buf)
return off;
}
-static bool buf_copy(buf_t *buf, void *dst, size_t len)
+static __always_inline bool buf_copy(buf_t *buf, void *dst, size_t len)
{
if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) {
return false;
@@ -155,7 +161,7 @@ static bool buf_copy(buf_t *buf, void *dst, size_t len)
return true;
}
-static bool buf_skip(buf_t *buf, const size_t len)
+static __always_inline bool buf_skip(buf_t *buf, const size_t len)
{
/* Check whether off + len is valid in the non-linear part. */
if (buf_off(buf) + len > buf->skb->len) {
@@ -173,7 +179,7 @@ static bool buf_skip(buf_t *buf, const size_t len)
* If scratch is not NULL, the function will attempt to load non-linear
* data via bpf_skb_load_bytes. On success, scratch is returned.
*/
-static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
+static __always_inline void *buf_assign(buf_t *buf, const size_t len, void *scratch)
{
if (buf->head + len > buf->tail) {
if (scratch == NULL) {
@@ -188,7 +194,7 @@ static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
return ptr;
}
-static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
+static INLINING bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
{
if (ipv4->ihl <= 5) {
return true;
@@ -197,13 +203,13 @@ static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
return buf_skip(buf, (ipv4->ihl - 5) * 4);
}
-static bool ipv4_is_fragment(const struct iphdr *ip)
+static INLINING bool ipv4_is_fragment(const struct iphdr *ip)
{
uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
}
-static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
+static __always_inline struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
{
struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch);
if (ipv4 == NULL) {
@@ -222,7 +228,7 @@ static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
}
/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */
-static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
+static INLINING bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
{
if (!buf_copy(pkt, ports, sizeof(*ports))) {
return false;
@@ -237,7 +243,7 @@ static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
return true;
}
-static uint16_t pkt_checksum_fold(uint32_t csum)
+static INLINING uint16_t pkt_checksum_fold(uint32_t csum)
{
/* The highest reasonable value for an IPv4 header
* checksum requires two folds, so we just do that always.
@@ -247,7 +253,7 @@ static uint16_t pkt_checksum_fold(uint32_t csum)
return (uint16_t)~csum;
}
-static void pkt_ipv4_checksum(struct iphdr *iph)
+static INLINING void pkt_ipv4_checksum(struct iphdr *iph)
{
iph->check = 0;
@@ -268,10 +274,11 @@ static void pkt_ipv4_checksum(struct iphdr *iph)
iph->check = pkt_checksum_fold(acc);
}
-static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
- const struct ipv6hdr *ipv6,
- uint8_t *upper_proto,
- bool *is_fragment)
+static INLINING
+bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
+ const struct ipv6hdr *ipv6,
+ uint8_t *upper_proto,
+ bool *is_fragment)
{
/* We understand five extension headers.
* https://tools.ietf.org/html/rfc8200#section-4.1 states that all
@@ -336,7 +343,7 @@ static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
* scratch is allocated on the stack. However, this usage should be safe since
* it's the callers stack after all.
*/
-static inline __attribute__((__always_inline__)) struct ipv6hdr *
+static __always_inline struct ipv6hdr *
pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
bool *is_fragment)
{
@@ -354,20 +361,20 @@ pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
/* Global metrics, per CPU
*/
-struct bpf_map_def metrics_map SEC("maps") = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(unsigned int),
- .value_size = sizeof(metrics_t),
- .max_entries = 1,
-};
-
-static metrics_t *get_global_metrics(void)
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, unsigned int);
+ __type(value, metrics_t);
+} metrics_map SEC(".maps");
+
+static INLINING metrics_t *get_global_metrics(void)
{
uint64_t key = 0;
return bpf_map_lookup_elem(&metrics_map, &key);
}
-static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
+static INLINING ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
{
const int payload_off =
sizeof(*encap) +
@@ -388,8 +395,8 @@ static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
}
-static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
- struct in_addr *next_hop, metrics_t *metrics)
+static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
+ struct in_addr *next_hop, metrics_t *metrics)
{
metrics->forwarded_packets_total_gre++;
@@ -509,8 +516,8 @@ static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
return bpf_redirect(skb->ifindex, 0);
}
-static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
- struct in_addr *next_hop, metrics_t *metrics)
+static INLINING ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
+ struct in_addr *next_hop, metrics_t *metrics)
{
/* swap L2 addresses */
/* This assumes that packets are received from a router.
@@ -546,7 +553,7 @@ static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
return bpf_redirect(skb->ifindex, 0);
}
-static ret_t skip_next_hops(buf_t *pkt, int n)
+static INLINING ret_t skip_next_hops(buf_t *pkt, int n)
{
switch (n) {
case 1:
@@ -566,8 +573,8 @@ static ret_t skip_next_hops(buf_t *pkt, int n)
* pkt is positioned just after the variable length GLB header
* iff the call is successful.
*/
-static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
- struct in_addr *next_hop)
+static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
+ struct in_addr *next_hop)
{
if (encap->unigue.next_hop > encap->unigue.hop_count) {
return TC_ACT_SHOT;
@@ -601,8 +608,8 @@ static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
* return value, and calling code works while still being "generic" to
* IPv4 and IPv6.
*/
-static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
- uint64_t iphlen, uint16_t sport, uint16_t dport)
+static INLINING uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
+ uint64_t iphlen, uint16_t sport, uint16_t dport)
{
switch (iphlen) {
case sizeof(struct iphdr): {
@@ -630,9 +637,9 @@ static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
}
}
-static verdict_t classify_tcp(struct __sk_buff *skb,
- struct bpf_sock_tuple *tuple, uint64_t tuplen,
- void *iph, struct tcphdr *tcp)
+static INLINING verdict_t classify_tcp(struct __sk_buff *skb,
+ struct bpf_sock_tuple *tuple, uint64_t tuplen,
+ void *iph, struct tcphdr *tcp)
{
struct bpf_sock *sk =
bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
@@ -663,8 +670,8 @@ static verdict_t classify_tcp(struct __sk_buff *skb,
return UNKNOWN;
}
-static verdict_t classify_udp(struct __sk_buff *skb,
- struct bpf_sock_tuple *tuple, uint64_t tuplen)
+static INLINING verdict_t classify_udp(struct __sk_buff *skb,
+ struct bpf_sock_tuple *tuple, uint64_t tuplen)
{
struct bpf_sock *sk =
bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
@@ -681,9 +688,9 @@ static verdict_t classify_udp(struct __sk_buff *skb,
return UNKNOWN;
}
-static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
- struct bpf_sock_tuple *tuple, uint64_t tuplen,
- metrics_t *metrics)
+static INLINING verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
+ struct bpf_sock_tuple *tuple, uint64_t tuplen,
+ metrics_t *metrics)
{
switch (proto) {
case IPPROTO_TCP:
@@ -698,7 +705,7 @@ static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
}
}
-static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
{
struct icmphdr icmp;
if (!buf_copy(pkt, &icmp, sizeof(icmp))) {
@@ -745,7 +752,7 @@ static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
sizeof(tuple.ipv4), metrics);
}
-static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
{
struct icmp6hdr icmp6;
if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) {
@@ -797,8 +804,8 @@ static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
metrics);
}
-static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
- metrics_t *metrics)
+static INLINING verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
+ metrics_t *metrics)
{
metrics->l4_protocol_packets_total_tcp++;
@@ -819,8 +826,8 @@ static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp);
}
-static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
- metrics_t *metrics)
+static INLINING verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
+ metrics_t *metrics)
{
metrics->l4_protocol_packets_total_udp++;
@@ -837,7 +844,7 @@ static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
return classify_udp(pkt->skb, &tuple, tuplen);
}
-static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
{
metrics->l3_protocol_packets_total_ipv4++;
@@ -874,7 +881,7 @@ static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
}
}
-static verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
{
metrics->l3_protocol_packets_total_ipv6++;
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c
new file mode 100644
index 000000000000..eed26b70e3a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c
@@ -0,0 +1,2 @@
+#define SUBPROGS
+#include "test_cls_redirect.c"
diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c
index 61f007855649..84e1f883f97b 100644
--- a/tools/testing/selftests/bpf/progs/test_d_path.c
+++ b/tools/testing/selftests/bpf/progs/test_d_path.c
@@ -15,7 +15,10 @@ char paths_close[MAX_FILES][MAX_PATH_LEN] = {};
int rets_stat[MAX_FILES] = {};
int rets_close[MAX_FILES] = {};
-SEC("fentry/vfs_getattr")
+int called_stat = 0;
+int called_close = 0;
+
+SEC("fentry/security_inode_getattr")
int BPF_PROG(prog_stat, struct path *path, struct kstat *stat,
__u32 request_mask, unsigned int query_flags)
{
@@ -23,6 +26,8 @@ int BPF_PROG(prog_stat, struct path *path, struct kstat *stat,
__u32 cnt = cnt_stat;
int ret;
+ called_stat = 1;
+
if (pid != my_pid)
return 0;
@@ -42,6 +47,8 @@ int BPF_PROG(prog_close, struct file *file, void *id)
__u32 cnt = cnt_close;
int ret;
+ called_close = 1;
+
if (pid != my_pid)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index 28351936a438..b9e2753f4f91 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -17,9 +17,7 @@
#include "test_iptunnel_common.h"
#include <bpf/bpf_endian.h>
-int _version SEC("version") = 1;
-
-static __u32 rol32(__u32 word, unsigned int shift)
+static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
return (word << shift) | (word >> ((-shift) & 31));
}
@@ -52,7 +50,7 @@ static __u32 rol32(__u32 word, unsigned int shift)
typedef unsigned int u32;
-static u32 jhash(const void *key, u32 length, u32 initval)
+static __noinline u32 jhash(const void *key, u32 length, u32 initval)
{
u32 a, b, c;
const unsigned char *k = key;
@@ -88,7 +86,7 @@ static u32 jhash(const void *key, u32 length, u32 initval)
return c;
}
-static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
{
a += initval;
b += initval;
@@ -97,7 +95,7 @@ static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
return c;
}
-static u32 jhash_2words(u32 a, u32 b, u32 initval)
+static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval)
{
return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
}
@@ -200,8 +198,7 @@ struct {
__type(value, struct ctl_value);
} ctl_array SEC(".maps");
-static __u32 get_packet_hash(struct packet_description *pckt,
- bool ipv6)
+static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6)
{
if (ipv6)
return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
@@ -210,10 +207,10 @@ static __u32 get_packet_hash(struct packet_description *pckt,
return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
}
-static bool get_packet_dst(struct real_definition **real,
- struct packet_description *pckt,
- struct vip_meta *vip_info,
- bool is_ipv6)
+static __noinline bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6)
{
__u32 hash = get_packet_hash(pckt, is_ipv6);
__u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
@@ -233,8 +230,8 @@ static bool get_packet_dst(struct real_definition **real,
return true;
}
-static int parse_icmpv6(void *data, void *data_end, __u64 off,
- struct packet_description *pckt)
+static __noinline int parse_icmpv6(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
{
struct icmp6hdr *icmp_hdr;
struct ipv6hdr *ip6h;
@@ -255,8 +252,8 @@ static int parse_icmpv6(void *data, void *data_end, __u64 off,
return TC_ACT_UNSPEC;
}
-static int parse_icmp(void *data, void *data_end, __u64 off,
- struct packet_description *pckt)
+static __noinline int parse_icmp(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
{
struct icmphdr *icmp_hdr;
struct iphdr *iph;
@@ -280,8 +277,8 @@ static int parse_icmp(void *data, void *data_end, __u64 off,
return TC_ACT_UNSPEC;
}
-static bool parse_udp(void *data, __u64 off, void *data_end,
- struct packet_description *pckt)
+static __noinline bool parse_udp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
{
struct udphdr *udp;
udp = data + off;
@@ -299,8 +296,8 @@ static bool parse_udp(void *data, __u64 off, void *data_end,
return true;
}
-static bool parse_tcp(void *data, __u64 off, void *data_end,
- struct packet_description *pckt)
+static __noinline bool parse_tcp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
{
struct tcphdr *tcp;
@@ -321,8 +318,8 @@ static bool parse_tcp(void *data, __u64 off, void *data_end,
return true;
}
-static int process_packet(void *data, __u64 off, void *data_end,
- bool is_ipv6, struct __sk_buff *skb)
+static __noinline int process_packet(void *data, __u64 off, void *data_end,
+ bool is_ipv6, struct __sk_buff *skb)
{
void *pkt_start = (void *)(long)skb->data;
struct packet_description pckt = {};
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c
new file mode 100644
index 000000000000..d3c5673c0218
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subprogs.c
@@ -0,0 +1,103 @@
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+const char LICENSE[] SEC("license") = "GPL";
+
+__noinline int sub1(int x)
+{
+ return x + 1;
+}
+
+static __noinline int sub5(int v);
+
+__noinline int sub2(int y)
+{
+ return sub5(y + 2);
+}
+
+static __noinline int sub3(int z)
+{
+ return z + 3 + sub1(4);
+}
+
+static __noinline int sub4(int w)
+{
+ return w + sub3(5) + sub1(6);
+}
+
+/* sub5() is an identitify function, just to test weirder functions layout and
+ * call patterns
+ */
+static __noinline int sub5(int v)
+{
+ return sub1(v) - 1; /* compensates sub1()'s + 1 */
+}
+
+/* unfortunately verifier rejects `struct task_struct *t` as an unkown pointer
+ * type, so we need to accept pointer as integer and then cast it inside the
+ * function
+ */
+__noinline int get_task_tgid(uintptr_t t)
+{
+ /* this ensures that CO-RE relocs work in multi-subprogs .text */
+ return BPF_CORE_READ((struct task_struct *)(void *)t, tgid);
+}
+
+int res1 = 0;
+int res2 = 0;
+int res3 = 0;
+int res4 = 0;
+
+SEC("raw_tp/sys_enter")
+int prog1(void *ctx)
+{
+ /* perform some CO-RE relocations to ensure they work with multi-prog
+ * sections correctly
+ */
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ res1 = sub1(1) + sub3(2); /* (1 + 1) + (2 + 3 + (4 + 1)) = 12 */
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int prog2(void *ctx)
+{
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ res2 = sub2(3) + sub3(4); /* (3 + 2) + (4 + 3 + (4 + 1)) = 17 */
+ return 0;
+}
+
+/* prog3 has the same section name as prog1 */
+SEC("raw_tp/sys_enter")
+int prog3(void *ctx)
+{
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ res3 = sub3(5) + 6; /* (5 + 3 + (4 + 1)) + 6 = 19 */
+ return 0;
+}
+
+/* prog4 has the same section name as prog2 */
+SEC("raw_tp/sys_exit")
+int prog4(void *ctx)
+{
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ res4 = sub4(7) + sub1(8); /* (7 + (5 + 3 + (4 + 1)) + (6 + 1)) + (8 + 1) = 36 */
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
index 458b0d69133e..553a282d816a 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -18,11 +18,11 @@
#define MAX_ULONG_STR_LEN 7
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
- volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
index b2e6f9b0894d..2b64bc563a12 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -18,11 +18,11 @@
#define MAX_ULONG_STR_LEN 7
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
{
- volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index 50525235380e..5489823c83fc 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -19,11 +19,11 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
+const char tcp_mem_name[] = "net/ipv4/tcp_mem";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
- char tcp_mem_name[] = "net/ipv4/tcp_mem";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 8beecec166d9..3a67921f62b5 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -16,7 +16,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-static __u32 rol32(__u32 word, unsigned int shift)
+static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
return (word << shift) | (word >> ((-shift) & 31));
}
@@ -49,7 +49,7 @@ static __u32 rol32(__u32 word, unsigned int shift)
typedef unsigned int u32;
-static __attribute__ ((noinline))
+static __noinline
u32 jhash(const void *key, u32 length, u32 initval)
{
u32 a, b, c;
@@ -86,7 +86,7 @@ u32 jhash(const void *key, u32 length, u32 initval)
return c;
}
-__attribute__ ((noinline))
+__noinline
u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
{
a += initval;
@@ -96,7 +96,7 @@ u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
return c;
}
-__attribute__ ((noinline))
+__noinline
u32 jhash_2words(u32 a, u32 b, u32 initval)
{
return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
@@ -213,7 +213,7 @@ struct eth_hdr {
unsigned short eth_proto;
};
-static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
+static __noinline __u64 calc_offset(bool is_ipv6, bool is_icmp)
{
__u64 off = sizeof(struct eth_hdr);
if (is_ipv6) {
@@ -797,8 +797,8 @@ out:
return XDP_DROP;
}
-__attribute__ ((section("xdp-test"), used))
-int balancer_ingress(struct xdp_md *ctx)
+SEC("xdp-test-v4")
+int balancer_ingress_v4(struct xdp_md *ctx)
{
void *data = (void *)(long)ctx->data;
void *data_end = (void *)(long)ctx->data_end;
@@ -812,11 +812,27 @@ int balancer_ingress(struct xdp_md *ctx)
eth_proto = bpf_ntohs(eth->eth_proto);
if (eth_proto == ETH_P_IP)
return process_packet(data, nh_off, data_end, 0, ctx);
- else if (eth_proto == ETH_P_IPV6)
+ else
+ return XDP_DROP;
+}
+
+SEC("xdp-test-v6")
+int balancer_ingress_v6(struct xdp_md *ctx)
+{
+ void *data = (void *)(long)ctx->data;
+ void *data_end = (void *)(long)ctx->data_end;
+ struct eth_hdr *eth = data;
+ __u32 eth_proto;
+ __u32 nh_off;
+
+ nh_off = sizeof(struct eth_hdr);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+ eth_proto = bpf_ntohs(eth->eth_proto);
+ if (eth_proto == ETH_P_IPV6)
return process_packet(data, nh_off, data_end, 1, ctx);
else
return XDP_DROP;
}
-char _license[] __attribute__ ((section("license"), used)) = "GPL";
-int _version __attribute__ ((section("version"), used)) = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index ac349a5cea7e..2db3c60e1e61 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -85,6 +85,23 @@ make_with_tmpdir() {
echo
}
+make_doc_and_clean() {
+ echo -e "\$PWD: $PWD"
+ echo -e "command: make -s $* doc >/dev/null"
+ RST2MAN_OPTS="--exit-status=1" make $J -s $* doc
+ if [ $? -ne 0 ] ; then
+ ERROR=1
+ printf "FAILURE: Errors or warnings when building documentation\n"
+ fi
+ (
+ if [ $# -ge 1 ] ; then
+ cd ${@: -1}
+ fi
+ make -s doc-clean
+ )
+ echo
+}
+
echo "Trying to build bpftool"
echo -e "... through kbuild\n"
@@ -145,3 +162,7 @@ make_and_clean
make_with_tmpdir OUTPUT
make_with_tmpdir O
+
+echo -e "Checking documentation build\n"
+# From tools/bpf/bpftool
+make_doc_and_clean
diff --git a/tools/testing/selftests/bpf/test_bpftool_metadata.sh b/tools/testing/selftests/bpf/test_bpftool_metadata.sh
new file mode 100755
index 000000000000..1bf81b49457a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_metadata.sh
@@ -0,0 +1,82 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TESTNAME=bpftool_metadata
+BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
+BPF_DIR=$BPF_FS/test_$TESTNAME
+
+_cleanup()
+{
+ set +e
+ rm -rf $BPF_DIR 2> /dev/null
+}
+
+cleanup_skip()
+{
+ echo "selftests: $TESTNAME [SKIP]"
+ _cleanup
+
+ exit $ksft_skip
+}
+
+cleanup()
+{
+ if [ "$?" = 0 ]; then
+ echo "selftests: $TESTNAME [PASS]"
+ else
+ echo "selftests: $TESTNAME [FAILED]"
+ fi
+ _cleanup
+}
+
+if [ $(id -u) -ne 0 ]; then
+ echo "selftests: $TESTNAME [SKIP] Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ -z "$BPF_FS" ]; then
+ echo "selftests: $TESTNAME [SKIP] Could not run test without bpffs mounted"
+ exit $ksft_skip
+fi
+
+if ! bpftool version > /dev/null 2>&1; then
+ echo "selftests: $TESTNAME [SKIP] Could not run test without bpftool"
+ exit $ksft_skip
+fi
+
+set -e
+
+trap cleanup_skip EXIT
+
+mkdir $BPF_DIR
+
+trap cleanup EXIT
+
+bpftool prog load metadata_unused.o $BPF_DIR/unused
+
+METADATA_PLAIN="$(bpftool prog)"
+echo "$METADATA_PLAIN" | grep 'a = "foo"' > /dev/null
+echo "$METADATA_PLAIN" | grep 'b = 1' > /dev/null
+
+bpftool prog --json | grep '"metadata":{"a":"foo","b":1}' > /dev/null
+
+bpftool map | grep 'metadata.rodata' > /dev/null
+
+rm $BPF_DIR/unused
+
+bpftool prog load metadata_used.o $BPF_DIR/used
+
+METADATA_PLAIN="$(bpftool prog)"
+echo "$METADATA_PLAIN" | grep 'a = "bar"' > /dev/null
+echo "$METADATA_PLAIN" | grep 'b = 2' > /dev/null
+
+bpftool prog --json | grep '"metadata":{"a":"bar","b":2}' > /dev/null
+
+bpftool map | grep 'metadata.rodata' > /dev/null
+
+rm $BPF_DIR/used
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
index 154a8fd2a48d..ca7ca87e91aa 100644
--- a/tools/testing/selftests/bpf/test_socket_cookie.c
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -151,7 +151,7 @@ static int run_test(int cgfd)
}
bpf_object__for_each_program(prog, pobj) {
- prog_name = bpf_program__title(prog, /*needs_copy*/ false);
+ prog_name = bpf_program__section_name(prog);
if (libbpf_attach_type_by_name(prog_name, &attach_type))
goto err;
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 94258c6b5235..c4f5d909e58a 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -647,13 +647,14 @@
.result = REJECT,
},
{
- "calls: ld_abs with changing ctx data in callee",
+ "calls: subprog call with ld_abs in main prog",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LD_ABS(BPF_B, 0),
BPF_LD_ABS(BPF_H, 0),
BPF_LD_ABS(BPF_W, 0),
BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
BPF_LD_ABS(BPF_B, 0),
@@ -666,8 +667,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed",
- .result = REJECT,
+ .result = ACCEPT,
},
{
"calls: two calls with bad fallthrough",
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c
index b52209db8250..637f9293bda8 100644
--- a/tools/testing/selftests/bpf/verifier/map_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_ptr.c
@@ -60,3 +60,35 @@
.result = ACCEPT,
.retval = 1,
},
+{
+ "bpf_map_ptr: r = 0, map_ptr = map_ptr + r",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 4 },
+ .result = ACCEPT,
+},
+{
+ "bpf_map_ptr: r = 0, r = r + map_ptr",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_LD_MAP_FD(BPF_REG_0, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 4 },
+ .result = ACCEPT,
+},