aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/bpf_devel_QA.rst30
-rw-r--r--Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml102
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/arm/boot/dts/uniphier-pxs2.dtsi2
-rw-r--r--arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi2
-rw-r--r--arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi4
-rw-r--r--drivers/net/bonding/bond_main.c7
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c152
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h23
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c8
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c74
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h1
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c10
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c276
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h193
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c293
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c256
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c14
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_devlink.c1
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c35
-rw-r--r--drivers/net/ethernet/sfc/ef10.c3
-rw-r--r--drivers/net/ethernet/sfc/farch.c16
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c12
-rw-r--r--drivers/net/ethernet/ti/davinci_emac.c4
-rw-r--r--drivers/net/ethernet/xscale/Kconfig1
-rw-r--r--drivers/net/ethernet/xscale/ixp4xx_eth.c215
-rw-r--r--drivers/net/geneve.c4
-rw-r--r--drivers/net/hyperv/netvsc_drv.c14
-rw-r--r--drivers/net/macvlan.c19
-rw-r--r--drivers/net/phy/intel-xway.c21
-rw-r--r--drivers/net/phy/marvell.c52
-rw-r--r--drivers/net/usb/r8152.c14
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c7
-rw-r--r--include/linux/bpf-cgroup.h1
-rw-r--r--include/linux/bpf.h23
-rw-r--r--include/linux/bpf_verifier.h9
-rw-r--r--include/linux/mlx5/eswitch.h11
-rw-r--r--include/linux/mlx5/vport.h8
-rw-r--r--include/linux/platform_data/eth_ixp4xx.h2
-rw-r--r--include/linux/skmsg.h5
-rw-r--r--include/net/devlink.h5
-rw-r--r--include/net/sock.h5
-rw-r--r--include/net/tcp.h2
-rw-r--r--include/net/udp.h2
-rw-r--r--include/uapi/linux/bpf.h67
-rw-r--r--include/uapi/linux/if_link.h1
-rw-r--r--kernel/bpf/core.c7
-rw-r--r--kernel/bpf/helpers.c306
-rw-r--r--kernel/bpf/inode.c2
-rw-r--r--kernel/bpf/syscall.c3
-rw-r--r--kernel/bpf/verifier.c84
-rw-r--r--kernel/trace/bpf_trace.c373
-rw-r--r--net/8021q/vlan.c3
-rw-r--r--net/8021q/vlan.h4
-rw-r--r--net/core/dev.c14
-rw-r--r--net/core/devlink.c11
-rw-r--r--net/core/neighbour.c4
-rw-r--r--net/core/sock_map.c5
-rw-r--r--net/ipv4/tcp_bpf.c3
-rw-r--r--net/ipv4/udp_bpf.c5
-rw-r--r--net/mptcp/protocol.c25
-rw-r--r--net/openvswitch/meter.c4
-rw-r--r--net/qrtr/mhi.c8
-rw-r--r--net/sched/sch_taprio.c6
-rw-r--r--net/vmw_vsock/virtio_transport_common.c28
-rw-r--r--net/vmw_vsock/vmci_transport.c3
-rw-r--r--net/xdp/xsk.c2
-rw-r--r--samples/bpf/tracex1_kern.c4
-rwxr-xr-xscripts/link-vmlinux.sh7
-rw-r--r--tools/bpf/bpftool/btf.c30
-rw-r--r--tools/bpf/bpftool/net.c2
-rw-r--r--tools/include/uapi/linux/bpf.h83
-rw-r--r--tools/lib/bpf/bpf_helpers.h21
-rw-r--r--tools/lib/bpf/bpf_tracing.h58
-rw-r--r--tools/lib/bpf/btf.c5
-rw-r--r--tools/lib/bpf/libbpf.c396
-rw-r--r--tools/lib/bpf/libbpf.h1
-rw-r--r--tools/lib/bpf/libbpf.map1
-rw-r--r--tools/lib/bpf/libbpf_internal.h45
-rw-r--r--tools/lib/bpf/linker.c1272
-rw-r--r--tools/scripts/Makefile.include12
-rw-r--r--tools/testing/selftests/bpf/Makefile28
-rw-r--r--tools/testing/selftests/bpf/README.rst9
-rw-r--r--tools/testing/selftests/bpf/config2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c58
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_sleep.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_funcs.c42
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_maps.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_vars.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_ptr.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mmap.c24
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf.c125
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_sk.c65
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c27
-rw-r--r--tools/testing/selftests/bpf/progs/linked_funcs1.c73
-rw-r--r--tools/testing/selftests/bpf/progs/linked_funcs2.c73
-rw-r--r--tools/testing/selftests/bpf/progs/linked_maps1.c82
-rw-r--r--tools/testing/selftests/bpf/progs/linked_maps2.c76
-rw-r--r--tools/testing/selftests/bpf/progs/linked_vars1.c54
-rw-r--r--tools/testing/selftests/bpf/progs/linked_vars2.c55
-rw-r--r--tools/testing/selftests/bpf/progs/map_ptr_kern.c4
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c12
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_sk.c11
-rw-r--r--tools/testing/selftests/bpf/progs/test_mmap.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_multi.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf.c73
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf_single.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_tunnel_kern.c2
-rw-r--r--tools/testing/selftests/bpf/test_progs.h2
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_get_stack.c43
-rw-r--r--tools/testing/selftests/lib.mk4
128 files changed, 4505 insertions, 1563 deletions
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 2ed89abbf9a4..253496af8fef 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -29,7 +29,7 @@ list:
This may also include issues related to XDP, BPF tracing, etc.
Given netdev has a high volume of traffic, please also add the BPF
-maintainers to Cc (from kernel MAINTAINERS_ file):
+maintainers to Cc (from kernel ``MAINTAINERS`` file):
* Alexei Starovoitov <[email protected]>
* Daniel Borkmann <[email protected]>
@@ -234,11 +234,11 @@ be subject to change.
Q: samples/bpf preference vs selftests?
---------------------------------------
-Q: When should I add code to `samples/bpf/`_ and when to BPF kernel
-selftests_ ?
+Q: When should I add code to ``samples/bpf/`` and when to BPF kernel
+selftests_?
A: In general, we prefer additions to BPF kernel selftests_ rather than
-`samples/bpf/`_. The rationale is very simple: kernel selftests are
+``samples/bpf/``. The rationale is very simple: kernel selftests are
regularly run by various bots to test for kernel regressions.
The more test cases we add to BPF selftests, the better the coverage
@@ -246,9 +246,9 @@ and the less likely it is that those could accidentally break. It is
not that BPF kernel selftests cannot demo how a specific feature can
be used.
-That said, `samples/bpf/`_ may be a good place for people to get started,
+That said, ``samples/bpf/`` may be a good place for people to get started,
so it might be advisable that simple demos of features could go into
-`samples/bpf/`_, but advanced functional and corner-case testing rather
+``samples/bpf/``, but advanced functional and corner-case testing rather
into kernel selftests.
If your sample looks like a test case, then go for BPF kernel selftests
@@ -449,6 +449,19 @@ from source at
https://github.com/acmel/dwarves
+pahole has used libbpf definitions and APIs since v1.13, following
+commit 21507cd3e97b ("pahole: add libbpf as submodule under lib/bpf").
+Building from the git repository works well because the libbpf submodule
+can be fetched with "git submodule update --init --recursive".
+
+Unfortunately, the default GitHub release tarball does not contain the
+libbpf submodule source code, which causes build issues. The tarball
+from https://git.kernel.org/pub/scm/devel/pahole/pahole.git/ has the same
+limitation. A source tarball that includes the matching libbpf submodule
+code is available from
+
+https://fedorapeople.org/~acme/dwarves
+
Some distros have pahole version 1.16 packaged already, e.g.
Fedora, Gentoo.
@@ -645,10 +658,9 @@ when:
.. Links
.. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/
-.. _MAINTAINERS: ../../MAINTAINERS
.. _netdev-FAQ: ../networking/netdev-FAQ.rst
-.. _samples/bpf/: ../../samples/bpf/
-.. _selftests: ../../tools/testing/selftests/bpf/
+.. _selftests:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
.. _Documentation/dev-tools/kselftest.rst:
https://www.kernel.org/doc/html/latest/dev-tools/kselftest.html
.. _Documentation/bpf/btf.rst: btf.rst
diff --git a/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml b/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml
new file mode 100644
index 000000000000..f2e91d1bf7d7
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2018 Linaro Ltd.
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/net/intel,ixp4xx-ethernet.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Intel IXP4xx ethernet
+
+allOf:
+ - $ref: "ethernet-controller.yaml#"
+
+maintainers:
+ - Linus Walleij <[email protected]>
+
+description: |
+ The Intel IXP4xx ethernet makes use of the IXP4xx NPE (Network
+ Processing Engine) and the IXP4xx Queue Manager to process
+ the ethernet frames. It can optionally contain an MDIO bus to
+ talk to PHYs.
+
+properties:
+ compatible:
+ const: intel,ixp4xx-ethernet
+
+ reg:
+ maxItems: 1
+ description: Ethernet MMIO address range
+
+ queue-rx:
+ $ref: '/schemas/types.yaml#/definitions/phandle-array'
+ maxItems: 1
+ description: phandle to the RX queue on the NPE
+
+ queue-txready:
+ $ref: '/schemas/types.yaml#/definitions/phandle-array'
+ maxItems: 1
+ description: phandle to the TX READY queue on the NPE
+
+ phy-mode: true
+
+ phy-handle: true
+
+ intel,npe-handle:
+ $ref: '/schemas/types.yaml#/definitions/phandle-array'
+ maxItems: 1
+ description: phandle to the NPE this ethernet instance is using
+ and the instance to use in the second cell
+
+ mdio:
+ type: object
+ $ref: "mdio.yaml#"
+ description: optional node for embedded MDIO controller
+
+required:
+ - compatible
+ - reg
+ - queue-rx
+ - queue-txready
+ - intel,npe-handle
+
+additionalProperties: false
+
+examples:
+ - |
+ npe: npe@c8006000 {
+ compatible = "intel,ixp4xx-network-processing-engine";
+ reg = <0xc8006000 0x1000>, <0xc8007000 0x1000>, <0xc8008000 0x1000>;
+ };
+
+ ethernet@c8009000 {
+ compatible = "intel,ixp4xx-ethernet";
+ reg = <0xc8009000 0x1000>;
+ status = "disabled";
+ queue-rx = <&qmgr 4>;
+ queue-txready = <&qmgr 21>;
+ intel,npe-handle = <&npe 1>;
+ phy-mode = "rgmii";
+ phy-handle = <&phy1>;
+ };
+
+ ethernet@c800c000 {
+ compatible = "intel,ixp4xx-ethernet";
+ reg = <0xc800c000 0x1000>;
+ status = "disabled";
+ queue-rx = <&qmgr 3>;
+ queue-txready = <&qmgr 20>;
+ intel,npe-handle = <&npe 2>;
+ phy-mode = "rgmii";
+ phy-handle = <&phy2>;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy1: ethernet-phy@1 {
+ reg = <1>;
+ };
+ phy2: ethernet-phy@2 {
+ reg = <2>;
+ };
+ };
+ };
diff --git a/MAINTAINERS b/MAINTAINERS
index c3c8fa572580..0d85ae9e61e2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8532,7 +8532,6 @@ IBM Power SRIOV Virtual NIC Device Driver
M: Dany Madden <[email protected]>
M: Sukadev Bhattiprolu <[email protected]>
R: Thomas Falcon <[email protected]>
-R: Lijun Pan <[email protected]>
S: Supported
F: drivers/net/ethernet/ibm/ibmvnic.*
diff --git a/arch/arm/boot/dts/uniphier-pxs2.dtsi b/arch/arm/boot/dts/uniphier-pxs2.dtsi
index b0b15c97306b..e81e5937a60a 100644
--- a/arch/arm/boot/dts/uniphier-pxs2.dtsi
+++ b/arch/arm/boot/dts/uniphier-pxs2.dtsi
@@ -583,7 +583,7 @@
clocks = <&sys_clk 6>;
reset-names = "ether";
resets = <&sys_rst 6>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
local-mac-address = [00 00 00 00 00 00];
socionext,syscon-phy-mode = <&soc_glue 0>;
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
index a87b8a678719..8f2c1c1e2c64 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
@@ -734,7 +734,7 @@
clocks = <&sys_clk 6>;
reset-names = "ether";
resets = <&sys_rst 6>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
local-mac-address = [00 00 00 00 00 00];
socionext,syscon-phy-mode = <&soc_glue 0>;
diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
index 0e52dadf54b3..be97da132258 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
@@ -564,7 +564,7 @@
clocks = <&sys_clk 6>;
reset-names = "ether";
resets = <&sys_rst 6>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
local-mac-address = [00 00 00 00 00 00];
socionext,syscon-phy-mode = <&soc_glue 0>;
@@ -585,7 +585,7 @@
clocks = <&sys_clk 7>;
reset-names = "ether";
resets = <&sys_rst 7>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
local-mac-address = [00 00 00 00 00 00];
socionext,syscon-phy-mode = <&soc_glue 1>;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d5ca38aa8aa9..20bbda1b36e1 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4391,9 +4391,7 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
int agg_id = 0;
int ret = 0;
-#ifdef CONFIG_LOCKDEP
- WARN_ON(lockdep_is_held(&bond->mode_lock));
-#endif
+ might_sleep();
usable_slaves = kzalloc(struct_size(usable_slaves, arr,
bond->slave_cnt), GFP_KERNEL);
@@ -4406,7 +4404,9 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
struct ad_info ad_info;
+ spin_lock_bh(&bond->mode_lock);
if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
+ spin_unlock_bh(&bond->mode_lock);
pr_debug("bond_3ad_get_active_agg_info failed\n");
/* No active aggragator means it's not safe to use
* the previous array.
@@ -4414,6 +4414,7 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
bond_reset_slave_arr(bond);
goto out;
}
+ spin_unlock_bh(&bond->mode_lock);
agg_id = ad_info.aggregator_id;
}
bond_for_each_slave(bond, slave, iter) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index e15d454e33f0..39ac9e2f5118 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -122,7 +122,10 @@ enum board_idx {
NETXTREME_E_VF,
NETXTREME_C_VF,
NETXTREME_S_VF,
+ NETXTREME_C_VF_HV,
+ NETXTREME_E_VF_HV,
NETXTREME_E_P5_VF,
+ NETXTREME_E_P5_VF_HV,
};
/* indexed by enum above */
@@ -170,7 +173,10 @@ static const struct {
[NETXTREME_E_VF] = { "Broadcom NetXtreme-E Ethernet Virtual Function" },
[NETXTREME_C_VF] = { "Broadcom NetXtreme-C Ethernet Virtual Function" },
[NETXTREME_S_VF] = { "Broadcom NetXtreme-S Ethernet Virtual Function" },
+ [NETXTREME_C_VF_HV] = { "Broadcom NetXtreme-C Virtual Function for Hyper-V" },
+ [NETXTREME_E_VF_HV] = { "Broadcom NetXtreme-E Virtual Function for Hyper-V" },
[NETXTREME_E_P5_VF] = { "Broadcom BCM5750X NetXtreme-E Ethernet Virtual Function" },
+ [NETXTREME_E_P5_VF_HV] = { "Broadcom BCM5750X NetXtreme-E Virtual Function for Hyper-V" },
};
static const struct pci_device_id bnxt_pci_tbl[] = {
@@ -222,15 +228,25 @@ static const struct pci_device_id bnxt_pci_tbl[] = {
{ PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 },
#ifdef CONFIG_BNXT_SRIOV
{ PCI_VDEVICE(BROADCOM, 0x1606), .driver_data = NETXTREME_E_VF },
+ { PCI_VDEVICE(BROADCOM, 0x1607), .driver_data = NETXTREME_E_VF_HV },
+ { PCI_VDEVICE(BROADCOM, 0x1608), .driver_data = NETXTREME_E_VF_HV },
{ PCI_VDEVICE(BROADCOM, 0x1609), .driver_data = NETXTREME_E_VF },
+ { PCI_VDEVICE(BROADCOM, 0x16bd), .driver_data = NETXTREME_E_VF_HV },
{ PCI_VDEVICE(BROADCOM, 0x16c1), .driver_data = NETXTREME_E_VF },
+ { PCI_VDEVICE(BROADCOM, 0x16c2), .driver_data = NETXTREME_C_VF_HV },
+ { PCI_VDEVICE(BROADCOM, 0x16c3), .driver_data = NETXTREME_C_VF_HV },
+ { PCI_VDEVICE(BROADCOM, 0x16c4), .driver_data = NETXTREME_E_VF_HV },
+ { PCI_VDEVICE(BROADCOM, 0x16c5), .driver_data = NETXTREME_E_VF_HV },
{ PCI_VDEVICE(BROADCOM, 0x16cb), .driver_data = NETXTREME_C_VF },
{ PCI_VDEVICE(BROADCOM, 0x16d3), .driver_data = NETXTREME_E_VF },
{ PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = NETXTREME_E_VF },
{ PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = NETXTREME_C_VF },
{ PCI_VDEVICE(BROADCOM, 0x16e5), .driver_data = NETXTREME_C_VF },
+ { PCI_VDEVICE(BROADCOM, 0x16e6), .driver_data = NETXTREME_C_VF_HV },
{ PCI_VDEVICE(BROADCOM, 0x1806), .driver_data = NETXTREME_E_P5_VF },
{ PCI_VDEVICE(BROADCOM, 0x1807), .driver_data = NETXTREME_E_P5_VF },
+ { PCI_VDEVICE(BROADCOM, 0x1808), .driver_data = NETXTREME_E_P5_VF_HV },
+ { PCI_VDEVICE(BROADCOM, 0x1809), .driver_data = NETXTREME_E_P5_VF_HV },
{ PCI_VDEVICE(BROADCOM, 0xd800), .driver_data = NETXTREME_S_VF },
#endif
{ 0 }
@@ -265,7 +281,8 @@ static struct workqueue_struct *bnxt_pf_wq;
static bool bnxt_vf_pciid(enum board_idx idx)
{
return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF ||
- idx == NETXTREME_S_VF || idx == NETXTREME_E_P5_VF);
+ idx == NETXTREME_S_VF || idx == NETXTREME_C_VF_HV || idx == NETXTREME_E_VF_HV ||
+ idx == NETXTREME_E_P5_VF || idx == NETXTREME_E_P5_VF_HV);
}
#define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID)
@@ -358,6 +375,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct pci_dev *pdev = bp->pdev;
struct bnxt_tx_ring_info *txr;
struct bnxt_sw_tx_bd *tx_buf;
+ __le32 lflags = 0;
i = skb_get_queue_mapping(skb);
if (unlikely(i >= bp->tx_nr_rings)) {
@@ -399,6 +417,11 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
vlan_tag_flags |= 1 << TX_BD_CFA_META_TPID_SHIFT;
}
+ if (unlikely(skb->no_fcs)) {
+ lflags |= cpu_to_le32(TX_BD_FLAGS_NO_CRC);
+ goto normal_tx;
+ }
+
if (free_size == bp->tx_ring_size && length <= bp->tx_push_thresh) {
struct tx_push_buffer *tx_push_buf = txr->tx_push;
struct tx_push_bd *tx_push = &tx_push_buf->push_bd;
@@ -500,7 +523,7 @@ normal_tx:
txbd1 = (struct tx_bd_ext *)
&txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
- txbd1->tx_bd_hsize_lflags = 0;
+ txbd1->tx_bd_hsize_lflags = lflags;
if (skb_is_gso(skb)) {
u32 hdr_len;
@@ -512,14 +535,14 @@ normal_tx:
hdr_len = skb_transport_offset(skb) +
tcp_hdrlen(skb);
- txbd1->tx_bd_hsize_lflags = cpu_to_le32(TX_BD_FLAGS_LSO |
+ txbd1->tx_bd_hsize_lflags |= cpu_to_le32(TX_BD_FLAGS_LSO |
TX_BD_FLAGS_T_IPID |
(hdr_len << (TX_BD_HSIZE_SHIFT - 1)));
length = skb_shinfo(skb)->gso_size;
txbd1->tx_bd_mss = cpu_to_le32(length);
length += hdr_len;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
- txbd1->tx_bd_hsize_lflags =
+ txbd1->tx_bd_hsize_lflags |=
cpu_to_le32(TX_BD_FLAGS_TCP_UDP_CHKSUM);
txbd1->tx_bd_mss = 0;
}
@@ -1732,14 +1755,16 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
cons = rxcmp->rx_cmp_opaque;
if (unlikely(cons != rxr->rx_next_cons)) {
- int rc1 = bnxt_discard_rx(bp, cpr, raw_cons, rxcmp);
+ int rc1 = bnxt_discard_rx(bp, cpr, &tmp_raw_cons, rxcmp);
/* 0xffff is forced error, don't print it */
if (rxr->rx_next_cons != 0xffff)
netdev_warn(bp->dev, "RX cons %x != expected cons %x\n",
cons, rxr->rx_next_cons);
bnxt_sched_reset(bp, rxr);
- return rc1;
+ if (rc1)
+ return rc1;
+ goto next_rx_no_prod_no_len;
}
rx_buf = &rxr->rx_buf_ring[cons];
data = rx_buf->data;
@@ -4145,7 +4170,7 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init)
bnxt_free_ntp_fltrs(bp, irq_re_init);
if (irq_re_init) {
bnxt_free_ring_stats(bp);
- if (!(bp->fw_cap & BNXT_FW_CAP_PORT_STATS_NO_RESET) ||
+ if (!(bp->phy_flags & BNXT_PHY_FL_PORT_STATS_NO_RESET) ||
test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
bnxt_free_port_stats(bp);
bnxt_free_ring_grps(bp);
@@ -8340,11 +8365,11 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp)
#endif
}
-/* Allow PF and VF with default VLAN to be in promiscuous mode */
+/* Allow PF, trusted VFs and VFs with default VLAN to be in promiscuous mode */
static bool bnxt_promisc_ok(struct bnxt *bp)
{
#ifdef CONFIG_BNXT_SRIOV
- if (BNXT_VF(bp) && !bp->vf.vlan)
+ if (BNXT_VF(bp) && !bp->vf.vlan && !bnxt_is_trusted_vf(bp, &bp->vf))
return false;
#endif
return true;
@@ -8441,7 +8466,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
if (bp->dev->flags & IFF_BROADCAST)
vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_BCAST;
- if ((bp->dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp))
+ if (bp->dev->flags & IFF_PROMISC)
vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
if (bp->dev->flags & IFF_ALLMULTI) {
@@ -9075,8 +9100,9 @@ static char *bnxt_report_fec(struct bnxt_link_info *link_info)
static void bnxt_report_link(struct bnxt *bp)
{
if (bp->link_info.link_up) {
- const char *duplex;
+ const char *signal = "";
const char *flow_ctrl;
+ const char *duplex;
u32 speed;
u16 fec;
@@ -9098,9 +9124,24 @@ static void bnxt_report_link(struct bnxt *bp)
flow_ctrl = "ON - receive";
else
flow_ctrl = "none";
- netdev_info(bp->dev, "NIC Link is Up, %u Mbps %s duplex, Flow control: %s\n",
- speed, duplex, flow_ctrl);
- if (bp->flags & BNXT_FLAG_EEE_CAP)
+ if (bp->link_info.phy_qcfg_resp.option_flags &
+ PORT_PHY_QCFG_RESP_OPTION_FLAGS_SIGNAL_MODE_KNOWN) {
+ u8 sig_mode = bp->link_info.active_fec_sig_mode &
+ PORT_PHY_QCFG_RESP_SIGNAL_MODE_MASK;
+ switch (sig_mode) {
+ case PORT_PHY_QCFG_RESP_SIGNAL_MODE_NRZ:
+ signal = "(NRZ) ";
+ break;
+ case PORT_PHY_QCFG_RESP_SIGNAL_MODE_PAM4:
+ signal = "(PAM4) ";
+ break;
+ default:
+ break;
+ }
+ }
+ netdev_info(bp->dev, "NIC Link is Up, %u Mbps %s%s duplex, Flow control: %s\n",
+ speed, signal, duplex, flow_ctrl);
+ if (bp->phy_flags & BNXT_PHY_FL_EEE_CAP)
netdev_info(bp->dev, "EEE is %s\n",
bp->eee.eee_active ? "active" :
"not active");
@@ -9132,10 +9173,6 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
struct hwrm_port_phy_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
struct bnxt_link_info *link_info = &bp->link_info;
- bp->flags &= ~BNXT_FLAG_EEE_CAP;
- if (bp->test_info)
- bp->test_info->flags &= ~(BNXT_TEST_FL_EXT_LPBK |
- BNXT_TEST_FL_AN_PHY_LPBK);
if (bp->hwrm_spec_code < 0x10201)
return 0;
@@ -9146,31 +9183,17 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
if (rc)
goto hwrm_phy_qcaps_exit;
+ bp->phy_flags = resp->flags;
if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED) {
struct ethtool_eee *eee = &bp->eee;
u16 fw_speeds = le16_to_cpu(resp->supported_speeds_eee_mode);
- bp->flags |= BNXT_FLAG_EEE_CAP;
eee->supported = _bnxt_fw_to_ethtool_adv_spds(fw_speeds, 0);
bp->lpi_tmr_lo = le32_to_cpu(resp->tx_lpi_timer_low) &
PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_LOW_MASK;
bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) &
PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK;
}
- if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_EXTERNAL_LPBK_SUPPORTED) {
- if (bp->test_info)
- bp->test_info->flags |= BNXT_TEST_FL_EXT_LPBK;
- }
- if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_AUTONEG_LPBK_SUPPORTED) {
- if (bp->test_info)
- bp->test_info->flags |= BNXT_TEST_FL_AN_PHY_LPBK;
- }
- if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_SHARED_PHY_CFG_SUPPORTED) {
- if (BNXT_PF(bp))
- bp->fw_cap |= BNXT_FW_CAP_SHARED_PORT_CFG;
- }
- if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET)
- bp->fw_cap |= BNXT_FW_CAP_PORT_STATS_NO_RESET;
if (bp->hwrm_spec_code >= 0x10a01) {
if (bnxt_phy_qcaps_no_speed(resp)) {
@@ -9261,7 +9284,7 @@ int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
PORT_PHY_QCFG_RESP_PHY_ADDR_MASK;
link_info->module_status = resp->module_status;
- if (bp->flags & BNXT_FLAG_EEE_CAP) {
+ if (bp->phy_flags & BNXT_PHY_FL_EEE_CAP) {
struct ethtool_eee *eee = &bp->eee;
u16 fw_speeds;
@@ -9497,7 +9520,8 @@ static int bnxt_hwrm_shutdown_link(struct bnxt *bp)
if (!BNXT_SINGLE_PF(bp))
return 0;
- if (pci_num_vf(bp->pdev))
+ if (pci_num_vf(bp->pdev) &&
+ !(bp->phy_flags & BNXT_PHY_FL_FW_MANAGED_LKDN))
return 0;
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1);
@@ -9782,7 +9806,9 @@ static ssize_t bnxt_show_temp(struct device *dev,
if (!rc)
len = sprintf(buf, "%u\n", resp->temp * 1000); /* display millidegree */
mutex_unlock(&bp->hwrm_cmd_lock);
- return rc ?: len;
+ if (rc)
+ return rc;
+ return len;
}
static SENSOR_DEVICE_ATTR(temp1_input, 0444, bnxt_show_temp, NULL, 0);
@@ -9839,7 +9865,7 @@ static bool bnxt_eee_config_ok(struct bnxt *bp)
struct ethtool_eee *eee = &bp->eee;
struct bnxt_link_info *link_info = &bp->link_info;
- if (!(bp->flags & BNXT_FLAG_EEE_CAP))
+ if (!(bp->phy_flags & BNXT_PHY_FL_EEE_CAP))
return true;
if (eee->eee_enabled) {
@@ -10486,7 +10512,7 @@ static void bnxt_set_rx_mode(struct net_device *dev)
CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST |
CFA_L2_SET_RX_MASK_REQ_MASK_BCAST);
- if ((dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp))
+ if (dev->flags & IFF_PROMISC)
mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
uc_update = bnxt_uc_list_updated(bp);
@@ -10562,6 +10588,9 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp)
}
skip_uc:
+ if ((vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS) &&
+ !bnxt_promisc_ok(bp))
+ vnic->rx_mask &= ~CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
if (rc && vnic->mc_list_count) {
netdev_info(bp->dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n",
@@ -10756,6 +10785,40 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
return rc;
}
+static netdev_features_t bnxt_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ struct bnxt *bp;
+ __be16 udp_port;
+ u8 l4_proto = 0;
+
+ features = vlan_features_check(skb, features);
+ if (!skb->encapsulation)
+ return features;
+
+ switch (vlan_get_protocol(skb)) {
+ case htons(ETH_P_IP):
+ l4_proto = ip_hdr(skb)->protocol;
+ break;
+ case htons(ETH_P_IPV6):
+ l4_proto = ipv6_hdr(skb)->nexthdr;
+ break;
+ default:
+ return features;
+ }
+
+ if (l4_proto != IPPROTO_UDP)
+ return features;
+
+ bp = netdev_priv(dev);
+ /* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */
+ udp_port = udp_hdr(skb)->dest;
+ if (udp_port == bp->vxlan_port || udp_port == bp->nge_port)
+ return features;
+ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words,
u32 *reg_buf)
{
@@ -12263,10 +12326,13 @@ static int bnxt_udp_tunnel_sync(struct net_device *netdev, unsigned int table)
unsigned int cmd;
udp_tunnel_nic_get_port(netdev, table, 0, &ti);
- if (ti.type == UDP_TUNNEL_TYPE_VXLAN)
+ if (ti.type == UDP_TUNNEL_TYPE_VXLAN) {
+ bp->vxlan_port = ti.port;
cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN;
- else
+ } else {
+ bp->nge_port = ti.port;
cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE;
+ }
if (ti.port)
return bnxt_hwrm_tunnel_dst_port_alloc(bp, ti.port, cmd);
@@ -12366,6 +12432,7 @@ static const struct net_device_ops bnxt_netdev_ops = {
.ndo_change_mtu = bnxt_change_mtu,
.ndo_fix_features = bnxt_fix_features,
.ndo_set_features = bnxt_set_features,
+ .ndo_features_check = bnxt_features_check,
.ndo_tx_timeout = bnxt_tx_timeout,
#ifdef CONFIG_BNXT_SRIOV
.ndo_get_vf_config = bnxt_get_vf_config,
@@ -12434,12 +12501,17 @@ static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt)
int rc = 0;
struct bnxt_link_info *link_info = &bp->link_info;
+ bp->phy_flags = 0;
rc = bnxt_hwrm_phy_qcaps(bp);
if (rc) {
netdev_err(bp->dev, "Probe phy can't get phy capabilities (rc: %x)\n",
rc);
return rc;
}
+ if (bp->phy_flags & BNXT_PHY_FL_NO_FCS)
+ bp->dev->priv_flags |= IFF_SUPP_NOFCS;
+ else
+ bp->dev->priv_flags &= ~IFF_SUPP_NOFCS;
if (!fw_dflt)
return 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 29061c577baa..24d2ad6a8740 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1341,9 +1341,6 @@ struct bnxt_led_info {
struct bnxt_test_info {
u8 offline_mask;
- u8 flags;
-#define BNXT_TEST_FL_EXT_LPBK 0x1
-#define BNXT_TEST_FL_AN_PHY_LPBK 0x2
u16 timeout;
char string[BNXT_MAX_TEST][ETH_GSTRING_LEN];
};
@@ -1693,7 +1690,6 @@ struct bnxt {
#define BNXT_FLAG_SHARED_RINGS 0x200
#define BNXT_FLAG_PORT_STATS 0x400
#define BNXT_FLAG_UDP_RSS_CAP 0x800
- #define BNXT_FLAG_EEE_CAP 0x1000
#define BNXT_FLAG_NEW_RSS_CAP 0x2000
#define BNXT_FLAG_WOL_CAP 0x4000
#define BNXT_FLAG_ROCEV1_CAP 0x8000
@@ -1720,8 +1716,10 @@ struct bnxt {
#define BNXT_NPAR(bp) ((bp)->port_partition_type)
#define BNXT_MH(bp) ((bp)->flags & BNXT_FLAG_MULTI_HOST)
#define BNXT_SINGLE_PF(bp) (BNXT_PF(bp) && !BNXT_NPAR(bp) && !BNXT_MH(bp))
+#define BNXT_SH_PORT_CFG_OK(bp) (BNXT_PF(bp) && \
+ ((bp)->phy_flags & BNXT_PHY_FL_SHARED_PORT_CFG))
#define BNXT_PHY_CFG_ABLE(bp) ((BNXT_SINGLE_PF(bp) || \
- ((bp)->fw_cap & BNXT_FW_CAP_SHARED_PORT_CFG)) && \
+ BNXT_SH_PORT_CFG_OK(bp)) && \
(bp)->link_info.phy_state == BNXT_PHY_STATE_ENABLED)
#define BNXT_CHIP_TYPE_NITRO_A0(bp) ((bp)->flags & BNXT_FLAG_CHIP_NITRO_A0)
#define BNXT_RX_PAGE_MODE(bp) ((bp)->flags & BNXT_FLAG_RX_PAGE_MODE)
@@ -1871,11 +1869,9 @@ struct bnxt {
#define BNXT_FW_CAP_EXT_STATS_SUPPORTED 0x00040000
#define BNXT_FW_CAP_ERR_RECOVER_RELOAD 0x00100000
#define BNXT_FW_CAP_HOT_RESET 0x00200000
- #define BNXT_FW_CAP_SHARED_PORT_CFG 0x00400000
#define BNXT_FW_CAP_VLAN_RX_STRIP 0x01000000
#define BNXT_FW_CAP_VLAN_TX_INSERT 0x02000000
#define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED 0x04000000
- #define BNXT_FW_CAP_PORT_STATS_NO_RESET 0x10000000
#define BNXT_FW_CAP_RING_MONITOR 0x40000000
#define BNXT_NEW_RM(bp) ((bp)->fw_cap & BNXT_FW_CAP_NEW_RM)
@@ -1918,6 +1914,8 @@ struct bnxt {
u16 vxlan_fw_dst_port_id;
u16 nge_fw_dst_port_id;
+ __be16 vxlan_port;
+ __be16 nge_port;
u8 port_partition_type;
u8 port_count;
u16 br_mode;
@@ -2010,6 +2008,17 @@ struct bnxt {
u32 lpi_tmr_lo;
u32 lpi_tmr_hi;
+ /* copied from flags in hwrm_port_phy_qcaps_output */
+ u8 phy_flags;
+#define BNXT_PHY_FL_EEE_CAP PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED
+#define BNXT_PHY_FL_EXT_LPBK PORT_PHY_QCAPS_RESP_FLAGS_EXTERNAL_LPBK_SUPPORTED
+#define BNXT_PHY_FL_AN_PHY_LPBK PORT_PHY_QCAPS_RESP_FLAGS_AUTONEG_LPBK_SUPPORTED
+#define BNXT_PHY_FL_SHARED_PORT_CFG PORT_PHY_QCAPS_RESP_FLAGS_SHARED_PHY_CFG_SUPPORTED
+#define BNXT_PHY_FL_PORT_STATS_NO_RESET PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET
+#define BNXT_PHY_FL_NO_PHY_LPBK PORT_PHY_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED
+#define BNXT_PHY_FL_FW_MANAGED_LKDN PORT_PHY_QCAPS_RESP_FLAGS_FW_MANAGED_LINK_DOWN
+#define BNXT_PHY_FL_NO_FCS PORT_PHY_QCAPS_RESP_FLAGS_NO_FCS
+
u8 num_tests;
struct bnxt_test_info *test_info;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 3b66e300c962..c664ec52ebcf 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -2912,7 +2912,7 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata)
if (!BNXT_PHY_CFG_ABLE(bp))
return -EOPNOTSUPP;
- if (!(bp->flags & BNXT_FLAG_EEE_CAP))
+ if (!(bp->phy_flags & BNXT_PHY_FL_EEE_CAP))
return -EOPNOTSUPP;
mutex_lock(&bp->link_lock);
@@ -2963,7 +2963,7 @@ static int bnxt_get_eee(struct net_device *dev, struct ethtool_eee *edata)
{
struct bnxt *bp = netdev_priv(dev);
- if (!(bp->flags & BNXT_FLAG_EEE_CAP))
+ if (!(bp->phy_flags & BNXT_PHY_FL_EEE_CAP))
return -EOPNOTSUPP;
*edata = bp->eee;
@@ -3215,7 +3215,7 @@ static int bnxt_disable_an_for_lpbk(struct bnxt *bp,
int rc;
if (!link_info->autoneg ||
- (bp->test_info->flags & BNXT_TEST_FL_AN_PHY_LPBK))
+ (bp->phy_flags & BNXT_PHY_FL_AN_PHY_LPBK))
return 0;
rc = bnxt_query_force_speeds(bp, &fw_advertising);
@@ -3416,7 +3416,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
}
if ((etest->flags & ETH_TEST_FL_EXTERNAL_LB) &&
- (bp->test_info->flags & BNXT_TEST_FL_EXT_LPBK))
+ (bp->phy_flags & BNXT_PHY_FL_EXT_LPBK))
do_ext_lpbk = true;
if (etest->flags & ETH_TEST_FL_OFFLINE) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index a217316228f4..eb00a219aa51 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -49,10 +49,6 @@ static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp,
static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id)
{
- if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
- netdev_err(bp->dev, "vf ndo called though PF is down\n");
- return -EINVAL;
- }
if (!bp->pf.active_vfs) {
netdev_err(bp->dev, "vf ndo called though sriov is disabled\n");
return -EINVAL;
@@ -113,7 +109,7 @@ static int bnxt_hwrm_func_qcfg_flags(struct bnxt *bp, struct bnxt_vf_info *vf)
int rc;
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
- req.fid = cpu_to_le16(vf->fw_fid);
+ req.fid = cpu_to_le16(BNXT_PF(bp) ? vf->fw_fid : 0xffff);
mutex_lock(&bp->hwrm_cmd_lock);
rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc) {
@@ -125,9 +121,9 @@ static int bnxt_hwrm_func_qcfg_flags(struct bnxt *bp, struct bnxt_vf_info *vf)
return 0;
}
-static bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf)
+bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf)
{
- if (!(bp->fw_cap & BNXT_FW_CAP_TRUSTED_VF))
+ if (BNXT_PF(bp) && !(bp->fw_cap & BNXT_FW_CAP_TRUSTED_VF))
return !!(vf->flags & BNXT_VF_TRUST);
bnxt_hwrm_func_qcfg_flags(bp, vf);
@@ -1120,10 +1116,38 @@ void bnxt_hwrm_exec_fwd_req(struct bnxt *bp)
}
}
+int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict)
+{
+ struct hwrm_func_vf_cfg_input req = {0};
+ int rc = 0;
+
+ if (!BNXT_VF(bp))
+ return 0;
+
+ if (bp->hwrm_spec_code < 0x10202) {
+ if (is_valid_ether_addr(bp->vf.mac_addr))
+ rc = -EADDRNOTAVAIL;
+ goto mac_done;
+ }
+ bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1);
+ req.enables = cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
+ memcpy(req.dflt_mac_addr, mac, ETH_ALEN);
+ rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+mac_done:
+ if (rc && strict) {
+ rc = -EADDRNOTAVAIL;
+ netdev_warn(bp->dev, "VF MAC address %pM not approved by the PF\n",
+ mac);
+ return rc;
+ }
+ return 0;
+}
+
void bnxt_update_vf_mac(struct bnxt *bp)
{
struct hwrm_func_qcaps_input req = {0};
struct hwrm_func_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
+ bool inform_pf = false;
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCAPS, -1, -1);
req.fid = cpu_to_le16(0xffff);
@@ -1139,42 +1163,24 @@ void bnxt_update_vf_mac(struct bnxt *bp)
* default but the stored zero MAC will allow the VF user to change
* the random MAC address using ndo_set_mac_address() if he wants.
*/
- if (!ether_addr_equal(resp->mac_address, bp->vf.mac_addr))
+ if (!ether_addr_equal(resp->mac_address, bp->vf.mac_addr)) {
memcpy(bp->vf.mac_addr, resp->mac_address, ETH_ALEN);
+ /* This means we are now using our own MAC address, let
+ * the PF know about this MAC address.
+ */
+ if (!is_valid_ether_addr(bp->vf.mac_addr))
+ inform_pf = true;
+ }
/* overwrite netdev dev_addr with admin VF MAC */
if (is_valid_ether_addr(bp->vf.mac_addr))
memcpy(bp->dev->dev_addr, bp->vf.mac_addr, ETH_ALEN);
update_vf_mac_exit:
mutex_unlock(&bp->hwrm_cmd_lock);
+ if (inform_pf)
+ bnxt_approve_mac(bp, bp->dev->dev_addr, false);
}
-int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict)
-{
- struct hwrm_func_vf_cfg_input req = {0};
- int rc = 0;
-
- if (!BNXT_VF(bp))
- return 0;
-
- if (bp->hwrm_spec_code < 0x10202) {
- if (is_valid_ether_addr(bp->vf.mac_addr))
- rc = -EADDRNOTAVAIL;
- goto mac_done;
- }
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1);
- req.enables = cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
- memcpy(req.dflt_mac_addr, mac, ETH_ALEN);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-mac_done:
- if (rc && strict) {
- rc = -EADDRNOTAVAIL;
- netdev_warn(bp->dev, "VF MAC address %pM not approved by the PF\n",
- mac);
- return rc;
- }
- return 0;
-}
#else
int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
index 629641bf6fc5..995535e4c11b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
@@ -34,6 +34,7 @@ int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16);
int bnxt_set_vf_bw(struct net_device *, int, int, int);
int bnxt_set_vf_link_state(struct net_device *, int, int);
int bnxt_set_vf_spoofchk(struct net_device *, int, bool);
+bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf);
int bnxt_set_vf_trust(struct net_device *dev, int vf_id, bool trust);
int bnxt_sriov_configure(struct pci_dev *pdev, int num_vfs);
int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset);
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
index a3f5b80888e5..ef3f1e92632f 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
@@ -33,7 +33,6 @@ static int chcr_get_nfrags_to_send(struct sk_buff *skb, u32 start, u32 len)
if (unlikely(start < skb_linear_data_len)) {
frag_size = min(len, skb_linear_data_len - start);
- start = 0;
} else {
start -= skb_linear_data_len;
@@ -873,10 +872,10 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info,
}
/* update receive window */
if (first_wr || tx_info->prev_win != tcp_win) {
- pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos,
- TCB_RCV_WND_W,
- TCB_RCV_WND_V(TCB_RCV_WND_M),
- TCB_RCV_WND_V(tcp_win), 0);
+ chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos,
+ TCB_RCV_WND_W,
+ TCB_RCV_WND_V(TCB_RCV_WND_M),
+ TCB_RCV_WND_V(tcp_win), 0);
tx_info->prev_win = tcp_win;
cpl++;
}
@@ -1485,7 +1484,6 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info,
wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR));
wr->flowid_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16));
wr->cookie = 0;
- pos += sizeof(*wr);
/* ULP_TXPKT */
ulptx = (struct ulp_txpkt *)(wr + 1);
ulptx->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) |
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index f08c420a5803..2768c78528a5 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -431,7 +431,8 @@ static void prestera_port_handle_event(struct prestera_switch *sw,
netif_carrier_on(port->dev);
if (!delayed_work_pending(caching_dw))
queue_delayed_work(prestera_wq, caching_dw, 0);
- } else {
+ } else if (netif_running(port->dev) &&
+ netif_carrier_ok(port->dev)) {
netif_carrier_off(port->dev);
if (delayed_work_pending(caching_dw))
cancel_delayed_work(caching_dw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
index 3e19b1721303..0399a396d166 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
@@ -96,7 +96,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
}
if (!vport->egress.acl) {
- vport->egress.acl = esw_acl_table_create(esw, vport->vport,
+ vport->egress.acl = esw_acl_table_create(esw, vport,
MLX5_FLOW_NAMESPACE_ESW_EGRESS,
table_size);
if (IS_ERR(vport->egress.acl)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
index 26b37a0f8762..505bf811984a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
@@ -148,7 +148,7 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport)
esw_acl_egress_vlan_grp_destroy(vport);
}
-static bool esw_acl_egress_needed(const struct mlx5_eswitch *esw, u16 vport_num)
+static bool esw_acl_egress_needed(struct mlx5_eswitch *esw, u16 vport_num)
{
return mlx5_eswitch_is_vf_vport(esw, vport_num) || mlx5_esw_is_sf_vport(esw, vport_num);
}
@@ -171,7 +171,7 @@ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport
table_size++;
if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
table_size++;
- vport->egress.acl = esw_acl_table_create(esw, vport->vport,
+ vport->egress.acl = esw_acl_table_create(esw, vport,
MLX5_FLOW_NAMESPACE_ESW_EGRESS, table_size);
if (IS_ERR(vport->egress.acl)) {
err = PTR_ERR(vport->egress.acl);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
index 4a369669e51e..45b839116212 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
@@ -6,14 +6,14 @@
#include "helper.h"
struct mlx5_flow_table *
-esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size)
+esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_core_dev *dev = esw->dev;
struct mlx5_flow_namespace *root_ns;
struct mlx5_flow_table *acl;
int acl_supported;
- int vport_index;
+ u16 vport_num;
int err;
acl_supported = (ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS) ?
@@ -23,11 +23,11 @@ esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size)
if (!acl_supported)
return ERR_PTR(-EOPNOTSUPP);
+ vport_num = vport->vport;
esw_debug(dev, "Create vport[%d] %s ACL table\n", vport_num,
ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress");
- vport_index = mlx5_eswitch_vport_num_to_index(esw, vport_num);
- root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport_index);
+ root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport->index);
if (!root_ns) {
esw_warn(dev, "Failed to get E-Switch root namespace for vport (%d)\n",
vport_num);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
index 8dc4cab66a71..a47063fab57e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
@@ -8,7 +8,7 @@
/* General acl helper functions */
struct mlx5_flow_table *
-esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size);
+esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size);
/* Egress acl helper functions */
void esw_acl_egress_table_destroy(struct mlx5_vport *vport);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
index d64fad2823e7..f75b86abaf1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
@@ -177,7 +177,7 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
}
if (!vport->ingress.acl) {
- vport->ingress.acl = esw_acl_table_create(esw, vport->vport,
+ vport->ingress.acl = esw_acl_table_create(esw, vport,
MLX5_FLOW_NAMESPACE_ESW_INGRESS,
table_size);
if (IS_ERR(vport->ingress.acl)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
index 548c005ea633..39e948bc1204 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
@@ -7,7 +7,7 @@
#include "ofld.h"
static bool
-esw_acl_ingress_prio_tag_enabled(const struct mlx5_eswitch *esw,
+esw_acl_ingress_prio_tag_enabled(struct mlx5_eswitch *esw,
const struct mlx5_vport *vport)
{
return (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
@@ -255,7 +255,7 @@ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw,
if (esw_acl_ingress_prio_tag_enabled(esw, vport))
num_ftes++;
- vport->ingress.acl = esw_acl_table_create(esw, vport->vport,
+ vport->ingress.acl = esw_acl_table_create(esw, vport,
MLX5_FLOW_NAMESPACE_ESW_INGRESS,
num_ftes);
if (IS_ERR(vport->ingress.acl)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index 7bfc84238b3d..1703384eca95 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -14,8 +14,7 @@ mlx5_esw_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_i
memcpy(ppid->id, &parent_id, sizeof(parent_id));
}
-static bool
-mlx5_esw_devlink_port_supported(const struct mlx5_eswitch *esw, u16 vport_num)
+static bool mlx5_esw_devlink_port_supported(struct mlx5_eswitch *esw, u16 vport_num)
{
return vport_num == MLX5_VPORT_UPLINK ||
(mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) ||
@@ -124,7 +123,7 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1
}
int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
- u16 vport_num, u32 sfnum)
+ u16 vport_num, u32 controller, u32 sfnum)
{
struct mlx5_core_dev *dev = esw->dev;
struct netdev_phys_item_id ppid = {};
@@ -142,7 +141,7 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p
mlx5_esw_get_port_parent_id(dev, &ppid);
memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len);
dl_port->attrs.switch_id.id_len = ppid.id_len;
- devlink_port_attrs_pci_sf_set(dl_port, 0, pfnum, sfnum);
+ devlink_port_attrs_pci_sf_set(dl_port, controller, pfnum, sfnum, !!controller);
devlink = priv_to_devlink(dev);
dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num);
err = devlink_port_register(devlink, dl_port, dl_port_index);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
index 8ab1224653a4..d9041b16611d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -216,7 +216,8 @@ static void esw_destroy_legacy_table(struct mlx5_eswitch *esw)
int esw_legacy_enable(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
- int ret, i;
+ unsigned long i;
+ int ret;
ret = esw_create_legacy_table(esw);
if (ret)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 1bb229ecd43b..570f2280823c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -88,20 +88,17 @@ struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink)
struct mlx5_vport *__must_check
mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num)
{
- u16 idx;
+ struct mlx5_vport *vport;
if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager))
return ERR_PTR(-EPERM);
- idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
-
- if (idx > esw->total_vports - 1) {
- esw_debug(esw->dev, "vport out of range: num(0x%x), idx(0x%x)\n",
- vport_num, idx);
+ vport = xa_load(&esw->vports, vport_num);
+ if (!vport) {
+ esw_debug(esw->dev, "vport out of range: num(0x%x)\n", vport_num);
return ERR_PTR(-EINVAL);
}
-
- return &esw->vports[idx];
+ return vport;
}
static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
@@ -345,9 +342,10 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw,
{
u8 *mac = vaddr->node.addr;
struct mlx5_vport *vport;
- u16 i, vport_num;
+ unsigned long i;
+ u16 vport_num;
- mlx5_esw_for_all_vports(esw, i, vport) {
+ mlx5_esw_for_each_vport(esw, i, vport) {
struct hlist_head *vport_hash = vport->mc_list;
struct vport_addr *iter_vaddr =
l2addr_hash_find(vport_hash,
@@ -1175,7 +1173,7 @@ static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw)
static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
- int i;
+ unsigned long i;
mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
memset(&vport->qos, 0, sizeof(vport->qos));
@@ -1213,20 +1211,25 @@ void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num)
void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs)
{
- int i;
+ struct mlx5_vport *vport;
+ unsigned long i;
- mlx5_esw_for_each_vf_vport_num_reverse(esw, i, num_vfs)
- mlx5_eswitch_unload_vport(esw, i);
+ mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
+ if (!vport->enabled)
+ continue;
+ mlx5_eswitch_unload_vport(esw, vport->vport);
+ }
}
int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
enum mlx5_eswitch_vport_event enabled_events)
{
+ struct mlx5_vport *vport;
+ unsigned long i;
int err;
- int i;
- mlx5_esw_for_each_vf_vport_num(esw, i, num_vfs) {
- err = mlx5_eswitch_load_vport(esw, i, enabled_events);
+ mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
+ err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events);
if (err)
goto vf_err;
}
@@ -1234,7 +1237,7 @@ int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
return 0;
vf_err:
- mlx5_eswitch_unload_vf_vports(esw, i - 1);
+ mlx5_eswitch_unload_vf_vports(esw, num_vfs);
return err;
}
@@ -1563,24 +1566,161 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
up_write(&esw->mode_lock);
}
+static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out)
+{
+ u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
+ u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {};
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+ MLX5_SET(query_hca_cap_in, in, function_id, MLX5_VPORT_PF);
+ MLX5_SET(query_hca_cap_in, in, other_function, true);
+ return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
+}
+
+int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id)
+
+{
+ int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ void *query_ctx;
+ void *hca_caps;
+ int err;
+
+ if (!mlx5_core_is_ecpf(dev)) {
+ *max_sfs = 0;
+ return 0;
+ }
+
+ query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+ if (!query_ctx)
+ return -ENOMEM;
+
+ err = mlx5_query_hca_cap_host_pf(dev, query_ctx);
+ if (err)
+ goto out_free;
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ *max_sfs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_sf);
+ *sf_base_id = MLX5_GET(cmd_hca_cap, hca_caps, sf_base_id);
+
+out_free:
+ kfree(query_ctx);
+ return err;
+}
+
+static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, struct mlx5_core_dev *dev,
+ int index, u16 vport_num)
+{
+ struct mlx5_vport *vport;
+ int err;
+
+ vport = kzalloc(sizeof(*vport), GFP_KERNEL);
+ if (!vport)
+ return -ENOMEM;
+
+ vport->dev = esw->dev;
+ vport->vport = vport_num;
+ vport->index = index;
+ vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+ INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler);
+ err = xa_insert(&esw->vports, vport_num, vport, GFP_KERNEL);
+ if (err)
+ goto insert_err;
+
+ esw->total_vports++;
+ return 0;
+
+insert_err:
+ kfree(vport);
+ return err;
+}
+
+static void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ xa_erase(&esw->vports, vport->vport);
+ kfree(vport);
+}
+
+static void mlx5_esw_vports_cleanup(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ mlx5_esw_for_each_vport(esw, i, vport)
+ mlx5_esw_vport_free(esw, vport);
+ xa_destroy(&esw->vports);
+}
+
+static int mlx5_esw_vports_init(struct mlx5_eswitch *esw)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ u16 max_host_pf_sfs;
+ u16 base_sf_num;
+ int idx = 0;
+ int err;
+ int i;
+
+ xa_init(&esw->vports);
+
+ err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_PF);
+ if (err)
+ goto err;
+ if (esw->first_host_vport == MLX5_VPORT_PF)
+ xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN);
+ idx++;
+
+ for (i = 0; i < mlx5_core_max_vfs(dev); i++) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, idx);
+ if (err)
+ goto err;
+ xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF);
+ xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN);
+ idx++;
+ }
+ base_sf_num = mlx5_sf_start_function_id(dev);
+ for (i = 0; i < mlx5_sf_max_functions(dev); i++) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i);
+ if (err)
+ goto err;
+ xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF);
+ idx++;
+ }
+
+ err = mlx5_esw_sf_max_hpf_functions(dev, &max_host_pf_sfs, &base_sf_num);
+ if (err)
+ goto err;
+ for (i = 0; i < max_host_pf_sfs; i++) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i);
+ if (err)
+ goto err;
+ xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF);
+ idx++;
+ }
+
+ if (mlx5_ecpf_vport_exists(dev)) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_ECPF);
+ if (err)
+ goto err;
+ idx++;
+ }
+ err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_UPLINK);
+ if (err)
+ goto err;
+ return 0;
+
+err:
+ mlx5_esw_vports_cleanup(esw);
+ return err;
+}
+
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
{
struct mlx5_eswitch *esw;
- struct mlx5_vport *vport;
- int total_vports;
- int err, i;
+ int err;
if (!MLX5_VPORT_MANAGER(dev))
return 0;
- total_vports = mlx5_eswitch_get_total_vports(dev);
-
- esw_info(dev,
- "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
- total_vports,
- MLX5_MAX_UC_PER_VPORT(dev),
- MLX5_MAX_MC_PER_VPORT(dev));
-
esw = kzalloc(sizeof(*esw), GFP_KERNEL);
if (!esw)
return -ENOMEM;
@@ -1595,18 +1735,13 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
goto abort;
}
- esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport),
- GFP_KERNEL);
- if (!esw->vports) {
- err = -ENOMEM;
+ err = mlx5_esw_vports_init(esw);
+ if (err)
goto abort;
- }
-
- esw->total_vports = total_vports;
err = esw_offloads_init_reps(esw);
if (err)
- goto abort;
+ goto reps_err;
mutex_init(&esw->offloads.encap_tbl_lock);
hash_init(esw->offloads.encap_tbl);
@@ -1619,25 +1754,25 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
mutex_init(&esw->state_lock);
init_rwsem(&esw->mode_lock);
- mlx5_esw_for_all_vports(esw, i, vport) {
- vport->vport = mlx5_eswitch_index_to_vport_num(esw, i);
- vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
- vport->dev = dev;
- INIT_WORK(&vport->vport_change_handler,
- esw_vport_change_handler);
- }
-
esw->enabled_vports = 0;
esw->mode = MLX5_ESWITCH_NONE;
esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
dev->priv.eswitch = esw;
BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
+
+ esw_info(dev,
+ "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
+ esw->total_vports,
+ MLX5_MAX_UC_PER_VPORT(dev),
+ MLX5_MAX_MC_PER_VPORT(dev));
return 0;
+
+reps_err:
+ mlx5_esw_vports_cleanup(esw);
abort:
if (esw->work_queue)
destroy_workqueue(esw->work_queue);
- kfree(esw->vports);
kfree(esw);
return err;
}
@@ -1659,7 +1794,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
mutex_destroy(&esw->offloads.encap_tbl_lock);
mutex_destroy(&esw->offloads.decap_tbl_lock);
esw_offloads_cleanup_reps(esw);
- kfree(esw->vports);
+ mlx5_esw_vports_cleanup(esw);
kfree(esw);
}
@@ -1718,8 +1853,29 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
return err;
}
+static bool mlx5_esw_check_port_type(struct mlx5_eswitch *esw, u16 vport_num, xa_mark_t mark)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return false;
+
+ return xa_get_mark(&esw->vports, vport_num, mark);
+}
+
+bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_VF);
+}
+
+bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_SF);
+}
+
static bool
-is_port_function_supported(const struct mlx5_eswitch *esw, u16 vport_num)
+is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num)
{
return vport_num == MLX5_VPORT_PF ||
mlx5_eswitch_is_vf_vport(esw, vport_num) ||
@@ -1891,9 +2047,9 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
struct mlx5_vport *evport;
u32 max_guarantee = 0;
- int i;
+ unsigned long i;
- mlx5_esw_for_all_vports(esw, i, evport) {
+ mlx5_esw_for_each_vport(esw, i, evport) {
if (!evport->enabled || evport->qos.min_rate < max_guarantee)
continue;
max_guarantee = evport->qos.min_rate;
@@ -1911,11 +2067,11 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw)
struct mlx5_vport *evport;
u32 vport_max_rate;
u32 vport_min_rate;
+ unsigned long i;
u32 bw_share;
int err;
- int i;
- mlx5_esw_for_all_vports(esw, i, evport) {
+ mlx5_esw_for_each_vport(esw, i, evport) {
if (!evport->enabled)
continue;
vport_min_rate = evport->qos.min_rate;
@@ -2205,3 +2361,19 @@ void mlx5_esw_unlock(struct mlx5_eswitch *esw)
{
up_write(&esw->mode_lock);
}
+
+/**
+ * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
+ *
+ * @dev: Pointer to core device
+ *
+ * mlx5_eswitch_get_total_vports returns total number of eswitch vports.
+ */
+u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
+{
+ struct mlx5_eswitch *esw;
+
+ esw = dev->priv.eswitch;
+ return mlx5_esw_allowed(esw) ? esw->total_vports : 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index b289d756a7e4..64ccb2bc0b58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -176,6 +176,7 @@ struct mlx5_vport {
u16 vport;
bool enabled;
enum mlx5_eswitch_vport_event enabled_events;
+ int index;
struct devlink_port *dl_port;
};
@@ -228,7 +229,7 @@ struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
- struct mlx5_eswitch_rep *vport_reps;
+ struct xarray vport_reps;
struct list_head peer_flows;
struct mutex peer_mutex;
struct mutex encap_tbl_lock; /* protects encap_tbl */
@@ -278,7 +279,7 @@ struct mlx5_eswitch {
struct esw_mc_addr mc_promisc;
/* end of legacy */
struct workqueue_struct *work_queue;
- struct mlx5_vport *vports;
+ struct xarray vports;
u32 flags;
int total_vports;
int enabled_vports;
@@ -545,94 +546,11 @@ static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev)
MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF;
}
-static inline int mlx5_esw_sf_start_idx(const struct mlx5_eswitch *esw)
-{
- /* PF and VF vports indices start from 0 to max_vfs */
- return MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev);
-}
-
-static inline int mlx5_esw_sf_end_idx(const struct mlx5_eswitch *esw)
-{
- return mlx5_esw_sf_start_idx(esw) + mlx5_sf_max_functions(esw->dev);
-}
-
-static inline int
-mlx5_esw_sf_vport_num_to_index(const struct mlx5_eswitch *esw, u16 vport_num)
-{
- return vport_num - mlx5_sf_start_function_id(esw->dev) +
- MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev);
-}
-
-static inline u16
-mlx5_esw_sf_vport_index_to_num(const struct mlx5_eswitch *esw, int idx)
-{
- return mlx5_sf_start_function_id(esw->dev) + idx -
- (MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev));
-}
-
-static inline bool
-mlx5_esw_is_sf_vport(const struct mlx5_eswitch *esw, u16 vport_num)
-{
- return mlx5_sf_supported(esw->dev) &&
- vport_num >= mlx5_sf_start_function_id(esw->dev) &&
- (vport_num < (mlx5_sf_start_function_id(esw->dev) +
- mlx5_sf_max_functions(esw->dev)));
-}
-
static inline bool mlx5_eswitch_is_funcs_handler(const struct mlx5_core_dev *dev)
{
return mlx5_core_is_ecpf_esw_manager(dev);
}
-static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw)
-{
- /* Uplink always locate at the last element of the array.*/
- return esw->total_vports - 1;
-}
-
-static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw)
-{
- return esw->total_vports - 2;
-}
-
-static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw,
- u16 vport_num)
-{
- if (vport_num == MLX5_VPORT_ECPF) {
- if (!mlx5_ecpf_vport_exists(esw->dev))
- esw_warn(esw->dev, "ECPF vport doesn't exist!\n");
- return mlx5_eswitch_ecpf_idx(esw);
- }
-
- if (vport_num == MLX5_VPORT_UPLINK)
- return mlx5_eswitch_uplink_idx(esw);
-
- if (mlx5_esw_is_sf_vport(esw, vport_num))
- return mlx5_esw_sf_vport_num_to_index(esw, vport_num);
-
- /* PF and VF vports start from 0 to max_vfs */
- return vport_num;
-}
-
-static inline u16 mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw,
- int index)
-{
- if (index == mlx5_eswitch_ecpf_idx(esw) &&
- mlx5_ecpf_vport_exists(esw->dev))
- return MLX5_VPORT_ECPF;
-
- if (index == mlx5_eswitch_uplink_idx(esw))
- return MLX5_VPORT_UPLINK;
-
- /* SF vports indices are after VFs and before ECPF */
- if (mlx5_sf_supported(esw->dev) &&
- index > mlx5_core_max_vfs(esw->dev))
- return mlx5_esw_sf_vport_index_to_num(esw, index);
-
- /* PF and VF vports start from 0 to max_vfs */
- return index;
-}
-
static inline unsigned int
mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
u16 vport_num)
@@ -649,82 +567,42 @@ mlx5_esw_devlink_port_index_to_vport_num(unsigned int dl_port_index)
/* TODO: This mlx5e_tc function shouldn't be called by eswitch */
void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
-/* The vport getter/iterator are only valid after esw->total_vports
- * and vport->vport are initialized in mlx5_eswitch_init.
+/* Each mark identifies eswitch vport type.
+ * MLX5_ESW_VPT_HOST_FN is used to identify both PF and VF ports using
+ * a single mark.
+ * MLX5_ESW_VPT_VF identifies a SRIOV VF vport.
+ * MLX5_ESW_VPT_SF identifies SF vport.
*/
-#define mlx5_esw_for_all_vports(esw, i, vport) \
- for ((i) = MLX5_VPORT_PF; \
- (vport) = &(esw)->vports[i], \
- (i) < (esw)->total_vports; (i)++)
-
-#define mlx5_esw_for_all_vports_reverse(esw, i, vport) \
- for ((i) = (esw)->total_vports - 1; \
- (vport) = &(esw)->vports[i], \
- (i) >= MLX5_VPORT_PF; (i)--)
-
-#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \
- for ((i) = MLX5_VPORT_FIRST_VF; \
- (vport) = &(esw)->vports[(i)], \
- (i) <= (nvfs); (i)++)
-
-#define mlx5_esw_for_each_vf_vport_reverse(esw, i, vport, nvfs) \
- for ((i) = (nvfs); \
- (vport) = &(esw)->vports[(i)], \
- (i) >= MLX5_VPORT_FIRST_VF; (i)--)
-
-/* The rep getter/iterator are only valid after esw->total_vports
- * and vport->vport are initialized in mlx5_eswitch_init.
+#define MLX5_ESW_VPT_HOST_FN XA_MARK_0
+#define MLX5_ESW_VPT_VF XA_MARK_1
+#define MLX5_ESW_VPT_SF XA_MARK_2
+
+/* The vport iterators are valid only after the vports are initialized in mlx5_eswitch_init.
+ * The idea is borrowed from xa_for_each_marked(), with added support for a desired last element.
*/
-#define mlx5_esw_for_all_reps(esw, i, rep) \
- for ((i) = MLX5_VPORT_PF; \
- (rep) = &(esw)->offloads.vport_reps[i], \
- (i) < (esw)->total_vports; (i)++)
-
-#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs) \
- for ((i) = MLX5_VPORT_FIRST_VF; \
- (rep) = &(esw)->offloads.vport_reps[i], \
- (i) <= (nvfs); (i)++)
-
-#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs) \
- for ((i) = (nvfs); \
- (rep) = &(esw)->offloads.vport_reps[i], \
- (i) >= MLX5_VPORT_FIRST_VF; (i)--)
-
-#define mlx5_esw_for_each_vf_vport_num(esw, vport, nvfs) \
- for ((vport) = MLX5_VPORT_FIRST_VF; (vport) <= (nvfs); (vport)++)
-
-#define mlx5_esw_for_each_vf_vport_num_reverse(esw, vport, nvfs) \
- for ((vport) = (nvfs); (vport) >= MLX5_VPORT_FIRST_VF; (vport)--)
-
-/* Includes host PF (vport 0) if it's not esw manager. */
-#define mlx5_esw_for_each_host_func_rep(esw, i, rep, nvfs) \
- for ((i) = (esw)->first_host_vport; \
- (rep) = &(esw)->offloads.vport_reps[i], \
- (i) <= (nvfs); (i)++)
-
-#define mlx5_esw_for_each_host_func_rep_reverse(esw, i, rep, nvfs) \
- for ((i) = (nvfs); \
- (rep) = &(esw)->offloads.vport_reps[i], \
- (i) >= (esw)->first_host_vport; (i)--)
-
-#define mlx5_esw_for_each_host_func_vport(esw, vport, nvfs) \
- for ((vport) = (esw)->first_host_vport; \
- (vport) <= (nvfs); (vport)++)
-
-#define mlx5_esw_for_each_host_func_vport_reverse(esw, vport, nvfs) \
- for ((vport) = (nvfs); \
- (vport) >= (esw)->first_host_vport; (vport)--)
-
-#define mlx5_esw_for_each_sf_rep(esw, i, rep) \
- for ((i) = mlx5_esw_sf_start_idx(esw); \
- (rep) = &(esw)->offloads.vport_reps[(i)], \
- (i) < mlx5_esw_sf_end_idx(esw); (i++))
+
+/* Visit every entry stored in the eswitch vports xarray. */
+#define mlx5_esw_for_each_vport(esw, idx, vp) \
+ xa_for_each(&((esw)->vports), idx, vp)
+
+/* Visit the entries of an xarray that carry the given mark, starting the
+ * search at index 0 and bounding it by 'max'.
+ */
+#define mlx5_esw_for_each_entry_marked(xa, idx, ent, max, mark) \
+ for (idx = 0, ent = xa_find(xa, &idx, max, mark); \
+ ent; ent = xa_find_after(xa, &idx, max, mark))
+
+/* Same as above, applied to the eswitch vports xarray. */
+#define mlx5_esw_for_each_vport_marked(esw, idx, vp, max, mark) \
+ mlx5_esw_for_each_entry_marked(&((esw)->vports), idx, vp, max, mark)
+
+/* Visit the vports carrying the VF mark, bounded by 'max'. */
+#define mlx5_esw_for_each_vf_vport(esw, idx, vp, max) \
+ mlx5_esw_for_each_vport_marked(esw, idx, vp, max, MLX5_ESW_VPT_VF)
+
+/* Visit the vports carrying the host function mark, bounded by 'max'. */
+#define mlx5_esw_for_each_host_func_vport(esw, idx, vp, max) \
+ mlx5_esw_for_each_vport_marked(esw, idx, vp, max, MLX5_ESW_VPT_HOST_FN)
struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink);
struct mlx5_vport *__must_check
mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num);
-bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num);
+bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num);
+bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num);
int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data);
@@ -784,12 +662,13 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo
struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num);
int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
- u16 vport_num, u32 sfnum);
+ u16 vport_num, u32 controller, u32 sfnum);
void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
- u16 vport_num, u32 sfnum);
+ u16 vport_num, u32 controller, u32 sfnum);
void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num);
+int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id);
int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num);
void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num);
@@ -816,6 +695,8 @@ void mlx5_esw_unlock(struct mlx5_eswitch *esw);
void esw_vport_change_handle_locked(struct mlx5_vport *vport);
+bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller);
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index bbb707117296..db1e74280e57 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -49,6 +49,16 @@
#include "en_tc.h"
#include "en/mapping.h"
+/* Visit every representor stored in the offloads.vport_reps xarray. */
+#define mlx5_esw_for_each_rep(esw, idx, entry) \
+ xa_for_each(&((esw)->offloads.vport_reps), idx, entry)
+
+/* Visit only the representors carrying the SF mark. */
+#define mlx5_esw_for_each_sf_rep(esw, idx, entry) \
+ xa_for_each_marked(&((esw)->offloads.vport_reps), idx, entry, MLX5_ESW_VPT_SF)
+
+/* Visit the representors carrying the VF mark; the search is bounded by
+ * the current esw_funcs.num_vfs.
+ */
+#define mlx5_esw_for_each_vf_rep(esw, idx, entry) \
+ mlx5_esw_for_each_entry_marked(&((esw)->offloads.vport_reps), idx, \
+ entry, (esw)->esw_funcs.num_vfs, MLX5_ESW_VPT_VF)
+
/* There are two match-all miss flows, one for unicast dst mac and
* one for multicast.
*/
@@ -67,10 +77,7 @@ static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
u16 vport_num)
{
- int idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
-
- WARN_ON(idx > esw->total_vports - 1);
- return &esw->offloads.vport_reps[idx];
+ return xa_load(&esw->offloads.vport_reps, vport_num);
}
static void
@@ -720,10 +727,11 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
{
struct mlx5_eswitch_rep *rep;
- int i, err = 0;
+ unsigned long i;
+ int err = 0;
esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
- mlx5_esw_for_each_host_func_rep(esw, i, rep, esw->esw_funcs.num_vfs) {
+ /* Representors live in offloads.vport_reps, not in esw->vports. Walk the
+ * rep xarray using the HOST_FN mark that is copied from each vport to its
+ * rep, so that 'rep' really points at a struct mlx5_eswitch_rep.
+ */
+ mlx5_esw_for_each_entry_marked(&esw->offloads.vport_reps, i, rep,
+ esw->esw_funcs.num_vfs, MLX5_ESW_VPT_HOST_FN) {
if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
continue;
@@ -972,13 +980,13 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
static void mlx5_eswitch_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
{
struct mlx5_flow_handle **flows = esw->fdb_table.offloads.send_to_vport_meta_rules;
- int i = 0, num_vfs = esw->esw_funcs.num_vfs, vport_num;
+ int i = 0, num_vfs = esw->esw_funcs.num_vfs;
if (!num_vfs || !flows)
return;
- mlx5_esw_for_each_vf_vport_num(esw, vport_num, num_vfs)
- mlx5_del_flow_rules(flows[i++]);
+ for (i = 0; i < num_vfs; i++)
+ mlx5_del_flow_rules(flows[i]);
kvfree(flows);
}
@@ -992,6 +1000,8 @@ mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
struct mlx5_flow_handle *flow_rule;
struct mlx5_flow_handle **flows;
struct mlx5_flow_spec *spec;
+ struct mlx5_vport *vport;
+ unsigned long i;
u16 vport_num;
num_vfs = esw->esw_funcs.num_vfs;
@@ -1016,7 +1026,8 @@ mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- mlx5_esw_for_each_vf_vport_num(esw, vport_num, num_vfs) {
+ mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
+ vport_num = vport->vport;
MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0,
mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
dest.vport.num = vport_num;
@@ -1158,12 +1169,14 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
struct mlx5_flow_destination dest = {};
struct mlx5_flow_act flow_act = {0};
struct mlx5_flow_handle **flows;
- struct mlx5_flow_handle *flow;
- struct mlx5_flow_spec *spec;
/* total vports is the same for both e-switches */
int nvports = esw->total_vports;
+ struct mlx5_flow_handle *flow;
+ struct mlx5_flow_spec *spec;
+ struct mlx5_vport *vport;
+ unsigned long i;
void *misc;
- int err, i;
+ int err;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec)
@@ -1182,6 +1195,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
misc_parameters);
if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
spec, MLX5_VPORT_PF);
@@ -1191,10 +1205,11 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
err = PTR_ERR(flow);
goto add_pf_flow_err;
}
- flows[MLX5_VPORT_PF] = flow;
+ flows[vport->index] = flow;
}
if (mlx5_ecpf_vport_exists(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF);
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
@@ -1202,13 +1217,13 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
err = PTR_ERR(flow);
goto add_ecpf_flow_err;
}
- flows[mlx5_eswitch_ecpf_idx(esw)] = flow;
+ flows[vport->index] = flow;
}
- mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) {
+ mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
esw_set_peer_miss_rule_source_port(esw,
peer_dev->priv.eswitch,
- spec, i);
+ spec, vport->vport);
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
@@ -1216,7 +1231,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
err = PTR_ERR(flow);
goto add_vf_flow_err;
}
- flows[i] = flow;
+ flows[vport->index] = flow;
}
esw->fdb_table.offloads.peer_miss_rules = flows;
@@ -1225,15 +1240,20 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
return 0;
add_vf_flow_err:
- nvports = --i;
- mlx5_esw_for_each_vf_vport_num_reverse(esw, i, nvports)
- mlx5_del_flow_rules(flows[i]);
-
- if (mlx5_ecpf_vport_exists(esw->dev))
- mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
+ mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
+ if (!flows[vport->index])
+ continue;
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
+ if (mlx5_ecpf_vport_exists(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
add_ecpf_flow_err:
- if (mlx5_core_is_ecpf_esw_manager(esw->dev))
- mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
add_pf_flow_err:
esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
kvfree(flows);
@@ -1245,20 +1265,23 @@ alloc_flows_err:
static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
{
struct mlx5_flow_handle **flows;
- int i;
+ struct mlx5_vport *vport;
+ unsigned long i;
flows = esw->fdb_table.offloads.peer_miss_rules;
- mlx5_esw_for_each_vf_vport_num_reverse(esw, i,
- mlx5_core_max_vfs(esw->dev))
- mlx5_del_flow_rules(flows[i]);
-
- if (mlx5_ecpf_vport_exists(esw->dev))
- mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
+ mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev))
+ mlx5_del_flow_rules(flows[vport->index]);
- if (mlx5_core_is_ecpf_esw_manager(esw->dev))
- mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+ if (mlx5_ecpf_vport_exists(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
kvfree(flows);
}
@@ -1402,11 +1425,11 @@ static void esw_vport_tbl_put(struct mlx5_eswitch *esw)
{
struct mlx5_vport_tbl_attr attr;
struct mlx5_vport *vport;
- int i;
+ unsigned long i;
attr.chain = 0;
attr.prio = 1;
- mlx5_esw_for_all_vports(esw, i, vport) {
+ mlx5_esw_for_each_vport(esw, i, vport) {
attr.vport = vport->vport;
attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
mlx5_esw_vporttbl_put(esw, &attr);
@@ -1418,11 +1441,11 @@ static int esw_vport_tbl_get(struct mlx5_eswitch *esw)
struct mlx5_vport_tbl_attr attr;
struct mlx5_flow_table *fdb;
struct mlx5_vport *vport;
- int i;
+ unsigned long i;
attr.chain = 0;
attr.prio = 1;
- mlx5_esw_for_all_vports(esw, i, vport) {
+ mlx5_esw_for_each_vport(esw, i, vport) {
attr.vport = vport->vport;
attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
fdb = mlx5_esw_vporttbl_get(esw, &attr);
@@ -1910,12 +1933,12 @@ out:
return flow_rule;
}
-
-static int mlx5_eswitch_inline_mode_get(const struct mlx5_eswitch *esw, u8 *mode)
+static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
{
u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
struct mlx5_core_dev *dev = esw->dev;
- int vport;
+ struct mlx5_vport *vport;
+ unsigned long i;
if (!MLX5_CAP_GEN(dev, vport_group_manager))
return -EOPNOTSUPP;
@@ -1936,8 +1959,8 @@ static int mlx5_eswitch_inline_mode_get(const struct mlx5_eswitch *esw, u8 *mode
query_vports:
mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode);
- mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
- mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
+ mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+ mlx5_query_nic_vport_min_inline(dev, vport->vport, &mlx5_mode);
if (prev_mlx5_mode != mlx5_mode)
return -EINVAL;
prev_mlx5_mode = mlx5_mode;
@@ -2080,34 +2103,82 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
return err;
}
-void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
+static void mlx5_esw_offloads_rep_mark_set(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ xa_mark_t mark)
{
- kfree(esw->offloads.vport_reps);
+ bool mark_set;
+
+ /* Copy the mark from vport to its rep */
+ mark_set = xa_get_mark(&esw->vports, rep->vport, mark);
+ if (mark_set)
+ xa_set_mark(&esw->offloads.vport_reps, rep->vport, mark);
}
-int esw_offloads_init_reps(struct mlx5_eswitch *esw)
+static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport)
{
- int total_vports = esw->total_vports;
struct mlx5_eswitch_rep *rep;
- int vport_index;
- u8 rep_type;
+ int rep_type;
+ int err;
- esw->offloads.vport_reps = kcalloc(total_vports,
- sizeof(struct mlx5_eswitch_rep),
- GFP_KERNEL);
- if (!esw->offloads.vport_reps)
+ rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+ if (!rep)
return -ENOMEM;
- mlx5_esw_for_all_reps(esw, vport_index, rep) {
- rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index);
- rep->vport_index = vport_index;
+ rep->vport = vport->vport;
+ rep->vport_index = vport->index;
+ for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
+ atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
- for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
- atomic_set(&rep->rep_data[rep_type].state,
- REP_UNREGISTERED);
- }
+ err = xa_insert(&esw->offloads.vport_reps, rep->vport, rep, GFP_KERNEL);
+ if (err)
+ goto insert_err;
+ mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_HOST_FN);
+ mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_VF);
+ mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_SF);
return 0;
+
+insert_err:
+ kfree(rep);
+ return err;
+}
+
+/* Drop @rep from the representor xarray, where it is keyed by its vport
+ * number, and then release its memory.
+ */
+static void mlx5_esw_offloads_rep_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep)
+{
+ struct xarray *reps = &esw->offloads.vport_reps;
+
+ xa_erase(reps, rep->vport);
+ kfree(rep);
+}
+
+/* Free every representor held in offloads.vport_reps and then destroy the
+ * xarray itself.
+ */
+void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
+{
+ struct mlx5_eswitch_rep *cur;
+ unsigned long idx;
+
+ /* Each iteration erases and frees the entry it is visiting. */
+ mlx5_esw_for_each_rep(esw, idx, cur) {
+ mlx5_esw_offloads_rep_cleanup(esw, cur);
+ }
+
+ xa_destroy(&esw->offloads.vport_reps);
+}
+
+/* Initialize the representor xarray and create one representor per eswitch
+ * vport.
+ *
+ * Returns 0 on success. If creating any representor fails, everything set
+ * up so far is torn down and that error code is returned.
+ */
+int esw_offloads_init_reps(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ unsigned long idx;
+ int ret;
+
+ xa_init(&esw->offloads.vport_reps);
+
+ mlx5_esw_for_each_vport(esw, idx, vport) {
+ ret = mlx5_esw_offloads_rep_init(esw, vport);
+ if (!ret)
+ continue;
+
+ /* Undo the representors inserted before the failure. */
+ esw_offloads_cleanup_reps(esw);
+ return ret;
+ }
+
+ return 0;
+}
static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
@@ -2121,7 +2192,7 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type)
{
struct mlx5_eswitch_rep *rep;
- int i;
+ unsigned long i;
mlx5_esw_for_each_sf_rep(esw, i, rep)
__esw_offloads_unload_rep(esw, rep, rep_type);
@@ -2130,11 +2201,11 @@ static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type)
static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
{
struct mlx5_eswitch_rep *rep;
- int i;
+ unsigned long i;
__unload_reps_sf_vport(esw, rep_type);
- mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, esw->esw_funcs.num_vfs)
+ mlx5_esw_for_each_vf_rep(esw, i, rep)
__esw_offloads_unload_rep(esw, rep, rep_type);
if (mlx5_ecpf_vport_exists(esw->dev)) {
@@ -2421,25 +2492,25 @@ static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw,
static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
- int i;
+ unsigned long i;
if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
return;
- mlx5_esw_for_all_vports_reverse(esw, i, vport)
+ mlx5_esw_for_each_vport(esw, i, vport)
esw_offloads_vport_metadata_cleanup(esw, vport);
}
static int esw_offloads_metadata_init(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
+ unsigned long i;
int err;
- int i;
if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
return 0;
- mlx5_esw_for_all_vports(esw, i, vport) {
+ mlx5_esw_for_each_vport(esw, i, vport) {
err = esw_offloads_vport_metadata_setup(esw, vport);
if (err)
goto metadata_err;
@@ -2676,11 +2747,25 @@ static int mlx5_esw_host_number_init(struct mlx5_eswitch *esw)
return 0;
}
+/* Tell whether @controller is a controller number this eswitch accepts.
+ *
+ * Controller 0 (the local one) is always accepted. Any other value is
+ * accepted only on an ECPF eswitch manager, and only when it equals the
+ * stored host number plus one.
+ */
+bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller)
+{
+ /* Local controller is always valid */
+ if (!controller)
+ return true;
+
+ /* External host number starts with zero in device */
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev))
+ return controller == esw->offloads.host_number + 1;
+
+ return false;
+}
+
int esw_offloads_enable(struct mlx5_eswitch *esw)
{
struct mapping_ctx *reg_c0_obj_pool;
struct mlx5_vport *vport;
- int err, i;
+ unsigned long i;
+ int err;
if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap))
@@ -2926,13 +3011,44 @@ unlock:
return err;
}
+/* Apply @mlx5_mode as the min inline mode of every host function vport up
+ * to esw_funcs.num_vfs.
+ *
+ * Returns 0 on success. On the first failure an extack message is set, the
+ * vports that were already modified are put back to the mode stored in
+ * esw->offloads.inline_mode, and the error code is returned.
+ */
+static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_vport *vport;
+ unsigned long idx;
+ u16 failed_vport;
+ int err;
+
+ mlx5_esw_for_each_host_func_vport(esw, idx, vport, esw->esw_funcs.num_vfs) {
+ err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode);
+ if (err)
+ goto revert_inline_mode;
+ }
+ return 0;
+
+revert_inline_mode:
+ /* 'vport' still points at the vport whose update failed. */
+ failed_vport = vport->vport;
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to set min inline on vport");
+
+ /* Walk again from the start and stop at the vport that failed. */
+ mlx5_esw_for_each_host_func_vport(esw, idx, vport, esw->esw_funcs.num_vfs) {
+ if (vport->vport == failed_vport)
+ break;
+ mlx5_modify_nic_vport_min_inline(dev, vport->vport,
+ esw->offloads.inline_mode);
+ }
+ return err;
+}
+
int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
- int err, vport, num_vport;
struct mlx5_eswitch *esw;
u8 mlx5_mode;
+ int err;
esw = mlx5_devlink_eswitch_get(devlink);
if (IS_ERR(esw))
@@ -2967,25 +3083,14 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
if (err)
goto out;
- mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
- err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack,
- "Failed to set min inline on vport");
- goto revert_inline_mode;
- }
- }
+ err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack);
+ if (err)
+ goto out;
esw->offloads.inline_mode = mlx5_mode;
up_write(&esw->mode_lock);
return 0;
-revert_inline_mode:
- num_vport = --vport;
- mlx5_esw_for_each_host_func_vport_reverse(esw, vport, num_vport)
- mlx5_modify_nic_vport_min_inline(dev,
- vport,
- esw->offloads.inline_mode);
out:
up_write(&esw->mode_lock);
return err;
@@ -3116,11 +3221,11 @@ void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
{
struct mlx5_eswitch_rep_data *rep_data;
struct mlx5_eswitch_rep *rep;
- int i;
+ unsigned long i;
esw->offloads.rep_ops[rep_type] = ops;
- mlx5_esw_for_all_reps(esw, i, rep) {
- if (likely(mlx5_eswitch_vport_has_rep(esw, i))) {
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ if (likely(mlx5_eswitch_vport_has_rep(esw, rep->vport))) {
rep->esw = esw;
rep_data = &rep->rep_data[rep_type];
atomic_set(&rep_data->state, REP_REGISTERED);
@@ -3132,12 +3237,12 @@ EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
{
struct mlx5_eswitch_rep *rep;
- int i;
+ unsigned long i;
if (esw->mode == MLX5_ESWITCH_OFFLOADS)
__unload_reps_all_vport(esw, rep_type);
- mlx5_esw_for_all_reps(esw, i, rep)
+ mlx5_esw_for_each_rep(esw, i, rep)
atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
}
EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps);
@@ -3178,12 +3283,6 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
}
EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
-bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num)
-{
- return vport_num >= MLX5_VPORT_FIRST_VF &&
- vport_num <= esw->dev->priv.sriov.max_vfs;
-}
-
bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw)
{
return !!(esw->flags & MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED);
@@ -3209,7 +3308,7 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
- u16 vport_num, u32 sfnum)
+ u16 vport_num, u32 controller, u32 sfnum)
{
int err;
@@ -3217,7 +3316,7 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p
if (err)
return err;
- err = mlx5_esw_devlink_sf_port_register(esw, dl_port, vport_num, sfnum);
+ err = mlx5_esw_devlink_sf_port_register(esw, dl_port, vport_num, controller, sfnum);
if (err)
goto devlink_err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
index 90b524c59f3c..6a0c6f965ad1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
@@ -148,9 +148,19 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_
struct mlx5_sf_dev_table *table = container_of(nb, struct mlx5_sf_dev_table, nb);
const struct mlx5_vhca_state_event *event = data;
struct mlx5_sf_dev *sf_dev;
+ u16 max_functions;
u16 sf_index;
+ u16 base_id;
+
+ max_functions = mlx5_sf_max_functions(table->dev);
+ if (!max_functions)
+ return 0;
+
+ base_id = MLX5_CAP_GEN(table->dev, sf_base_id);
+ if (event->function_id < base_id || event->function_id >= (base_id + max_functions))
+ return 0;
- sf_index = event->function_id - MLX5_CAP_GEN(table->dev, sf_base_id);
+ sf_index = event->function_id - base_id;
sf_dev = xa_load(&table->devices, sf_index);
switch (event->new_vhca_state) {
case MLX5_VHCA_STATE_ALLOCATED:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
index 52226d9b9a6d..a8e73c9ed1ea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -12,6 +12,7 @@
struct mlx5_sf {
struct devlink_port dl_port;
unsigned int port_index;
+ u32 controller;
u16 id;
u16 hw_fn_id;
u16 hw_state;
@@ -58,7 +59,8 @@ static void mlx5_sf_id_erase(struct mlx5_sf_table *table, struct mlx5_sf *sf)
}
static struct mlx5_sf *
-mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *extack)
+mlx5_sf_alloc(struct mlx5_sf_table *table, struct mlx5_eswitch *esw,
+ u32 controller, u32 sfnum, struct netlink_ext_ack *extack)
{
unsigned int dl_port_index;
struct mlx5_sf *sf;
@@ -66,7 +68,12 @@ mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *ex
int id_err;
int err;
- id_err = mlx5_sf_hw_table_sf_alloc(table->dev, sfnum);
+ if (!mlx5_esw_offloads_controller_valid(esw, controller)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid controller number");
+ return ERR_PTR(-EINVAL);
+ }
+
+ id_err = mlx5_sf_hw_table_sf_alloc(table->dev, controller, sfnum);
if (id_err < 0) {
err = id_err;
goto id_err;
@@ -78,11 +85,12 @@ mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *ex
goto alloc_err;
}
sf->id = id_err;
- hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, sf->id);
+ hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, controller, sf->id);
dl_port_index = mlx5_esw_vport_to_devlink_port_index(table->dev, hw_fn_id);
sf->port_index = dl_port_index;
sf->hw_fn_id = hw_fn_id;
sf->hw_state = MLX5_VHCA_STATE_ALLOCATED;
+ sf->controller = controller;
err = mlx5_sf_id_insert(table, sf);
if (err)
@@ -93,7 +101,7 @@ mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *ex
insert_err:
kfree(sf);
alloc_err:
- mlx5_sf_hw_table_sf_free(table->dev, id_err);
+ mlx5_sf_hw_table_sf_free(table->dev, controller, id_err);
id_err:
if (err == -EEXIST)
NL_SET_ERR_MSG_MOD(extack, "SF already exist. Choose different sfnum");
@@ -103,7 +111,7 @@ id_err:
static void mlx5_sf_free(struct mlx5_sf_table *table, struct mlx5_sf *sf)
{
mlx5_sf_id_erase(table, sf);
- mlx5_sf_hw_table_sf_free(table->dev, sf->id);
+ mlx5_sf_hw_table_sf_free(table->dev, sf->controller, sf->id);
kfree(sf);
}
@@ -272,12 +280,12 @@ static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table,
struct mlx5_sf *sf;
int err;
- sf = mlx5_sf_alloc(table, new_attr->sfnum, extack);
+ sf = mlx5_sf_alloc(table, esw, new_attr->controller, new_attr->sfnum, extack);
if (IS_ERR(sf))
return PTR_ERR(sf);
err = mlx5_esw_offloads_sf_vport_enable(esw, &sf->dl_port, sf->hw_fn_id,
- new_attr->sfnum);
+ new_attr->controller, new_attr->sfnum);
if (err)
goto esw_err;
*new_port_index = sf->port_index;
@@ -306,7 +314,8 @@ mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_
"User must provide unique sfnum. Driver does not support auto assignment");
return -EOPNOTSUPP;
}
- if (new_attr->controller_valid && new_attr->controller) {
+ if (new_attr->controller_valid && new_attr->controller &&
+ !mlx5_core_is_ecpf_esw_manager(dev)) {
NL_SET_ERR_MSG_MOD(extack, "External controller is unsupported");
return -EOPNOTSUPP;
}
@@ -352,10 +361,10 @@ static void mlx5_sf_dealloc(struct mlx5_sf_table *table, struct mlx5_sf *sf)
* firmware gives confirmation that it is detached by the driver.
*/
mlx5_cmd_sf_disable_hca(table->dev, sf->hw_fn_id);
- mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->id);
+ mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id);
kfree(sf);
} else {
- mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->id);
+ mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id);
kfree(sf);
}
}
@@ -437,9 +446,6 @@ sf_err:
static void mlx5_sf_table_enable(struct mlx5_sf_table *table)
{
- if (!mlx5_sf_max_functions(table->dev))
- return;
-
init_completion(&table->disable_complete);
refcount_set(&table->refcount, 1);
}
@@ -462,9 +468,6 @@ static void mlx5_sf_deactivate_all(struct mlx5_sf_table *table)
static void mlx5_sf_table_disable(struct mlx5_sf_table *table)
{
- if (!mlx5_sf_max_functions(table->dev))
- return;
-
if (!refcount_read(&table->refcount))
return;
@@ -498,7 +501,8 @@ static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, voi
static bool mlx5_sf_table_supported(const struct mlx5_core_dev *dev)
{
- return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) && mlx5_sf_supported(dev);
+ return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) &&
+ mlx5_sf_hw_table_supported(dev);
}
int mlx5_sf_table_init(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
index ec53c11c8344..ef5f892aafad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
@@ -8,6 +8,7 @@
#include "ecpf.h"
#include "vhca_event.h"
#include "mlx5_core.h"
+#include "eswitch.h"
struct mlx5_sf_hw {
u32 usr_sfnum;
@@ -15,59 +16,113 @@ struct mlx5_sf_hw {
u8 pending_delete: 1;
};
+/* Per-controller SF bookkeeping. */
+struct mlx5_sf_hwc_table {
+ /* Array of SF entries, indexed by the software SF id; NULL means no SF
+ * can be allocated on this controller.
+ */
+ struct mlx5_sf_hw *sfs;
+ /* Number of entries in sfs, and the size of the hw function id range */
+ int max_fn;
+ /* First hw function id of the range; hw id = start_fn_id + sw id */
+ u16 start_fn_id;
+};
+
+/* Index into mlx5_sf_hw_table.hwc[]. */
+enum mlx5_sf_hwc_index {
+ /* Entry selected for controller number 0 */
+ MLX5_SF_HWC_LOCAL,
+ /* Entry selected for any non-zero controller number */
+ MLX5_SF_HWC_EXTERNAL,
+ /* Number of entries; used to size the hwc[] array */
+ MLX5_SF_HWC_MAX,
+};
+
struct mlx5_sf_hw_table {
struct mlx5_core_dev *dev;
- struct mlx5_sf_hw *sfs;
- int max_local_functions;
struct mutex table_lock; /* Serializes sf deletion and vhca state change handler. */
struct notifier_block vhca_nb;
+ struct mlx5_sf_hwc_table hwc[MLX5_SF_HWC_MAX];
};
-u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id)
+static struct mlx5_sf_hwc_table *
+mlx5_sf_controller_to_hwc(struct mlx5_core_dev *dev, u32 controller)
{
- return sw_id + mlx5_sf_start_function_id(dev);
+ int idx = !!controller;
+
+ return &dev->priv.sf_hw_table->hwc[idx];
}
-static u16 mlx5_sf_hw_to_sw_id(const struct mlx5_core_dev *dev, u16 hw_id)
+u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id)
{
- return hw_id - mlx5_sf_start_function_id(dev);
+ struct mlx5_sf_hwc_table *hwc;
+
+ hwc = mlx5_sf_controller_to_hwc(dev, controller);
+ return hwc->start_fn_id + sw_id;
}
-int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
+static u16 mlx5_sf_hw_to_sw_id(struct mlx5_sf_hwc_table *hwc, u16 hw_id)
+{
+ return hw_id - hwc->start_fn_id;
+}
+
+static struct mlx5_sf_hwc_table *
+mlx5_sf_table_fn_to_hwc(struct mlx5_sf_hw_table *table, u16 fn_id)
{
- struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
- int sw_id = -ENOSPC;
- u16 hw_fn_id;
- int err;
int i;
- if (!table->max_local_functions)
- return -EOPNOTSUPP;
+ for (i = 0; i < ARRAY_SIZE(table->hwc); i++) {
+ if (table->hwc[i].max_fn &&
+ fn_id >= table->hwc[i].start_fn_id &&
+ fn_id < (table->hwc[i].start_fn_id + table->hwc[i].max_fn))
+ return &table->hwc[i];
+ }
+ return NULL;
+}
+
+static int mlx5_sf_hw_table_id_alloc(struct mlx5_sf_hw_table *table, u32 controller,
+ u32 usr_sfnum)
+{
+ struct mlx5_sf_hwc_table *hwc;
+ int i;
+
+ hwc = mlx5_sf_controller_to_hwc(table->dev, controller);
+ if (!hwc->sfs)
+ return -ENOSPC;
- mutex_lock(&table->table_lock);
/* Check if sf with same sfnum already exists or not. */
- for (i = 0; i < table->max_local_functions; i++) {
- if (table->sfs[i].allocated && table->sfs[i].usr_sfnum == usr_sfnum) {
- err = -EEXIST;
- goto exist_err;
- }
+ for (i = 0; i < hwc->max_fn; i++) {
+ if (hwc->sfs[i].allocated && hwc->sfs[i].usr_sfnum == usr_sfnum)
+ return -EEXIST;
}
-
/* Find the free entry and allocate the entry from the array */
- for (i = 0; i < table->max_local_functions; i++) {
- if (!table->sfs[i].allocated) {
- table->sfs[i].usr_sfnum = usr_sfnum;
- table->sfs[i].allocated = true;
- sw_id = i;
- break;
+ for (i = 0; i < hwc->max_fn; i++) {
+ if (!hwc->sfs[i].allocated) {
+ hwc->sfs[i].usr_sfnum = usr_sfnum;
+ hwc->sfs[i].allocated = true;
+ return i;
}
}
- if (sw_id == -ENOSPC) {
- err = -ENOSPC;
+ return -ENOSPC;
+}
+
+static void mlx5_sf_hw_table_id_free(struct mlx5_sf_hw_table *table, u32 controller, int id)
+{
+ struct mlx5_sf_hwc_table *hwc;
+
+ hwc = mlx5_sf_controller_to_hwc(table->dev, controller);
+ hwc->sfs[id].allocated = false;
+ hwc->sfs[id].pending_delete = false;
+}
+
+int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum)
+{
+ struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+ u16 hw_fn_id;
+ int sw_id;
+ int err;
+
+ if (!table)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&table->table_lock);
+ sw_id = mlx5_sf_hw_table_id_alloc(table, controller, usr_sfnum);
+ if (sw_id < 0) {
+ err = sw_id;
goto exist_err;
}
- hw_fn_id = mlx5_sf_sw_to_hw_id(dev, sw_id);
+ hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, sw_id);
err = mlx5_cmd_alloc_sf(dev, hw_fn_id);
if (err)
goto err;
@@ -76,47 +131,58 @@ int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
if (err)
goto vhca_err;
+ if (controller) {
+ /* If this SF is for external controller, SF manager
+ * needs to arm firmware to receive the events.
+ */
+ err = mlx5_vhca_event_arm(dev, hw_fn_id);
+ if (err)
+ goto vhca_err;
+ }
+
mutex_unlock(&table->table_lock);
return sw_id;
vhca_err:
mlx5_cmd_dealloc_sf(dev, hw_fn_id);
err:
- table->sfs[i].allocated = false;
+ mlx5_sf_hw_table_id_free(table, controller, sw_id);
exist_err:
mutex_unlock(&table->table_lock);
return err;
}
-static void _mlx5_sf_hw_id_free(struct mlx5_core_dev *dev, u16 id)
+void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id)
{
struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
u16 hw_fn_id;
- hw_fn_id = mlx5_sf_sw_to_hw_id(dev, id);
+ mutex_lock(&table->table_lock);
+ hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id);
mlx5_cmd_dealloc_sf(dev, hw_fn_id);
- table->sfs[id].allocated = false;
- table->sfs[id].pending_delete = false;
+ mlx5_sf_hw_table_id_free(table, controller, id);
+ mutex_unlock(&table->table_lock);
}
-void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id)
+static void mlx5_sf_hw_table_hwc_sf_free(struct mlx5_core_dev *dev,
+ struct mlx5_sf_hwc_table *hwc, int idx)
{
- struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
-
- mutex_lock(&table->table_lock);
- _mlx5_sf_hw_id_free(dev, id);
- mutex_unlock(&table->table_lock);
+ mlx5_cmd_dealloc_sf(dev, hwc->start_fn_id + idx);
+ hwc->sfs[idx].allocated = false;
+ hwc->sfs[idx].pending_delete = false;
}
-void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id)
+void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id)
{
struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
+ struct mlx5_sf_hwc_table *hwc;
u16 hw_fn_id;
u8 state;
int err;
- hw_fn_id = mlx5_sf_sw_to_hw_id(dev, id);
+ hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id);
+ hwc = mlx5_sf_controller_to_hwc(dev, controller);
mutex_lock(&table->table_lock);
err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, out, sizeof(out));
if (err)
@@ -124,53 +190,102 @@ void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id)
state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state);
if (state == MLX5_VHCA_STATE_ALLOCATED) {
mlx5_cmd_dealloc_sf(dev, hw_fn_id);
- table->sfs[id].allocated = false;
+ hwc->sfs[id].allocated = false;
} else {
- table->sfs[id].pending_delete = true;
+ hwc->sfs[id].pending_delete = true;
}
err:
mutex_unlock(&table->table_lock);
}
-static void mlx5_sf_hw_dealloc_all(struct mlx5_sf_hw_table *table)
+static void mlx5_sf_hw_table_hwc_dealloc_all(struct mlx5_core_dev *dev,
+ struct mlx5_sf_hwc_table *hwc)
{
int i;
- for (i = 0; i < table->max_local_functions; i++) {
- if (table->sfs[i].allocated)
- _mlx5_sf_hw_id_free(table->dev, i);
+ for (i = 0; i < hwc->max_fn; i++) {
+ if (hwc->sfs[i].allocated)
+ mlx5_sf_hw_table_hwc_sf_free(dev, hwc, i);
}
}
+static void mlx5_sf_hw_table_dealloc_all(struct mlx5_sf_hw_table *table)
+{
+ mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_EXTERNAL]);
+ mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_LOCAL]);
+}
+
+static int mlx5_sf_hw_table_hwc_init(struct mlx5_sf_hwc_table *hwc, u16 max_fn, u16 base_id)
+{
+ struct mlx5_sf_hw *sfs;
+
+ if (!max_fn)
+ return 0;
+
+ sfs = kcalloc(max_fn, sizeof(*sfs), GFP_KERNEL);
+ if (!sfs)
+ return -ENOMEM;
+
+ hwc->sfs = sfs;
+ hwc->max_fn = max_fn;
+ hwc->start_fn_id = base_id;
+ return 0;
+}
+
+static void mlx5_sf_hw_table_hwc_cleanup(struct mlx5_sf_hwc_table *hwc)
+{
+ kfree(hwc->sfs);
+}
+
int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev)
{
struct mlx5_sf_hw_table *table;
- struct mlx5_sf_hw *sfs;
- int max_functions;
+ u16 max_ext_fn = 0;
+ u16 ext_base_id;
+ u16 max_fn = 0;
+ u16 base_id;
+ int err;
- if (!mlx5_sf_supported(dev) || !mlx5_vhca_event_supported(dev))
+ if (!mlx5_vhca_event_supported(dev))
+ return 0;
+
+ if (mlx5_sf_supported(dev))
+ max_fn = mlx5_sf_max_functions(dev);
+
+ err = mlx5_esw_sf_max_hpf_functions(dev, &max_ext_fn, &ext_base_id);
+ if (err)
+ return err;
+
+ if (!max_fn && !max_ext_fn)
return 0;
- max_functions = mlx5_sf_max_functions(dev);
table = kzalloc(sizeof(*table), GFP_KERNEL);
if (!table)
return -ENOMEM;
- sfs = kcalloc(max_functions, sizeof(*sfs), GFP_KERNEL);
- if (!sfs)
- goto table_err;
-
mutex_init(&table->table_lock);
table->dev = dev;
- table->sfs = sfs;
- table->max_local_functions = max_functions;
dev->priv.sf_hw_table = table;
- mlx5_core_dbg(dev, "SF HW table: max sfs = %d\n", max_functions);
+
+ base_id = mlx5_sf_start_function_id(dev);
+ err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_LOCAL], max_fn, base_id);
+ if (err)
+ goto table_err;
+
+ err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_EXTERNAL],
+ max_ext_fn, ext_base_id);
+ if (err)
+ goto ext_err;
+
+ mlx5_core_dbg(dev, "SF HW table: max sfs = %d, ext sfs = %d\n", max_fn, max_ext_fn);
return 0;
+ext_err:
+ mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]);
table_err:
+ mutex_destroy(&table->table_lock);
kfree(table);
- return -ENOMEM;
+ return err;
}
void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev)
@@ -181,7 +296,8 @@ void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev)
return;
mutex_destroy(&table->table_lock);
- kfree(table->sfs);
+ mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_EXTERNAL]);
+ mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]);
kfree(table);
}
@@ -189,21 +305,26 @@ static int mlx5_sf_hw_vhca_event(struct notifier_block *nb, unsigned long opcode
{
struct mlx5_sf_hw_table *table = container_of(nb, struct mlx5_sf_hw_table, vhca_nb);
const struct mlx5_vhca_state_event *event = data;
+ struct mlx5_sf_hwc_table *hwc;
struct mlx5_sf_hw *sf_hw;
u16 sw_id;
if (event->new_vhca_state != MLX5_VHCA_STATE_ALLOCATED)
return 0;
- sw_id = mlx5_sf_hw_to_sw_id(table->dev, event->function_id);
- sf_hw = &table->sfs[sw_id];
+ hwc = mlx5_sf_table_fn_to_hwc(table, event->function_id);
+ if (!hwc)
+ return 0;
+
+ sw_id = mlx5_sf_hw_to_sw_id(hwc, event->function_id);
+ sf_hw = &hwc->sfs[sw_id];
mutex_lock(&table->table_lock);
/* SF driver notified through firmware that SF is finally detached.
* Hence recycle the sf hardware id for reuse.
*/
if (sf_hw->allocated && sf_hw->pending_delete)
- _mlx5_sf_hw_id_free(table->dev, sw_id);
+ mlx5_sf_hw_table_hwc_sf_free(table->dev, hwc, sw_id);
mutex_unlock(&table->table_lock);
return 0;
}
@@ -228,5 +349,10 @@ void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev)
mlx5_vhca_event_notifier_unregister(dev, &table->vhca_nb);
/* Dealloc SFs whose firmware event has been missed. */
- mlx5_sf_hw_dealloc_all(table);
+ mlx5_sf_hw_table_dealloc_all(table);
+}
+
+bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev)
+{
+ return !!dev->priv.sf_hw_table;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
index cb02a51d0986..7114f3fc335f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
@@ -12,10 +12,11 @@ int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id);
int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
-u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id);
+u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id);
-int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum);
-void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id);
-void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id);
+int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum);
+void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id);
+void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id);
+bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index e05c5c0f3ae1..457ad42eaa2a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1151,20 +1151,6 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
-/**
- * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
- *
- * @dev: Pointer to core device
- *
- * mlx5_eswitch_get_total_vports returns total number of vports for
- * the eswitch.
- */
-u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
-{
- return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev) + mlx5_sf_max_functions(dev);
-}
-EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
-
int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out)
{
u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
index 713ee3041d49..bea978df7713 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
@@ -364,6 +364,7 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port)
attrs.split = eth_port.is_split;
attrs.splittable = !attrs.split;
+ attrs.lanes = eth_port.port_lanes;
attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
attrs.phys.port_number = eth_port.label_port;
attrs.phys.split_subport_number = eth_port.label_subport;
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 9e5dad41cdc9..4afff320dfd0 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -913,31 +913,20 @@ static int ravb_poll(struct napi_struct *napi, int budget)
int q = napi - priv->napi;
int mask = BIT(q);
int quota = budget;
- u32 ris0, tis;
- for (;;) {
- tis = ravb_read(ndev, TIS);
- ris0 = ravb_read(ndev, RIS0);
- if (!((ris0 & mask) || (tis & mask)))
- break;
+ /* Processing RX Descriptor Ring */
+ /* Clear RX interrupt */
+ ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0);
+ if (ravb_rx(ndev, &quota, q))
+ goto out;
- /* Processing RX Descriptor Ring */
- if (ris0 & mask) {
- /* Clear RX interrupt */
- ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0);
- if (ravb_rx(ndev, &quota, q))
- goto out;
- }
- /* Processing TX Descriptor Ring */
- if (tis & mask) {
- spin_lock_irqsave(&priv->lock, flags);
- /* Clear TX interrupt */
- ravb_write(ndev, ~(mask | TIS_RESERVED), TIS);
- ravb_tx_free(ndev, q, true);
- netif_wake_subqueue(ndev, q);
- spin_unlock_irqrestore(&priv->lock, flags);
- }
- }
+ /* Processing TX Descriptor Ring */
+ spin_lock_irqsave(&priv->lock, flags);
+ /* Clear TX interrupt */
+ ravb_write(ndev, ~(mask | TIS_RESERVED), TIS);
+ ravb_tx_free(ndev, q, true);
+ netif_wake_subqueue(ndev, q);
+ spin_unlock_irqrestore(&priv->lock, flags);
napi_complete(napi);
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index c873f961d5a5..c3f35da1b82a 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -2944,8 +2944,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
/* Get the transmit queue */
tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL);
- tx_queue = efx_channel_get_tx_queue(channel,
- tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
+ tx_queue = channel->tx_queue + (tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
if (!tx_queue->timestamping) {
/* Transmit completion */
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index d75cf5ff5686..49df02ecee91 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -835,14 +835,14 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
/* Transmit completion */
tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_DESC_PTR);
tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
- tx_queue = efx_channel_get_tx_queue(
- channel, tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
+ tx_queue = channel->tx_queue +
+ (tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
efx_xmit_done(tx_queue, tx_ev_desc_ptr);
} else if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) {
/* Rewrite the FIFO write pointer */
tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
- tx_queue = efx_channel_get_tx_queue(
- channel, tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
+ tx_queue = channel->tx_queue +
+ (tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
netif_tx_lock(efx->net_dev);
efx_farch_notify_tx_desc(tx_queue);
@@ -1081,16 +1081,16 @@ static void
efx_farch_handle_tx_flush_done(struct efx_nic *efx, efx_qword_t *event)
{
struct efx_tx_queue *tx_queue;
+ struct efx_channel *channel;
int qid;
qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
if (qid < EFX_MAX_TXQ_PER_CHANNEL * (efx->n_tx_channels + efx->n_extra_tx_channels)) {
- tx_queue = efx_get_tx_queue(efx, qid / EFX_MAX_TXQ_PER_CHANNEL,
- qid % EFX_MAX_TXQ_PER_CHANNEL);
- if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0)) {
+ channel = efx_get_tx_channel(efx, qid / EFX_MAX_TXQ_PER_CHANNEL);
+ tx_queue = channel->tx_queue + (qid % EFX_MAX_TXQ_PER_CHANNEL);
+ if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0))
efx_farch_magic_event(tx_queue->channel,
EFX_CHANNEL_MAGIC_TX_DRAIN(tx_queue));
- }
}
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 372090e8ee6f..a9a984c57d78 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3303,8 +3303,15 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
/* Enable TSO */
if (priv->tso) {
- for (chan = 0; chan < tx_cnt; chan++)
+ for (chan = 0; chan < tx_cnt; chan++) {
+ struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
+
+ /* TSO and TBS cannot co-exist */
+ if (tx_q->tbs & STMMAC_TBS_AVAIL)
+ continue;
+
stmmac_enable_tso(priv, priv->ioaddr, 1, chan);
+ }
}
/* Enable Split Header */
@@ -3674,9 +3681,8 @@ int stmmac_open(struct net_device *dev)
struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
int tbs_en = priv->plat->tx_queues_cfg[chan].tbs_en;
+ /* Setup per-TXQ tbs flag before TX descriptor alloc */
tx_q->tbs |= tbs_en ? STMMAC_TBS_AVAIL : 0;
- if (stmmac_enable_tbs(priv, priv->ioaddr, tbs_en, chan))
- tx_q->tbs &= ~STMMAC_TBS_AVAIL;
}
ret = alloc_dma_desc_resources(priv);
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 14e7da7d302f..f9417b44cae8 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -169,11 +169,11 @@ static const char emac_version_string[] = "TI DaVinci EMAC Linux v6.1";
/* EMAC mac_status register */
#define EMAC_MACSTATUS_TXERRCODE_MASK (0xF00000)
#define EMAC_MACSTATUS_TXERRCODE_SHIFT (20)
-#define EMAC_MACSTATUS_TXERRCH_MASK (0x7)
+#define EMAC_MACSTATUS_TXERRCH_MASK (0x70000)
#define EMAC_MACSTATUS_TXERRCH_SHIFT (16)
#define EMAC_MACSTATUS_RXERRCODE_MASK (0xF000)
#define EMAC_MACSTATUS_RXERRCODE_SHIFT (12)
-#define EMAC_MACSTATUS_RXERRCH_MASK (0x7)
+#define EMAC_MACSTATUS_RXERRCH_MASK (0x700)
#define EMAC_MACSTATUS_RXERRCH_SHIFT (8)
/* EMAC RX register masks */
diff --git a/drivers/net/ethernet/xscale/Kconfig b/drivers/net/ethernet/xscale/Kconfig
index 7b83a6e5d894..468ffe3d1707 100644
--- a/drivers/net/ethernet/xscale/Kconfig
+++ b/drivers/net/ethernet/xscale/Kconfig
@@ -22,6 +22,7 @@ config IXP4XX_ETH
tristate "Intel IXP4xx Ethernet support"
depends on ARM && ARCH_IXP4XX && IXP4XX_NPE && IXP4XX_QMGR
select PHYLIB
+ select OF_MDIO if OF
select NET_PTP_CLASSIFY
help
Say Y here if you want to use built-in Ethernet ports
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index 0152f1e70783..cb89323855d8 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -28,6 +28,7 @@
#include <linux/kernel.h>
#include <linux/net_tstamp.h>
#include <linux/of.h>
+#include <linux/of_mdio.h>
#include <linux/phy.h>
#include <linux/platform_data/eth_ixp4xx.h>
#include <linux/platform_device.h>
@@ -165,7 +166,6 @@ struct eth_regs {
};
struct port {
- struct resource *mem_res;
struct eth_regs __iomem *regs;
struct npe *npe;
struct net_device *netdev;
@@ -250,6 +250,7 @@ static inline void memcpy_swab32(u32 *dest, u32 *src, int cnt)
static DEFINE_SPINLOCK(mdio_lock);
static struct eth_regs __iomem *mdio_regs; /* mdio command and status only */
static struct mii_bus *mdio_bus;
+static struct device_node *mdio_bus_np;
static int ports_open;
static struct port *npe_port_tab[MAX_NPES];
static struct dma_pool *dma_pool;
@@ -533,7 +534,8 @@ static int ixp4xx_mdio_register(struct eth_regs __iomem *regs)
mdio_bus->write = &ixp4xx_mdio_write;
snprintf(mdio_bus->id, MII_BUS_ID_SIZE, "ixp4xx-eth-0");
- if ((err = mdiobus_register(mdio_bus)))
+ err = of_mdiobus_register(mdio_bus, mdio_bus_np);
+ if (err)
mdiobus_free(mdio_bus);
return err;
}
@@ -1085,7 +1087,7 @@ static int init_queues(struct port *port)
int i;
if (!ports_open) {
- dma_pool = dma_pool_create(DRV_NAME, port->netdev->dev.parent,
+ dma_pool = dma_pool_create(DRV_NAME, &port->netdev->dev,
POOL_ALLOC_SIZE, 32, 0);
if (!dma_pool)
return -ENOMEM;
@@ -1358,19 +1360,118 @@ static const struct net_device_ops ixp4xx_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
};
+#ifdef CONFIG_OF
+static struct eth_plat_info *ixp4xx_of_get_platdata(struct device *dev)
+{
+ struct device_node *np = dev->of_node;
+ struct of_phandle_args queue_spec;
+ struct of_phandle_args npe_spec;
+ struct device_node *mdio_np;
+ struct eth_plat_info *plat;
+ int ret;
+
+ plat = devm_kzalloc(dev, sizeof(*plat), GFP_KERNEL);
+ if (!plat)
+ return NULL;
+
+ ret = of_parse_phandle_with_fixed_args(np, "intel,npe-handle", 1, 0,
+ &npe_spec);
+ if (ret) {
+ dev_err(dev, "no NPE engine specified\n");
+ return NULL;
+ }
+ /* NPE ID 0x00, 0x10, 0x20... */
+ plat->npe = (npe_spec.args[0] << 4);
+
+ /* Check if this device has an MDIO bus */
+ mdio_np = of_get_child_by_name(np, "mdio");
+ if (mdio_np) {
+ plat->has_mdio = true;
+ mdio_bus_np = mdio_np;
+ /* DO NOT put the mdio_np, it will be used */
+ }
+
+ /* Get the rx queue as a resource from queue manager */
+ ret = of_parse_phandle_with_fixed_args(np, "queue-rx", 1, 0,
+ &queue_spec);
+ if (ret) {
+ dev_err(dev, "no rx queue phandle\n");
+ return NULL;
+ }
+ plat->rxq = queue_spec.args[0];
+
+ /* Get the txready queue as resource from queue manager */
+ ret = of_parse_phandle_with_fixed_args(np, "queue-txready", 1, 0,
+ &queue_spec);
+ if (ret) {
+ dev_err(dev, "no txready queue phandle\n");
+ return NULL;
+ }
+ plat->txreadyq = queue_spec.args[0];
+
+ return plat;
+}
+#else
+static struct eth_plat_info *ixp4xx_of_get_platdata(struct device *dev)
+{
+ return NULL;
+}
+#endif
+
static int ixp4xx_eth_probe(struct platform_device *pdev)
{
- char phy_id[MII_BUS_ID_SIZE + 3];
struct phy_device *phydev = NULL;
struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
struct eth_plat_info *plat;
- resource_size_t regs_phys;
struct net_device *ndev;
struct resource *res;
struct port *port;
int err;
- plat = dev_get_platdata(dev);
+ if (np) {
+ plat = ixp4xx_of_get_platdata(dev);
+ if (!plat)
+ return -ENODEV;
+ } else {
+ plat = dev_get_platdata(dev);
+ if (!plat)
+ return -ENODEV;
+ plat->npe = pdev->id;
+ switch (plat->npe) {
+ case IXP4XX_ETH_NPEA:
+ /* If the MDIO bus is not up yet, defer probe */
+ break;
+ case IXP4XX_ETH_NPEB:
+ /* On all except IXP43x, NPE-B is used for the MDIO bus.
+ * If there is no NPE-B in the feature set, bail out,
+ * else we have the MDIO bus here.
+ */
+ if (!cpu_is_ixp43x()) {
+ if (!(ixp4xx_read_feature_bits() &
+ IXP4XX_FEATURE_NPEB_ETH0))
+ return -ENODEV;
+ /* Else register the MDIO bus on NPE-B */
+ plat->has_mdio = true;
+ }
+ break;
+ case IXP4XX_ETH_NPEC:
+ /* IXP43x lacks NPE-B and uses NPE-C for the MDIO bus
+ * access, if there is no NPE-C, no bus, nothing works,
+ * so bail out.
+ */
+ if (cpu_is_ixp43x()) {
+ if (!(ixp4xx_read_feature_bits() &
+ IXP4XX_FEATURE_NPEC_ETH))
+ return -ENODEV;
+ /* Else register the MDIO bus on NPE-C */
+ plat->has_mdio = true;
+ }
+ break;
+ default:
+ return -ENODEV;
+ }
+ }
if (!(ndev = devm_alloc_etherdev(dev, sizeof(struct port))))
return -ENOMEM;
@@ -1378,75 +1479,42 @@ static int ixp4xx_eth_probe(struct platform_device *pdev)
SET_NETDEV_DEV(ndev, dev);
port = netdev_priv(ndev);
port->netdev = ndev;
- port->id = pdev->id;
+ port->id = plat->npe;
/* Get the port resource and remap */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res)
return -ENODEV;
- regs_phys = res->start;
port->regs = devm_ioremap_resource(dev, res);
if (IS_ERR(port->regs))
return PTR_ERR(port->regs);
- switch (port->id) {
- case IXP4XX_ETH_NPEA:
- /* If the MDIO bus is not up yet, defer probe */
- if (!mdio_bus)
- return -EPROBE_DEFER;
- break;
- case IXP4XX_ETH_NPEB:
- /*
- * On all except IXP43x, NPE-B is used for the MDIO bus.
- * If there is no NPE-B in the feature set, bail out, else
- * register the MDIO bus.
- */
- if (!cpu_is_ixp43x()) {
- if (!(ixp4xx_read_feature_bits() &
- IXP4XX_FEATURE_NPEB_ETH0))
- return -ENODEV;
- /* Else register the MDIO bus on NPE-B */
- if ((err = ixp4xx_mdio_register(port->regs)))
- return err;
- }
- if (!mdio_bus)
- return -EPROBE_DEFER;
- break;
- case IXP4XX_ETH_NPEC:
- /*
- * IXP43x lacks NPE-B and uses NPE-C for the MDIO bus access,
- * of there is no NPE-C, no bus, nothing works, so bail out.
- */
- if (cpu_is_ixp43x()) {
- if (!(ixp4xx_read_feature_bits() &
- IXP4XX_FEATURE_NPEC_ETH))
- return -ENODEV;
- /* Else register the MDIO bus on NPE-C */
- if ((err = ixp4xx_mdio_register(port->regs)))
- return err;
+ /* Register the MDIO bus if we have it */
+ if (plat->has_mdio) {
+ err = ixp4xx_mdio_register(port->regs);
+ if (err) {
+ dev_err(dev, "failed to register MDIO bus\n");
+ return err;
}
- if (!mdio_bus)
- return -EPROBE_DEFER;
- break;
- default:
- return -ENODEV;
}
+ /* If the instance with the MDIO bus has not yet appeared,
+ * defer probing until it gets probed.
+ */
+ if (!mdio_bus)
+ return -EPROBE_DEFER;
ndev->netdev_ops = &ixp4xx_netdev_ops;
ndev->ethtool_ops = &ixp4xx_ethtool_ops;
ndev->tx_queue_len = 100;
+ /* Inherit the DMA masks from the platform device */
+ ndev->dev.dma_mask = dev->dma_mask;
+ ndev->dev.coherent_dma_mask = dev->coherent_dma_mask;
netif_napi_add(ndev, &port->napi, eth_poll, NAPI_WEIGHT);
if (!(port->npe = npe_request(NPE_ID(port->id))))
return -EIO;
- port->mem_res = request_mem_region(regs_phys, REGS_SIZE, ndev->name);
- if (!port->mem_res) {
- err = -EBUSY;
- goto err_npe_rel;
- }
-
port->plat = plat;
npe_port_tab[NPE_ID(port->id)] = port;
memcpy(ndev->dev_addr, plat->hwaddr, ETH_ALEN);
@@ -1459,12 +1527,24 @@ static int ixp4xx_eth_probe(struct platform_device *pdev)
__raw_writel(DEFAULT_CORE_CNTRL, &port->regs->core_control);
udelay(50);
- snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT,
- mdio_bus->id, plat->phy);
- phydev = phy_connect(ndev, phy_id, &ixp4xx_adjust_link,
- PHY_INTERFACE_MODE_MII);
- if (IS_ERR(phydev)) {
- err = PTR_ERR(phydev);
+ if (np) {
+ phydev = of_phy_get_and_connect(ndev, np, ixp4xx_adjust_link);
+ } else {
+ phydev = mdiobus_get_phy(mdio_bus, plat->phy);
+ if (IS_ERR(phydev)) {
+ err = PTR_ERR(phydev);
+ dev_err(dev, "could not connect phydev (%d)\n", err);
+ goto err_free_mem;
+ }
+ err = phy_connect_direct(ndev, phydev, ixp4xx_adjust_link,
+ PHY_INTERFACE_MODE_MII);
+ if (err)
+ goto err_free_mem;
+
+ }
+ if (!phydev) {
+ err = -ENODEV;
+ dev_err(dev, "no phydev\n");
goto err_free_mem;
}
@@ -1482,8 +1562,6 @@ err_phy_dis:
phy_disconnect(phydev);
err_free_mem:
npe_port_tab[NPE_ID(port->id)] = NULL;
- release_resource(port->mem_res);
-err_npe_rel:
npe_release(port->npe);
return err;
}
@@ -1499,12 +1577,21 @@ static int ixp4xx_eth_remove(struct platform_device *pdev)
ixp4xx_mdio_remove();
npe_port_tab[NPE_ID(port->id)] = NULL;
npe_release(port->npe);
- release_resource(port->mem_res);
return 0;
}
+static const struct of_device_id ixp4xx_eth_of_match[] = {
+ {
+ .compatible = "intel,ixp4xx-ethernet",
+ },
+ { },
+};
+
static struct platform_driver ixp4xx_eth_driver = {
- .driver.name = DRV_NAME,
+ .driver = {
+ .name = DRV_NAME,
+ .of_match_table = of_match_ptr(ixp4xx_eth_of_match),
+ },
.probe = ixp4xx_eth_probe,
.remove = ixp4xx_eth_remove,
};
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 072de880b99f..1ab94b5f9bbf 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -892,7 +892,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
__be16 sport;
int err;
- if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+ if (!pskb_inet_may_pull(skb))
return -EINVAL;
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
@@ -989,7 +989,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
__be16 sport;
int err;
- if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+ if (!pskb_inet_may_pull(skb))
return -EINVAL;
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 7349a70af083..f682a5572d84 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2297,6 +2297,7 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
{
struct device *parent = vf_netdev->dev.parent;
struct net_device_context *ndev_ctx;
+ struct net_device *ndev;
struct pci_dev *pdev;
u32 serial;
@@ -2319,8 +2320,17 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
if (!ndev_ctx->vf_alloc)
continue;
- if (ndev_ctx->vf_serial == serial)
- return hv_get_drvdata(ndev_ctx->device_ctx);
+ if (ndev_ctx->vf_serial != serial)
+ continue;
+
+ ndev = hv_get_drvdata(ndev_ctx->device_ctx);
+ if (ndev->addr_len != vf_netdev->addr_len ||
+ memcmp(ndev->perm_addr, vf_netdev->perm_addr,
+ ndev->addr_len) != 0)
+ continue;
+
+ return ndev;
+
}
netdev_notice(vf_netdev,
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 9a9a5cf36a4b..7427b989607e 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -423,18 +423,24 @@ static void macvlan_forward_source_one(struct sk_buff *skb,
macvlan_count_rx(vlan, len, ret == NET_RX_SUCCESS, false);
}
-static void macvlan_forward_source(struct sk_buff *skb,
+static bool macvlan_forward_source(struct sk_buff *skb,
struct macvlan_port *port,
const unsigned char *addr)
{
struct macvlan_source_entry *entry;
u32 idx = macvlan_eth_hash(addr);
struct hlist_head *h = &port->vlan_source_hash[idx];
+ bool consume = false;
hlist_for_each_entry_rcu(entry, h, hlist) {
- if (ether_addr_equal_64bits(entry->addr, addr))
+ if (ether_addr_equal_64bits(entry->addr, addr)) {
+ if (entry->vlan->flags & MACVLAN_FLAG_NODST)
+ consume = true;
macvlan_forward_source_one(skb, entry->vlan);
+ }
}
+
+ return consume;
}
/* called under rcu_read_lock() from netif_receive_skb */
@@ -463,7 +469,8 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
return RX_HANDLER_CONSUMED;
*pskb = skb;
eth = eth_hdr(skb);
- macvlan_forward_source(skb, port, eth->h_source);
+ if (macvlan_forward_source(skb, port, eth->h_source))
+ return RX_HANDLER_CONSUMED;
src = macvlan_hash_lookup(port, eth->h_source);
if (src && src->mode != MACVLAN_MODE_VEPA &&
src->mode != MACVLAN_MODE_BRIDGE) {
@@ -482,7 +489,8 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
return RX_HANDLER_PASS;
}
- macvlan_forward_source(skb, port, eth->h_source);
+ if (macvlan_forward_source(skb, port, eth->h_source))
+ return RX_HANDLER_CONSUMED;
if (macvlan_passthru(port))
vlan = list_first_or_null_rcu(&port->vlans,
struct macvlan_dev, list);
@@ -1286,7 +1294,8 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[],
return 0;
if (data[IFLA_MACVLAN_FLAGS] &&
- nla_get_u16(data[IFLA_MACVLAN_FLAGS]) & ~MACVLAN_FLAG_NOPROMISC)
+ nla_get_u16(data[IFLA_MACVLAN_FLAGS]) & ~(MACVLAN_FLAG_NOPROMISC |
+ MACVLAN_FLAG_NODST))
return -EINVAL;
if (data[IFLA_MACVLAN_MODE]) {
diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c
index 6eac50d4b42f..d453ec016168 100644
--- a/drivers/net/phy/intel-xway.c
+++ b/drivers/net/phy/intel-xway.c
@@ -11,6 +11,18 @@
#define XWAY_MDIO_IMASK 0x19 /* interrupt mask */
#define XWAY_MDIO_ISTAT 0x1A /* interrupt status */
+#define XWAY_MDIO_LED 0x1B /* led control */
+
+/* bit 15:12 are reserved */
+#define XWAY_MDIO_LED_LED3_EN BIT(11) /* Enable the integrated function of LED3 */
+#define XWAY_MDIO_LED_LED2_EN BIT(10) /* Enable the integrated function of LED2 */
+#define XWAY_MDIO_LED_LED1_EN BIT(9) /* Enable the integrated function of LED1 */
+#define XWAY_MDIO_LED_LED0_EN BIT(8) /* Enable the integrated function of LED0 */
+/* bit 7:4 are reserved */
+#define XWAY_MDIO_LED_LED3_DA BIT(3) /* Direct Access to LED3 */
+#define XWAY_MDIO_LED_LED2_DA BIT(2) /* Direct Access to LED2 */
+#define XWAY_MDIO_LED_LED1_DA BIT(1) /* Direct Access to LED1 */
+#define XWAY_MDIO_LED_LED0_DA BIT(0) /* Direct Access to LED0 */
#define XWAY_MDIO_INIT_WOL BIT(15) /* Wake-On-LAN */
#define XWAY_MDIO_INIT_MSRE BIT(14)
@@ -159,6 +171,15 @@ static int xway_gphy_config_init(struct phy_device *phydev)
/* Clear all pending interrupts */
phy_read(phydev, XWAY_MDIO_ISTAT);
+ /* Ensure that integrated led function is enabled for all leds */
+ err = phy_write(phydev, XWAY_MDIO_LED,
+ XWAY_MDIO_LED_LED0_EN |
+ XWAY_MDIO_LED_LED1_EN |
+ XWAY_MDIO_LED_LED2_EN |
+ XWAY_MDIO_LED_LED3_EN);
+ if (err)
+ return err;
+
phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LEDCH,
XWAY_MMD_LEDCH_NACS_NONE |
XWAY_MMD_LEDCH_SBF_F02HZ |
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index e2b2b20c0dc5..a61fde7013bd 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -978,22 +978,28 @@ static int m88e1111_get_downshift(struct phy_device *phydev, u8 *data)
static int m88e1111_set_downshift(struct phy_device *phydev, u8 cnt)
{
- int val;
+ int val, err;
if (cnt > MII_M1111_PHY_EXT_CR_DOWNSHIFT_MAX)
return -E2BIG;
- if (!cnt)
- return phy_clear_bits(phydev, MII_M1111_PHY_EXT_CR,
- MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN);
+ if (!cnt) {
+ err = phy_clear_bits(phydev, MII_M1111_PHY_EXT_CR,
+ MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN);
+ } else {
+ val = MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN;
+ val |= FIELD_PREP(MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK, cnt - 1);
- val = MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN;
- val |= FIELD_PREP(MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK, cnt - 1);
+ err = phy_modify(phydev, MII_M1111_PHY_EXT_CR,
+ MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN |
+ MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK,
+ val);
+ }
- return phy_modify(phydev, MII_M1111_PHY_EXT_CR,
- MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN |
- MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK,
- val);
+ if (err < 0)
+ return err;
+
+ return genphy_soft_reset(phydev);
}
static int m88e1111_get_tunable(struct phy_device *phydev,
@@ -1036,22 +1042,28 @@ static int m88e1011_get_downshift(struct phy_device *phydev, u8 *data)
static int m88e1011_set_downshift(struct phy_device *phydev, u8 cnt)
{
- int val;
+ int val, err;
if (cnt > MII_M1011_PHY_SCR_DOWNSHIFT_MAX)
return -E2BIG;
- if (!cnt)
- return phy_clear_bits(phydev, MII_M1011_PHY_SCR,
- MII_M1011_PHY_SCR_DOWNSHIFT_EN);
+ if (!cnt) {
+ err = phy_clear_bits(phydev, MII_M1011_PHY_SCR,
+ MII_M1011_PHY_SCR_DOWNSHIFT_EN);
+ } else {
+ val = MII_M1011_PHY_SCR_DOWNSHIFT_EN;
+ val |= FIELD_PREP(MII_M1011_PHY_SCR_DOWNSHIFT_MASK, cnt - 1);
- val = MII_M1011_PHY_SCR_DOWNSHIFT_EN;
- val |= FIELD_PREP(MII_M1011_PHY_SCR_DOWNSHIFT_MASK, cnt - 1);
+ err = phy_modify(phydev, MII_M1011_PHY_SCR,
+ MII_M1011_PHY_SCR_DOWNSHIFT_EN |
+ MII_M1011_PHY_SCR_DOWNSHIFT_MASK,
+ val);
+ }
- return phy_modify(phydev, MII_M1011_PHY_SCR,
- MII_M1011_PHY_SCR_DOWNSHIFT_EN |
- MII_M1011_PHY_SCR_DOWNSHIFT_MASK,
- val);
+ if (err < 0)
+ return err;
+
+ return genphy_soft_reset(phydev);
}
static int m88e1011_get_tunable(struct phy_device *phydev,
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 9986f8969d02..136ea06540ff 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -767,8 +767,6 @@ enum rtl8152_flags {
PHY_RESET,
SCHEDULE_TASKLET,
GREEN_ETHERNET,
- DELL_TB_RX_AGG_BUG,
- LENOVO_MACPASSTHRU,
};
#define DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2 0x3082
@@ -934,6 +932,8 @@ struct r8152 {
u32 fc_pause_on, fc_pause_off;
u32 support_2500full:1;
+ u32 lenovo_macpassthru:1;
+ u32 dell_tb_rx_agg_bug:1;
u16 ocp_base;
u16 speed;
u16 eee_adv;
@@ -1594,7 +1594,7 @@ static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa)
acpi_object_type mac_obj_type;
int mac_strlen;
- if (test_bit(LENOVO_MACPASSTHRU, &tp->flags)) {
+ if (tp->lenovo_macpassthru) {
mac_obj_name = "\\MACA";
mac_obj_type = ACPI_TYPE_STRING;
mac_strlen = 0x16;
@@ -2283,7 +2283,7 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
remain = agg_buf_sz - (int)(tx_agg_align(tx_data) - agg->head);
- if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags))
+ if (tp->dell_tb_rx_agg_bug)
break;
}
@@ -6941,7 +6941,7 @@ static void r8153_init(struct r8152 *tp)
/* rx aggregation */
ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN);
- if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags))
+ if (tp->dell_tb_rx_agg_bug)
ocp_data |= RX_AGG_DISABLE;
ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
@@ -9447,7 +9447,7 @@ static int rtl8152_probe(struct usb_interface *intf,
switch (le16_to_cpu(udev->descriptor.idProduct)) {
case DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2:
case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2:
- set_bit(LENOVO_MACPASSTHRU, &tp->flags);
+ tp->lenovo_macpassthru = 1;
}
}
@@ -9455,7 +9455,7 @@ static int rtl8152_probe(struct usb_interface *intf,
(!strcmp(udev->serial, "000001000000") ||
!strcmp(udev->serial, "000002000000"))) {
dev_info(&udev->dev, "Dell TB16 Dock, disable RX aggregation");
- set_bit(DELL_TB_RX_AGG_BUG, &tp->flags);
+ tp->dell_tb_rx_agg_bug = 1;
}
netdev->ethtool_ops = &ops;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index 4456abb9a074..34bde8c87324 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -40,6 +40,7 @@ int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
struct iwl_tfh_tfd *tfd;
+ unsigned long flags;
copy_size = sizeof(struct iwl_cmd_header_wide);
cmd_size = sizeof(struct iwl_cmd_header_wide);
@@ -108,14 +109,14 @@ int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
goto free_dup_buf;
}
- spin_lock_bh(&txq->lock);
+ spin_lock_irqsave(&txq->lock, flags);
idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
tfd = iwl_txq_get_tfd(trans, txq, txq->write_ptr);
memset(tfd, 0, sizeof(*tfd));
if (iwl_txq_space(trans, txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
- spin_unlock_bh(&txq->lock);
+ spin_unlock_irqrestore(&txq->lock, flags);
IWL_ERR(trans, "No space in command queue\n");
iwl_op_mode_cmd_queue_full(trans->op_mode);
@@ -250,7 +251,7 @@ int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
spin_unlock(&trans_pcie->reg_lock);
out:
- spin_unlock_bh(&txq->lock);
+ spin_unlock_irqrestore(&txq->lock, flags);
free_dup_buf:
if (idx < 0)
kfree(dup_buf);
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 6a29fe11485d..8b77d08d4b47 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -458,7 +458,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr);
#else
-struct bpf_prog;
struct cgroup_bpf {};
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c9b7a876b0c8..ad4bcf1cadbb 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -310,6 +310,7 @@ enum bpf_arg_type {
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
ARG_PTR_TO_FUNC, /* pointer to a bpf program function */
ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */
+ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
__BPF_ARG_TYPE_MAX,
};
@@ -930,7 +931,6 @@ struct bpf_link_primer {
};
struct bpf_struct_ops_value;
-struct btf_type;
struct btf_member;
#define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64
@@ -1955,6 +1955,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
extern const struct bpf_func_proto bpf_copy_from_user_proto;
extern const struct bpf_func_proto bpf_snprintf_btf_proto;
+extern const struct bpf_func_proto bpf_snprintf_proto;
extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
@@ -2080,4 +2081,24 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
struct btf_id_set;
bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
+/* Size class of one printf-style argument. bpf_printf_prepare() records one of
+ * these per format specifier in its mod array, and BPF_CAST_FMT_ARG() uses it
+ * to pick the width an argument is passed with.
+ */
+enum bpf_printf_mod_type {
+ BPF_PRINTF_INT,
+ BPF_PRINTF_LONG,
+ BPF_PRINTF_LONG_LONG,
+};
+
+/* Workaround for getting va_list handling working with different argument type
+ * combinations generically for 32 and 64 bit archs.
+ *
+ * Expands to args[arg_nb] cast to u64 when mod[arg_nb] is BPF_PRINTF_LONG_LONG,
+ * or is BPF_PRINTF_LONG and __BITS_PER_LONG is 64; otherwise the value is cast
+ * to u32.
+ */
+#define BPF_CAST_FMT_ARG(arg_nb, args, mod) \
+ (mod[arg_nb] == BPF_PRINTF_LONG_LONG || \
+ (mod[arg_nb] == BPF_PRINTF_LONG && __BITS_PER_LONG == 64) \
+ ? (u64)args[arg_nb] \
+ : (u32)args[arg_nb])
+
+/* Validate a format string and, when final_args and mod are supplied, prepare
+ * the arguments for it; bpf_printf_cleanup() drops the per-CPU temporary
+ * buffer that the preparation may have taken.
+ */
+int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
+ u64 *final_args, enum bpf_printf_mod_type *mod,
+ u32 num_args);
+void bpf_printf_cleanup(void);
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 51c2ffa3d901..6023a1367853 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -487,6 +487,15 @@ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
return ((u64)btf_obj_id(btf) << 32) | 0x80000000 | btf_id;
}
+/*
+ * Split a trampoline key, as constructed above, back into its two IDs: the
+ * upper 32 bits are reported through @obj_id and the low 31 bits through
+ * @btf_id. Either output pointer may be NULL, in which case that half is
+ * simply not reported.
+ */
+static inline void bpf_trampoline_unpack_key(u64 key, u32 *obj_id, u32 *btf_id)
+{
+	const u32 upper = key >> 32;
+	const u32 lower = key & 0x7FFFFFFF;
+
+	if (obj_id)
+		*obj_id = upper;
+	if (btf_id)
+		*btf_id = lower;
+}
+
int bpf_check_attach_target(struct bpf_verifier_log *log,
const struct bpf_prog *prog,
const struct bpf_prog *tgt_prog,
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 9cf1da2883c6..17109b65c1ac 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -65,8 +65,6 @@ struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
struct mlx5_eswitch_rep *rep, u32 sqn);
-u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
-
#ifdef CONFIG_MLX5_ESWITCH
enum devlink_eswitch_encap_mode
mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
@@ -126,6 +124,8 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
#define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK
u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
+u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
+
#else /* CONFIG_MLX5_ESWITCH */
static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev)
@@ -162,10 +162,17 @@ mlx5_eswitch_get_vport_metadata_mask(void)
{
return 0;
}
+
+/* Stub used when CONFIG_MLX5_ESWITCH is not set: always reports zero vports. */
+static inline u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
+{
+ return 0;
+}
+
#endif /* CONFIG_MLX5_ESWITCH */
static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev)
{
return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS;
}
+
#endif
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 4db87bcfce7b..aad53cb72f17 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -36,14 +36,6 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/device.h>
-#define MLX5_VPORT_PF_PLACEHOLDER (1u)
-#define MLX5_VPORT_UPLINK_PLACEHOLDER (1u)
-#define MLX5_VPORT_ECPF_PLACEHOLDER(mdev) (mlx5_ecpf_vport_exists(mdev))
-
-#define MLX5_SPECIAL_VPORTS(mdev) (MLX5_VPORT_PF_PLACEHOLDER + \
- MLX5_VPORT_UPLINK_PLACEHOLDER + \
- MLX5_VPORT_ECPF_PLACEHOLDER(mdev))
-
#define MLX5_VPORT_MANAGER(mdev) \
(MLX5_CAP_GEN(mdev, vport_group_manager) && \
(MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
diff --git a/include/linux/platform_data/eth_ixp4xx.h b/include/linux/platform_data/eth_ixp4xx.h
index 6f652ea0c6ae..114b0940729f 100644
--- a/include/linux/platform_data/eth_ixp4xx.h
+++ b/include/linux/platform_data/eth_ixp4xx.h
@@ -14,6 +14,8 @@ struct eth_plat_info {
u8 rxq; /* configurable, currently 0 - 31 only */
u8 txreadyq;
u8 hwaddr[6];
+ u8 npe; /* NPE instance used by this interface */
+ bool has_mdio; /* If this instance has an MDIO bus */
};
#endif
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index e242bf3d2b4a..aba0f0f429be 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -99,7 +99,8 @@ struct sk_psock {
void (*saved_close)(struct sock *sk, long timeout);
void (*saved_write_space)(struct sock *sk);
void (*saved_data_ready)(struct sock *sk);
- int (*psock_update_sk_prot)(struct sock *sk, bool restore);
+ int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock,
+ bool restore);
struct proto *sk_proto;
struct mutex work_mutex;
struct sk_psock_work_state work_state;
@@ -404,7 +405,7 @@ static inline void sk_psock_restore_proto(struct sock *sk,
struct sk_psock *psock)
{
if (psock->psock_update_sk_prot)
- psock->psock_update_sk_prot(sk, true);
+ psock->psock_update_sk_prot(sk, psock, true);
}
static inline void sk_psock_set_state(struct sk_psock *psock,
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 853420db5d32..7c984cadfec4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -98,11 +98,13 @@ struct devlink_port_pci_vf_attrs {
* @controller: Associated controller number
* @sf: Associated PCI SF for of the PCI PF for this port.
* @pf: Associated PCI PF number for this port.
+ * @external: when set, indicates if a port is for an external controller
*/
struct devlink_port_pci_sf_attrs {
u32 controller;
u32 sf;
u16 pf;
+ u8 external:1;
};
/**
@@ -1508,7 +1510,8 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro
void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
u16 pf, u16 vf, bool external);
void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
- u32 controller, u16 pf, u32 sf);
+ u32 controller, u16 pf, u32 sf,
+ bool external);
int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
u32 size, u16 ingress_pools_count,
u16 egress_pools_count, u16 ingress_tc_count,
diff --git a/include/net/sock.h b/include/net/sock.h
index cadcc12cc316..42bc5e1a627f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1118,6 +1118,7 @@ struct inet_hashinfo;
struct raw_hashinfo;
struct smc_hashinfo;
struct module;
+struct sk_psock;
/*
* caches using SLAB_TYPESAFE_BY_RCU should let .next pointer from nulls nodes
@@ -1189,7 +1190,9 @@ struct proto {
void (*rehash)(struct sock *sk);
int (*get_port)(struct sock *sk, unsigned short snum);
#ifdef CONFIG_BPF_SYSCALL
- int (*psock_update_sk_prot)(struct sock *sk, bool restore);
+ int (*psock_update_sk_prot)(struct sock *sk,
+ struct sk_psock *psock,
+ bool restore);
#endif
/* Keeping track of sockets in use */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index eaea43afcc97..d05193cb0d99 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2215,7 +2215,7 @@ struct sk_psock;
#ifdef CONFIG_BPF_SYSCALL
struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
-int tcp_bpf_update_proto(struct sock *sk, bool restore);
+int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#endif /* CONFIG_BPF_SYSCALL */
diff --git a/include/net/udp.h b/include/net/udp.h
index f55aaeef7e91..360df454356c 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -543,7 +543,7 @@ static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
#ifdef CONFIG_BPF_SYSCALL
struct sk_psock;
struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
-int udp_bpf_update_proto(struct sock *sk, bool restore);
+int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
#endif
#endif /* _UDP_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 49371eba98ba..ec6d85a81744 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -312,6 +312,27 @@ union bpf_iter_link_info {
* *ctx_out*, *data_out* (for example, packet data), result of the
* execution *retval*, and *duration* of the test run.
*
+ * The sizes of the buffers provided as input and output
+ * parameters *ctx_in*, *ctx_out*, *data_in*, and *data_out* must
+ * be provided in the corresponding variables *ctx_size_in*,
+ * *ctx_size_out*, *data_size_in*, and/or *data_size_out*. If any
+ * of these parameters are not provided (i.e. set to NULL), the
+ * corresponding size field must be zero.
+ *
+ * Some program types have particular requirements:
+ *
+ * **BPF_PROG_TYPE_SK_LOOKUP**
+ * *data_in* and *data_out* must be NULL.
+ *
+ * **BPF_PROG_TYPE_XDP**
+ * *ctx_in* and *ctx_out* must be NULL.
+ *
+ * **BPF_PROG_TYPE_RAW_TRACEPOINT**,
+ * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
+ *
+ * *ctx_out*, *data_in* and *data_out* must be NULL.
+ * *repeat* must be zero.
+ *
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
@@ -4061,12 +4082,20 @@ union bpf_attr {
* of new data availability is sent.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
+ * If **0** is specified in *flags*, an adaptive notification
+ * of new data availability is sent.
+ *
+ * An adaptive notification is a notification sent whenever the user-space
+ * process has caught up and consumed all available payloads. In case the user-space
+ * process is still processing a previous payload, then no notification is needed
+ * as it will process the newly added payload automatically.
* Return
* 0 on success, or a negative error in case of failure.
*
* void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
* Description
* Reserve *size* bytes of payload in a ring buffer *ringbuf*.
+ * *flags* must be 0.
* Return
* Valid pointer with *size* bytes of memory available; NULL,
* otherwise.
@@ -4078,6 +4107,10 @@ union bpf_attr {
* of new data availability is sent.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
+ * If **0** is specified in *flags*, an adaptive notification
+ * of new data availability is sent.
+ *
+ * See 'bpf_ringbuf_output()' for the definition of adaptive notification.
* Return
* Nothing. Always succeeds.
*
@@ -4088,6 +4121,10 @@ union bpf_attr {
* of new data availability is sent.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
+ * If **0** is specified in *flags*, an adaptive notification
+ * of new data availability is sent.
+ *
+ * See 'bpf_ringbuf_output()' for the definition of adaptive notification.
* Return
* Nothing. Always succeeds.
*
@@ -4671,6 +4708,33 @@ union bpf_attr {
* Return
* The number of traversed map elements for success, **-EINVAL** for
* invalid **flags**.
+ *
+ * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len)
+ * Description
+ * Outputs a string into the **str** buffer of size **str_size**
+ * based on a format string stored in a read-only map pointed to by
+ * **fmt**.
+ *
+ * Each format specifier in **fmt** corresponds to one u64 element
+ * in the **data** array. For strings and pointers where pointees
+ * are accessed, only the pointer values are stored in the *data*
+ * array. The *data_len* is the size of *data* in bytes.
+ *
+ * Formats **%s** and **%p{i,I}{4,6}** require reading kernel
+ * memory. Reading kernel memory may fail due to either invalid
+ * address or valid address but requiring a major memory fault. If
+ * reading kernel memory fails, the string for **%s** will be an
+ * empty string, and the ip address for **%p{i,I}{4,6}** will be 0.
+ * Not returning error to bpf program is consistent with what
+ * **bpf_trace_printk**\ () does for now.
+ *
+ * Return
+ * The strictly positive length of the formatted string, including
+ * the trailing zero character. If the return value is greater than
+ * **str_size**, **str** contains a truncated string, guaranteed to
+ * be zero-terminated except when **str_size** is 0.
+ *
+ * Or **-EBUSY** if the per-CPU memory copy buffer is busy.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4838,6 +4902,7 @@ union bpf_attr {
FN(sock_from_file), \
FN(check_mtu), \
FN(for_each_map_elem), \
+ FN(snprintf), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5379,6 +5444,8 @@ struct bpf_link_info {
} raw_tracepoint;
struct {
__u32 attach_type;
+ __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */
+ __u32 target_btf_id; /* BTF type id inside the object */
} tracing;
struct {
__u64 cgroup_id;
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 91c8dda6d95d..cd5b382a4138 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -614,6 +614,7 @@ enum macvlan_macaddr_mode {
};
#define MACVLAN_FLAG_NOPROMISC 1
+#define MACVLAN_FLAG_NODST 2 /* skip dst macvlan if matching src macvlan */
/* VRF section */
enum {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f5423251c118..5e31ee9f7512 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1363,11 +1363,10 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
* __bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
* @insn: is the array of eBPF instructions
- * @stack: is the eBPF storage stack
*
* Decode and execute eBPF instructions.
*/
-static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
+static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
{
#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
@@ -1701,7 +1700,7 @@ static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn
\
FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
ARG1 = (u64) (unsigned long) ctx; \
- return ___bpf_prog_run(regs, insn, stack); \
+ return ___bpf_prog_run(regs, insn); \
}
#define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size
@@ -1718,7 +1717,7 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
BPF_R3 = r3; \
BPF_R4 = r4; \
BPF_R5 = r5; \
- return ___bpf_prog_run(regs, insn, stack); \
+ return ___bpf_prog_run(regs, insn); \
}
#define EVAL1(FN, X) FN(X)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index f306611c4ddf..85b26ca5aacd 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -669,6 +669,310 @@ const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
.arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
};
+/* Copy the string at unsafe_ptr into buf (at most bufsz bytes) using the
+ * non-faulting copy helper selected by fmt_ptype:
+ * 'k' - strncpy_from_kernel_nofault()
+ * 'u' - strncpy_from_user_nofault()
+ * 's' - same as 'k', except that with
+ * CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE an address below
+ * TASK_SIZE is read with the user helper instead
+ *
+ * Returns whatever the selected helper returns, or -EINVAL for any other
+ * fmt_ptype. buf always starts out as an empty string.
+ */
+static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
+ size_t bufsz)
+{
+ void __user *user_ptr = (__force void __user *)unsafe_ptr;
+
+ /* Make sure buf holds an empty string before any copy is attempted */
+ buf[0] = 0;
+
+ switch (fmt_ptype) {
+ case 's':
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+ if ((unsigned long)unsafe_ptr < TASK_SIZE)
+ return strncpy_from_user_nofault(buf, user_ptr, bufsz);
+ fallthrough;
+#endif
+ /* Without the config option above, 's' lands directly on 'k' */
+ case 'k':
+ return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
+ case 'u':
+ return strncpy_from_user_nofault(buf, user_ptr, bufsz);
+ }
+
+ return -EINVAL;
+}
+
+/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p.
+ * bpf_printf_buf_used counts the current users of this CPU's buffer.
+ */
+#define MAX_PRINTF_BUF_LEN 512
+
+struct bpf_printf_buf {
+ char tmp_buf[MAX_PRINTF_BUF_LEN];
+};
+static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf);
+static DEFINE_PER_CPU(int, bpf_printf_buf_used);
+
+/* Hand out this CPU's temporary buffer through *tmp_buf.
+ *
+ * Does nothing and returns 0 if *tmp_buf is already set. Otherwise, on success,
+ * returns 0 with preemption left disabled and the per-CPU use counter
+ * incremented; bpf_printf_cleanup() undoes both. If the counter shows the
+ * buffer is already in use, the increment is rolled back, preemption is
+ * re-enabled and -EBUSY is returned.
+ */
+static int try_get_fmt_tmp_buf(char **tmp_buf)
+{
+ struct bpf_printf_buf *bufs;
+ int used;
+
+ /* The caller already holds the buffer */
+ if (*tmp_buf)
+ return 0;
+
+ /* Preemption stays disabled for as long as the buffer is held */
+ preempt_disable();
+ used = this_cpu_inc_return(bpf_printf_buf_used);
+ if (WARN_ON_ONCE(used > 1)) {
+ /* Someone else is using this CPU's buffer: undo and bail out */
+ this_cpu_dec(bpf_printf_buf_used);
+ preempt_enable();
+ return -EBUSY;
+ }
+ bufs = this_cpu_ptr(&bpf_printf_buf);
+ *tmp_buf = bufs->tmp_buf;
+
+ return 0;
+}
+
+/*
+ * Drop this CPU's temporary printf buffer if it is marked as in use: the
+ * per-CPU use counter is decremented and preemption, left disabled by
+ * try_get_fmt_tmp_buf(), is enabled again. Does nothing when the counter
+ * is zero.
+ */
+void bpf_printf_cleanup(void)
+{
+	if (!this_cpu_read(bpf_printf_buf_used))
+		return;
+
+	this_cpu_dec(bpf_printf_buf_used);
+	preempt_enable();
+}
+
+/*
+ * bpf_printf_prepare - Generic pass on format strings for printf-like helpers
+ * @fmt: format string to check
+ * @fmt_size: upper bound on the bytes of @fmt searched for the terminating 0
+ * @raw_args: argument values as received from BPF, one u64 per specifier
+ * @final_args: output array for the sanitized arguments, or NULL
+ * @mod: output array for the size class of each argument, or NULL
+ * @num_args: maximum number of format specifiers @fmt may contain
+ *
+ * Returns 0 on success or a negative value otherwise: -EINVAL if fmt is an
+ * invalid format string (not zero-terminated within fmt_size, non-printable
+ * characters, unsupported specifier, more specifiers than num_args) or if only
+ * one of final_args and mod is given, -EBUSY if the per-CPU temporary buffer
+ * is already in use, -ENOSPC if that buffer is too small for the copied data.
+ *
+ * This can be used in two ways:
+ * - Format string verification only: when final_args and mod are NULL
+ * - Arguments preparation: in addition to the above verification, it writes in
+ *   final_args a copy of raw_args where pointers from BPF have been sanitized
+ *   into pointers safe to use by snprintf. This also writes in the mod array
+ *   the size requirement of each argument, usable by BPF_CAST_FMT_ARG for ex.
+ *
+ * In argument preparation mode, if 0 is returned, safe temporary buffers are
+ * allocated and bpf_printf_cleanup should be called to free them after use.
+ */
+int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
+			u64 *final_args, enum bpf_printf_mod_type *mod,
+			u32 num_args)
+{
+	char *unsafe_ptr = NULL, *tmp_buf = NULL, *fmt_end;
+	size_t tmp_buf_len = MAX_PRINTF_BUF_LEN;
+	int err, i, num_spec = 0, copy_size;
+	enum bpf_printf_mod_type cur_mod;
+	u64 cur_arg;
+	char fmt_ptype;
+
+	/* final_args and mod must be given together or not at all */
+	if (!!final_args != !!mod)
+		return -EINVAL;
+
+	fmt_end = strnchr(fmt, fmt_size, 0);
+	if (!fmt_end)
+		return -EINVAL;
+	fmt_size = fmt_end - fmt;
+
+	for (i = 0; i < fmt_size; i++) {
+		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
+			err = -EINVAL;
+			goto cleanup;
+		}
+
+		if (fmt[i] != '%')
+			continue;
+
+		/* "%%" is a literal percent sign and consumes no argument */
+		if (fmt[i + 1] == '%') {
+			i++;
+			continue;
+		}
+
+		if (num_spec >= num_args) {
+			err = -EINVAL;
+			goto cleanup;
+		}
+
+		/* The string is zero-terminated so if fmt[i] != 0, we can
+		 * always access fmt[i + 1], in the worst case it will be a 0
+		 */
+		i++;
+
+		/* skip optional "[0 +-][num]" width formatting field */
+		while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
+		       fmt[i] == ' ')
+			i++;
+		if (fmt[i] >= '1' && fmt[i] <= '9') {
+			i++;
+			while (fmt[i] >= '0' && fmt[i] <= '9')
+				i++;
+		}
+
+		if (fmt[i] == 'p') {
+			cur_mod = BPF_PRINTF_LONG;
+
+			/* "%pks" and "%pus" are handled as strings */
+			if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
+			    fmt[i + 2] == 's') {
+				fmt_ptype = fmt[i + 1];
+				i += 2;
+				goto fmt_str;
+			}
+
+			if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
+			    ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
+			    fmt[i + 1] == 'x' || fmt[i + 1] == 'B' ||
+			    fmt[i + 1] == 's' || fmt[i + 1] == 'S') {
+				/* just kernel pointers */
+				if (final_args)
+					cur_arg = raw_args[num_spec];
+				goto fmt_next;
+			}
+
+			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
+			if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
+			    (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
+				err = -EINVAL;
+				goto cleanup;
+			}
+
+			/* fmt[i] is now the '4' or '6' address-family digit */
+			i += 2;
+			if (!final_args)
+				goto fmt_next;
+
+			if (try_get_fmt_tmp_buf(&tmp_buf)) {
+				err = -EBUSY;
+				goto out;
+			}
+
+			/* i already points at the digit, so test fmt[i]; looking
+			 * at fmt[i + 2] would size the copy from an unrelated
+			 * character further down the format string.
+			 */
+			copy_size = (fmt[i] == '4') ? 4 : 16;
+			if (tmp_buf_len < copy_size) {
+				err = -ENOSPC;
+				goto cleanup;
+			}
+
+			/* On a failed read the address is presented as all-zero */
+			unsafe_ptr = (char *)(long)raw_args[num_spec];
+			err = copy_from_kernel_nofault(tmp_buf, unsafe_ptr,
+						       copy_size);
+			if (err < 0)
+				memset(tmp_buf, 0, copy_size);
+			cur_arg = (u64)(long)tmp_buf;
+			tmp_buf += copy_size;
+			tmp_buf_len -= copy_size;
+
+			goto fmt_next;
+		} else if (fmt[i] == 's') {
+			cur_mod = BPF_PRINTF_LONG;
+			fmt_ptype = fmt[i];
+fmt_str:
+			if (fmt[i + 1] != 0 &&
+			    !isspace(fmt[i + 1]) &&
+			    !ispunct(fmt[i + 1])) {
+				err = -EINVAL;
+				goto cleanup;
+			}
+
+			if (!final_args)
+				goto fmt_next;
+
+			if (try_get_fmt_tmp_buf(&tmp_buf)) {
+				err = -EBUSY;
+				goto out;
+			}
+
+			if (!tmp_buf_len) {
+				err = -ENOSPC;
+				goto cleanup;
+			}
+
+			unsafe_ptr = (char *)(long)raw_args[num_spec];
+			err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
+						    fmt_ptype, tmp_buf_len);
+			if (err < 0) {
+				/* A failed read yields an empty string, which
+				 * still takes one byte of the temporary buffer
+				 */
+				tmp_buf[0] = '\0';
+				err = 1;
+			}
+
+			cur_arg = (u64)(long)tmp_buf;
+			tmp_buf += err;
+			tmp_buf_len -= err;
+
+			goto fmt_next;
+		}
+
+		cur_mod = BPF_PRINTF_INT;
+
+		if (fmt[i] == 'l') {
+			cur_mod = BPF_PRINTF_LONG;
+			i++;
+		}
+		if (fmt[i] == 'l') {
+			cur_mod = BPF_PRINTF_LONG_LONG;
+			i++;
+		}
+
+		if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
+		    fmt[i] != 'x' && fmt[i] != 'X') {
+			err = -EINVAL;
+			goto cleanup;
+		}
+
+		if (final_args)
+			cur_arg = raw_args[num_spec];
+fmt_next:
+		if (final_args) {
+			mod[num_spec] = cur_mod;
+			final_args[num_spec] = cur_arg;
+		}
+		num_spec++;
+	}
+
+	err = 0;
+cleanup:
+	/* On success the temporary buffer is kept for the caller to release */
+	if (err)
+		bpf_printf_cleanup();
+out:
+	return err;
+}
+
+#define MAX_SNPRINTF_VARARGS		12
+
+/*
+ * bpf_snprintf() helper: format @fmt into @str (at most @str_size bytes) using
+ * the u64 values in @data, whose size in bytes is @data_len.
+ *
+ * Returns -EINVAL when @data_len is not a multiple of 8, exceeds
+ * MAX_SNPRINTF_VARARGS entries, or is non-zero while @data is NULL; a negative
+ * error from bpf_printf_prepare(); otherwise the snprintf() result plus one.
+ */
+BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
+	   const void *, data, u32, data_len)
+{
+	enum bpf_printf_mod_type fmt_mod[MAX_SNPRINTF_VARARGS];
+	u64 fmt_args[MAX_SNPRINTF_VARARGS];
+	int ret, nr_args;
+
+	/* data is consumed as whole u64 slots, at most MAX_SNPRINTF_VARARGS */
+	if (data_len % 8)
+		return -EINVAL;
+	if (data_len > MAX_SNPRINTF_VARARGS * 8)
+		return -EINVAL;
+	if (data_len && !data)
+		return -EINVAL;
+	nr_args = data_len / 8;
+
+	/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
+	 * can safely give an unbounded size.
+	 */
+	ret = bpf_printf_prepare(fmt, UINT_MAX, data, fmt_args, fmt_mod,
+				 nr_args);
+	if (ret < 0)
+		return ret;
+
+	/* At most MAX_SNPRINTF_VARARGS parameters can be used, so simply hand
+	 * all of them to snprintf().
+	 */
+	ret = snprintf(str, str_size, fmt,
+		       BPF_CAST_FMT_ARG(0, fmt_args, fmt_mod),
+		       BPF_CAST_FMT_ARG(1, fmt_args, fmt_mod),
+		       BPF_CAST_FMT_ARG(2, fmt_args, fmt_mod),
+		       BPF_CAST_FMT_ARG(3, fmt_args, fmt_mod),
+		       BPF_CAST_FMT_ARG(4, fmt_args, fmt_mod),
+		       BPF_CAST_FMT_ARG(5, fmt_args, fmt_mod),
+		       BPF_CAST_FMT_ARG(6, fmt_args, fmt_mod),
+		       BPF_CAST_FMT_ARG(7, fmt_args, fmt_mod),
+		       BPF_CAST_FMT_ARG(8, fmt_args, fmt_mod),
+		       BPF_CAST_FMT_ARG(9, fmt_args, fmt_mod),
+		       BPF_CAST_FMT_ARG(10, fmt_args, fmt_mod),
+		       BPF_CAST_FMT_ARG(11, fmt_args, fmt_mod));
+
+	bpf_printf_cleanup();
+
+	/* snprintf()'s result plus one, to count the trailing zero character */
+	return ret + 1;
+}
+
+/* Prototype of the bpf_snprintf() helper. The five argument types line up with
+ * bpf_snprintf()'s parameters in order: str, str_size, fmt, data, data_len.
+ * The helper is marked GPL-only and returns an integer.
+ */
+const struct bpf_func_proto bpf_snprintf_proto = {
+ .func = bpf_snprintf,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_PTR_TO_CONST_STR,
+ .arg4_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+};
+
const struct bpf_func_proto bpf_get_current_task_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
@@ -757,6 +1061,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_probe_read_kernel_str_proto;
case BPF_FUNC_snprintf_btf:
return &bpf_snprintf_btf_proto;
+ case BPF_FUNC_snprintf:
+ return &bpf_snprintf_proto;
default:
return NULL;
}
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index d2de2abec35b..b4ebd60a6c16 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -816,8 +816,6 @@ static int __init bpf_init(void)
{
int ret;
- mutex_init(&bpf_preload_lock);
-
ret = sysfs_create_mount_point(fs_kobj, "bpf");
if (ret)
return ret;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6428634da57e..fd495190115e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2551,6 +2551,9 @@ static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
container_of(link, struct bpf_tracing_link, link);
info->tracing.attach_type = tr_link->attach_type;
+ bpf_trampoline_unpack_key(tr_link->trampoline->key,
+ &info->tracing.target_obj_id,
+ &info->tracing.target_btf_id);
return 0;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5682a02901d3..637462e9b6ee 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4787,6 +4787,7 @@ static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALU
static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
+static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
@@ -4817,6 +4818,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
[ARG_PTR_TO_FUNC] = &func_ptr_types,
[ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types,
+ [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
};
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -5067,6 +5069,44 @@ skip_type_check:
if (err)
return err;
err = check_ptr_alignment(env, reg, 0, size, true);
+ } else if (arg_type == ARG_PTR_TO_CONST_STR) {
+ struct bpf_map *map = reg->map_ptr;
+ int map_off;
+ u64 map_addr;
+ char *str_ptr;
+
+ if (!bpf_map_is_rdonly(map)) {
+ verbose(env, "R%d does not point to a readonly map'\n", regno);
+ return -EACCES;
+ }
+
+ if (!tnum_is_const(reg->var_off)) {
+ verbose(env, "R%d is not a constant address'\n", regno);
+ return -EACCES;
+ }
+
+ if (!map->ops->map_direct_value_addr) {
+ verbose(env, "no direct value access support for this map type\n");
+ return -EACCES;
+ }
+
+ err = check_map_access(env, regno, reg->off,
+ map->value_size - reg->off, false);
+ if (err)
+ return err;
+
+ map_off = reg->off + reg->var_off.value;
+ err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
+ if (err) {
+ verbose(env, "direct value access on string failed\n");
+ return err;
+ }
+
+ str_ptr = (char *)(long)(map_addr);
+ if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
+ verbose(env, "string is not zero-terminated\n");
+ return -EINVAL;
+ }
}
return err;
@@ -5767,6 +5807,7 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
if (ret_type != RET_INTEGER ||
(func_id != BPF_FUNC_get_stack &&
+ func_id != BPF_FUNC_get_task_stack &&
func_id != BPF_FUNC_probe_read_str &&
func_id != BPF_FUNC_probe_read_kernel_str &&
func_id != BPF_FUNC_probe_read_user_str))
@@ -5877,6 +5918,43 @@ static int check_reference_leak(struct bpf_verifier_env *env)
return state->acquired_refs ? -EINVAL : 0;
}
+static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
+ struct bpf_reg_state *regs)
+{
+ struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
+ struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
+ struct bpf_map *fmt_map = fmt_reg->map_ptr;
+ int err, fmt_map_off, num_args;
+ u64 fmt_addr;
+ char *fmt;
+
+ /* data must be an array of u64 */
+ if (data_len_reg->var_off.value % 8)
+ return -EINVAL;
+ num_args = data_len_reg->var_off.value / 8;
+
+ /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
+ * and map_direct_value_addr is set.
+ */
+ fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
+ err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
+ fmt_map_off);
+ if (err) {
+ verbose(env, "verifier bug\n");
+ return -EFAULT;
+ }
+ fmt = (char *)(long)fmt_addr + fmt_map_off;
+
+ /* We are also guaranteed that fmt (fmt_addr + fmt_map_off) is
+ * NUL-terminated, so we can focus on validating the format specifiers.
+ */
+ err = bpf_printf_prepare(fmt, UINT_MAX, NULL, NULL, NULL, num_args);
+ if (err < 0)
+ verbose(env, "Invalid format string\n");
+
+ return err;
+}
+
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
@@ -5991,6 +6069,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return -EINVAL;
}
+ if (func_id == BPF_FUNC_snprintf) {
+ err = check_bpf_snprintf_call(env, regs);
+ if (err < 0)
+ return err;
+ }
+
/* reset caller saved regs */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
mark_reg_not_init(env, regs, caller_saved[i]);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0d23755c2747..2a8bcdc927c7 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -372,188 +372,38 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
return &bpf_probe_write_user_proto;
}
-static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
- size_t bufsz)
-{
- void __user *user_ptr = (__force void __user *)unsafe_ptr;
-
- buf[0] = 0;
-
- switch (fmt_ptype) {
- case 's':
-#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
- if ((unsigned long)unsafe_ptr < TASK_SIZE) {
- strncpy_from_user_nofault(buf, user_ptr, bufsz);
- break;
- }
- fallthrough;
-#endif
- case 'k':
- strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
- break;
- case 'u':
- strncpy_from_user_nofault(buf, user_ptr, bufsz);
- break;
- }
-}
-
static DEFINE_RAW_SPINLOCK(trace_printk_lock);
-#define BPF_TRACE_PRINTK_SIZE 1024
+#define MAX_TRACE_PRINTK_VARARGS 3
+#define BPF_TRACE_PRINTK_SIZE 1024
-static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...)
+BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
+ u64, arg2, u64, arg3)
{
+ u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
+ enum bpf_printf_mod_type mod[MAX_TRACE_PRINTK_VARARGS];
static char buf[BPF_TRACE_PRINTK_SIZE];
unsigned long flags;
- va_list ap;
int ret;
- raw_spin_lock_irqsave(&trace_printk_lock, flags);
- va_start(ap, fmt);
- ret = vsnprintf(buf, sizeof(buf), fmt, ap);
- va_end(ap);
- /* vsnprintf() will not append null for zero-length strings */
+ ret = bpf_printf_prepare(fmt, fmt_size, args, args, mod,
+ MAX_TRACE_PRINTK_VARARGS);
+ if (ret < 0)
+ return ret;
+
+ ret = snprintf(buf, sizeof(buf), fmt, BPF_CAST_FMT_ARG(0, args, mod),
+ BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod));
+ /* snprintf() will not append null for zero-length strings */
if (ret == 0)
buf[0] = '\0';
+
+ raw_spin_lock_irqsave(&trace_printk_lock, flags);
trace_bpf_trace_printk(buf);
raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
- return ret;
-}
-
-/*
- * Only limited trace_printk() conversion specifiers allowed:
- * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
- */
-BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
- u64, arg2, u64, arg3)
-{
- int i, mod[3] = {}, fmt_cnt = 0;
- char buf[64], fmt_ptype;
- void *unsafe_ptr = NULL;
- bool str_seen = false;
+ bpf_printf_cleanup();
- /*
- * bpf_check()->check_func_arg()->check_stack_boundary()
- * guarantees that fmt points to bpf program stack,
- * fmt_size bytes of it were initialized and fmt_size > 0
- */
- if (fmt[--fmt_size] != 0)
- return -EINVAL;
-
- /* check format string for allowed specifiers */
- for (i = 0; i < fmt_size; i++) {
- if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
- return -EINVAL;
-
- if (fmt[i] != '%')
- continue;
-
- if (fmt_cnt >= 3)
- return -EINVAL;
-
- /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
- i++;
- if (fmt[i] == 'l') {
- mod[fmt_cnt]++;
- i++;
- } else if (fmt[i] == 'p') {
- mod[fmt_cnt]++;
- if ((fmt[i + 1] == 'k' ||
- fmt[i + 1] == 'u') &&
- fmt[i + 2] == 's') {
- fmt_ptype = fmt[i + 1];
- i += 2;
- goto fmt_str;
- }
-
- if (fmt[i + 1] == 'B') {
- i++;
- goto fmt_next;
- }
-
- /* disallow any further format extensions */
- if (fmt[i + 1] != 0 &&
- !isspace(fmt[i + 1]) &&
- !ispunct(fmt[i + 1]))
- return -EINVAL;
-
- goto fmt_next;
- } else if (fmt[i] == 's') {
- mod[fmt_cnt]++;
- fmt_ptype = fmt[i];
-fmt_str:
- if (str_seen)
- /* allow only one '%s' per fmt string */
- return -EINVAL;
- str_seen = true;
-
- if (fmt[i + 1] != 0 &&
- !isspace(fmt[i + 1]) &&
- !ispunct(fmt[i + 1]))
- return -EINVAL;
-
- switch (fmt_cnt) {
- case 0:
- unsafe_ptr = (void *)(long)arg1;
- arg1 = (long)buf;
- break;
- case 1:
- unsafe_ptr = (void *)(long)arg2;
- arg2 = (long)buf;
- break;
- case 2:
- unsafe_ptr = (void *)(long)arg3;
- arg3 = (long)buf;
- break;
- }
-
- bpf_trace_copy_string(buf, unsafe_ptr, fmt_ptype,
- sizeof(buf));
- goto fmt_next;
- }
-
- if (fmt[i] == 'l') {
- mod[fmt_cnt]++;
- i++;
- }
-
- if (fmt[i] != 'i' && fmt[i] != 'd' &&
- fmt[i] != 'u' && fmt[i] != 'x')
- return -EINVAL;
-fmt_next:
- fmt_cnt++;
- }
-
-/* Horrid workaround for getting va_list handling working with different
- * argument type combinations generically for 32 and 64 bit archs.
- */
-#define __BPF_TP_EMIT() __BPF_ARG3_TP()
-#define __BPF_TP(...) \
- bpf_do_trace_printk(fmt, ##__VA_ARGS__)
-
-#define __BPF_ARG1_TP(...) \
- ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \
- ? __BPF_TP(arg1, ##__VA_ARGS__) \
- : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \
- ? __BPF_TP((long)arg1, ##__VA_ARGS__) \
- : __BPF_TP((u32)arg1, ##__VA_ARGS__)))
-
-#define __BPF_ARG2_TP(...) \
- ((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \
- ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__) \
- : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \
- ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__) \
- : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))
-
-#define __BPF_ARG3_TP(...) \
- ((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64)) \
- ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__) \
- : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32)) \
- ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__) \
- : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))
-
- return __BPF_TP_EMIT();
+ return ret;
}
static const struct bpf_func_proto bpf_trace_printk_proto = {
@@ -581,184 +431,37 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
}
#define MAX_SEQ_PRINTF_VARARGS 12
-#define MAX_SEQ_PRINTF_MAX_MEMCPY 6
-#define MAX_SEQ_PRINTF_STR_LEN 128
-
-struct bpf_seq_printf_buf {
- char buf[MAX_SEQ_PRINTF_MAX_MEMCPY][MAX_SEQ_PRINTF_STR_LEN];
-};
-static DEFINE_PER_CPU(struct bpf_seq_printf_buf, bpf_seq_printf_buf);
-static DEFINE_PER_CPU(int, bpf_seq_printf_buf_used);
BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
const void *, data, u32, data_len)
{
- int err = -EINVAL, fmt_cnt = 0, memcpy_cnt = 0;
- int i, buf_used, copy_size, num_args;
- u64 params[MAX_SEQ_PRINTF_VARARGS];
- struct bpf_seq_printf_buf *bufs;
- const u64 *args = data;
-
- buf_used = this_cpu_inc_return(bpf_seq_printf_buf_used);
- if (WARN_ON_ONCE(buf_used > 1)) {
- err = -EBUSY;
- goto out;
- }
-
- bufs = this_cpu_ptr(&bpf_seq_printf_buf);
-
- /*
- * bpf_check()->check_func_arg()->check_stack_boundary()
- * guarantees that fmt points to bpf program stack,
- * fmt_size bytes of it were initialized and fmt_size > 0
- */
- if (fmt[--fmt_size] != 0)
- goto out;
-
- if (data_len & 7)
- goto out;
-
- for (i = 0; i < fmt_size; i++) {
- if (fmt[i] == '%') {
- if (fmt[i + 1] == '%')
- i++;
- else if (!data || !data_len)
- goto out;
- }
- }
+ enum bpf_printf_mod_type mod[MAX_SEQ_PRINTF_VARARGS];
+ u64 args[MAX_SEQ_PRINTF_VARARGS];
+ int err, num_args;
+ if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 ||
+ (data_len && !data))
+ return -EINVAL;
num_args = data_len / 8;
- /* check format string for allowed specifiers */
- for (i = 0; i < fmt_size; i++) {
- /* only printable ascii for now. */
- if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
- err = -EINVAL;
- goto out;
- }
-
- if (fmt[i] != '%')
- continue;
-
- if (fmt[i + 1] == '%') {
- i++;
- continue;
- }
-
- if (fmt_cnt >= MAX_SEQ_PRINTF_VARARGS) {
- err = -E2BIG;
- goto out;
- }
-
- if (fmt_cnt >= num_args) {
- err = -EINVAL;
- goto out;
- }
-
- /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
- i++;
-
- /* skip optional "[0 +-][num]" width formating field */
- while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
- fmt[i] == ' ')
- i++;
- if (fmt[i] >= '1' && fmt[i] <= '9') {
- i++;
- while (fmt[i] >= '0' && fmt[i] <= '9')
- i++;
- }
-
- if (fmt[i] == 's') {
- void *unsafe_ptr;
-
- /* try our best to copy */
- if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
- err = -E2BIG;
- goto out;
- }
-
- unsafe_ptr = (void *)(long)args[fmt_cnt];
- err = strncpy_from_kernel_nofault(bufs->buf[memcpy_cnt],
- unsafe_ptr, MAX_SEQ_PRINTF_STR_LEN);
- if (err < 0)
- bufs->buf[memcpy_cnt][0] = '\0';
- params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
-
- fmt_cnt++;
- memcpy_cnt++;
- continue;
- }
-
- if (fmt[i] == 'p') {
- if (fmt[i + 1] == 0 ||
- fmt[i + 1] == 'K' ||
- fmt[i + 1] == 'x' ||
- fmt[i + 1] == 'B') {
- /* just kernel pointers */
- params[fmt_cnt] = args[fmt_cnt];
- fmt_cnt++;
- continue;
- }
-
- /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
- if (fmt[i + 1] != 'i' && fmt[i + 1] != 'I') {
- err = -EINVAL;
- goto out;
- }
- if (fmt[i + 2] != '4' && fmt[i + 2] != '6') {
- err = -EINVAL;
- goto out;
- }
-
- if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
- err = -E2BIG;
- goto out;
- }
-
-
- copy_size = (fmt[i + 2] == '4') ? 4 : 16;
-
- err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt],
- (void *) (long) args[fmt_cnt],
- copy_size);
- if (err < 0)
- memset(bufs->buf[memcpy_cnt], 0, copy_size);
- params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
-
- i += 2;
- fmt_cnt++;
- memcpy_cnt++;
- continue;
- }
-
- if (fmt[i] == 'l') {
- i++;
- if (fmt[i] == 'l')
- i++;
- }
-
- if (fmt[i] != 'i' && fmt[i] != 'd' &&
- fmt[i] != 'u' && fmt[i] != 'x' &&
- fmt[i] != 'X') {
- err = -EINVAL;
- goto out;
- }
-
- params[fmt_cnt] = args[fmt_cnt];
- fmt_cnt++;
- }
+ err = bpf_printf_prepare(fmt, fmt_size, data, args, mod, num_args);
+ if (err < 0)
+ return err;
/* Maximumly we can have MAX_SEQ_PRINTF_VARARGS parameter, just give
* all of them to seq_printf().
*/
- seq_printf(m, fmt, params[0], params[1], params[2], params[3],
- params[4], params[5], params[6], params[7], params[8],
- params[9], params[10], params[11]);
+ seq_printf(m, fmt, BPF_CAST_FMT_ARG(0, args, mod),
+ BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod),
+ BPF_CAST_FMT_ARG(3, args, mod), BPF_CAST_FMT_ARG(4, args, mod),
+ BPF_CAST_FMT_ARG(5, args, mod), BPF_CAST_FMT_ARG(6, args, mod),
+ BPF_CAST_FMT_ARG(7, args, mod), BPF_CAST_FMT_ARG(8, args, mod),
+ BPF_CAST_FMT_ARG(9, args, mod), BPF_CAST_FMT_ARG(10, args, mod),
+ BPF_CAST_FMT_ARG(11, args, mod));
- err = seq_has_overflowed(m) ? -EOVERFLOW : 0;
-out:
- this_cpu_dec(bpf_seq_printf_buf_used);
- return err;
+ bpf_printf_cleanup();
+
+ return seq_has_overflowed(m) ? -EOVERFLOW : 0;
}
BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)
@@ -1373,6 +1076,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_task_storage_delete_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
+ case BPF_FUNC_snprintf:
+ return &bpf_snprintf_proto;
default:
return NULL;
}
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8b644113715e..fb3d3262dc1a 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -71,6 +71,9 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg,
if (array == NULL)
return -ENOBUFS;
+ /* paired with smp_rmb() in __vlan_group_get_device() */
+ smp_wmb();
+
vg->vlan_devices_arrays[pidx][vidx] = array;
return 0;
}
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 953405362795..fa3ad3d4d58c 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -57,6 +57,10 @@ static inline struct net_device *__vlan_group_get_device(struct vlan_group *vg,
array = vg->vlan_devices_arrays[pidx]
[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+
+ /* paired with smp_wmb() in vlan_group_prealloc_vid() */
+ smp_rmb();
+
return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index d9bf63dbe4fd..222b1d322c96 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4723,10 +4723,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
void *orig_data, *orig_data_end, *hard_start;
struct netdev_rx_queue *rxqueue;
u32 metalen, act = XDP_DROP;
+ bool orig_bcast, orig_host;
u32 mac_len, frame_sz;
__be16 orig_eth_type;
struct ethhdr *eth;
- bool orig_bcast;
int off;
/* Reinjected packets coming from act_mirred or similar should
@@ -4773,6 +4773,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
orig_data_end = xdp->data_end;
orig_data = xdp->data;
eth = (struct ethhdr *)xdp->data;
+ orig_host = ether_addr_equal_64bits(eth->h_dest, skb->dev->dev_addr);
orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
orig_eth_type = eth->h_proto;
@@ -4800,8 +4801,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
/* check if XDP changed eth hdr such SKB needs update */
eth = (struct ethhdr *)xdp->data;
if ((orig_eth_type != eth->h_proto) ||
+ (orig_host != ether_addr_equal_64bits(eth->h_dest,
+ skb->dev->dev_addr)) ||
(orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) {
__skb_push(skb, ETH_HLEN);
+ skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
}
@@ -5962,7 +5966,7 @@ static void gro_list_prepare(const struct list_head *head,
}
}
-static void skb_gro_reset_offset(struct sk_buff *skb)
+static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
{
const struct skb_shared_info *pinfo = skb_shinfo(skb);
const skb_frag_t *frag0 = &pinfo->frags[0];
@@ -5973,7 +5977,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
if (!skb_headlen(skb) && pinfo->nr_frags &&
!PageHighMem(skb_frag_page(frag0)) &&
- (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) {
+ (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
skb_frag_size(frag0),
@@ -6191,7 +6195,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
skb_mark_napi_id(skb, napi);
trace_napi_gro_receive_entry(skb);
- skb_gro_reset_offset(skb);
+ skb_gro_reset_offset(skb, 0);
ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
trace_napi_gro_receive_exit(ret);
@@ -6280,7 +6284,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
napi->skb = NULL;
skb_reset_mac_header(skb);
- skb_gro_reset_offset(skb);
+ skb_gro_reset_offset(skb, hlen);
if (unlikely(skb_gro_header_hard(skb, hlen))) {
eth = skb_gro_header_slow(skb, hlen, 0);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 737b61c2976e..4eb969518ee0 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -8599,9 +8599,10 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set);
* @controller: associated controller number for the devlink port instance
* @pf: associated PF for the devlink port instance
* @sf: associated SF of a PF for the devlink port instance
+ * @external: indicates if the port is for an external controller
*/
void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller,
- u16 pf, u32 sf)
+ u16 pf, u32 sf, bool external)
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
@@ -8615,6 +8616,7 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
attrs->pci_sf.controller = controller;
attrs->pci_sf.pf = pf;
attrs->pci_sf.sf = sf;
+ attrs->pci_sf.external = external;
}
EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
@@ -8667,6 +8669,13 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
attrs->pci_vf.pf, attrs->pci_vf.vf);
break;
case DEVLINK_PORT_FLAVOUR_PCI_SF:
+ if (attrs->pci_sf.external) {
+ n = snprintf(name, len, "c%u", attrs->pci_sf.controller);
+ if (n >= len)
+ return -EINVAL;
+ len -= n;
+ name += n;
+ }
n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
attrs->pci_sf.sf);
break;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8379719d1dce..98f20efbfadf 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -131,6 +131,9 @@ static void neigh_update_gc_list(struct neighbour *n)
write_lock_bh(&n->tbl->lock);
write_lock(&n->lock);
+ if (n->dead)
+ goto out;
+
/* remove from the gc list if new state is permanent or if neighbor
* is externally learned; otherwise entry should be on the gc list
*/
@@ -147,6 +150,7 @@ static void neigh_update_gc_list(struct neighbour *n)
atomic_inc(&n->tbl->gc_entries);
}
+out:
write_unlock(&n->lock);
write_unlock_bh(&n->tbl->lock);
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 3d190d22b0d8..6f1b82b8ad49 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -188,7 +188,7 @@ static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
if (!sk->sk_prot->psock_update_sk_prot)
return -EINVAL;
psock->psock_update_sk_prot = sk->sk_prot->psock_update_sk_prot;
- return sk->sk_prot->psock_update_sk_prot(sk, false);
+ return sk->sk_prot->psock_update_sk_prot(sk, psock, false);
}
static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
@@ -1521,7 +1521,7 @@ void sock_map_close(struct sock *sk, long timeout)
lock_sock(sk);
rcu_read_lock();
- psock = sk_psock(sk);
+ psock = sk_psock_get(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
release_sock(sk);
@@ -1532,6 +1532,7 @@ void sock_map_close(struct sock *sk, long timeout)
sock_map_remove_links(sk, psock);
rcu_read_unlock();
sk_psock_stop(psock, true);
+ sk_psock_put(sk, psock);
release_sock(sk);
saved_close(sk, timeout);
}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 4f49c12dae53..ad9d17923fc5 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -499,9 +499,8 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops)
ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
}
-int tcp_bpf_update_proto(struct sock *sk, bool restore)
+int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
- struct sk_psock *psock = sk_psock(sk);
int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 7d5c4ebf42fe..954c4591a6fd 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -103,14 +103,12 @@ static int __init udp_bpf_v4_build_proto(void)
}
core_initcall(udp_bpf_v4_build_proto);
-int udp_bpf_update_proto(struct sock *sk, bool restore)
+int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6;
- struct sk_psock *psock = sk_psock(sk);
if (restore) {
sk->sk_write_space = psock->saved_write_space;
- /* Pairs with lockless read in sk_clone_lock() */
WRITE_ONCE(sk->sk_prot, psock->sk_proto);
return 0;
}
@@ -118,7 +116,6 @@ int udp_bpf_update_proto(struct sock *sk, bool restore)
if (sk->sk_family == AF_INET6)
udp_bpf_check_v6_needs_rebuild(psock->sk_proto);
- /* Pairs with lockless read in sk_clone_lock() */
WRITE_ONCE(sk->sk_prot, &udp_bpf_prots[family]);
return 0;
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 8bf21996734d..29a2d690d8d5 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -392,6 +392,14 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
return false;
}
+static void mptcp_set_datafin_timeout(const struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ mptcp_sk(sk)->timer_ival = min(TCP_RTO_MAX,
+ TCP_RTO_MIN << icsk->icsk_retransmits);
+}
+
static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
{
long tout = ssk && inet_csk(ssk)->icsk_pending ?
@@ -1062,7 +1070,7 @@ out:
}
if (snd_una == READ_ONCE(msk->snd_nxt)) {
- if (msk->timer_ival)
+ if (msk->timer_ival && !mptcp_data_fin_enabled(msk))
mptcp_stop_timer(sk);
} else {
mptcp_reset_timer(sk);
@@ -2287,8 +2295,19 @@ static void __mptcp_retrans(struct sock *sk)
__mptcp_clean_una_wakeup(sk);
dfrag = mptcp_rtx_head(sk);
- if (!dfrag)
+ if (!dfrag) {
+ if (mptcp_data_fin_enabled(msk)) {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ icsk->icsk_retransmits++;
+ mptcp_set_datafin_timeout(sk);
+ mptcp_send_ack(msk);
+
+ goto reset_timer;
+ }
+
return;
+ }
ssk = mptcp_subflow_get_retrans(msk);
if (!ssk)
@@ -2474,6 +2493,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
pr_debug("Sending DATA_FIN on subflow %p", ssk);
mptcp_set_timeout(sk, ssk);
tcp_send_ack(ssk);
+ if (!mptcp_timer_pending(sk))
+ mptcp_reset_timer(sk);
}
break;
}
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 15424d26e85d..96b524ceabca 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -392,7 +392,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
*
* Start with a full bucket.
*/
- band->bucket = (band->burst_size + band->rate) * 1000ULL;
+ band->bucket = band->burst_size * 1000ULL;
band_max_delta_t = div_u64(band->bucket, band->rate);
if (band_max_delta_t > meter->max_delta_t)
meter->max_delta_t = band_max_delta_t;
@@ -641,7 +641,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
long long int max_bucket_size;
band = &meter->bands[i];
- max_bucket_size = (band->burst_size + band->rate) * 1000LL;
+ max_bucket_size = band->burst_size * 1000LL;
band->bucket += delta_ms * band->rate;
if (band->bucket > max_bucket_size)
diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
index 2bf2b1943e61..fa611678af05 100644
--- a/net/qrtr/mhi.c
+++ b/net/qrtr/mhi.c
@@ -50,6 +50,9 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep);
int rc;
+ if (skb->sk)
+ sock_hold(skb->sk);
+
rc = skb_linearize(skb);
if (rc)
goto free_skb;
@@ -59,12 +62,11 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
if (rc)
goto free_skb;
- if (skb->sk)
- sock_hold(skb->sk);
-
return rc;
free_skb:
+ if (skb->sk)
+ sock_put(skb->sk);
kfree_skb(skb);
return rc;
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 922ed6b91abb..5c91df52b8c2 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -945,6 +945,12 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
list_for_each_entry(entry, &new->entries, list)
cycle = ktime_add_ns(cycle, entry->interval);
+
+ if (!cycle) {
+ NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
+ return -EINVAL;
+ }
+
new->cycle_time = cycle;
}
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index e4370b1b7494..902cb6dd710b 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -733,6 +733,23 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
return t->send_pkt(reply);
}
+/* This function should be called with sk_lock held and SOCK_DONE set */
+static void virtio_transport_remove_sock(struct vsock_sock *vsk)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ struct virtio_vsock_pkt *pkt, *tmp;
+
+ /* We don't need to take rx_lock, as the socket is closing and we are
+ * removing it.
+ */
+ list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
+ list_del(&pkt->list);
+ virtio_transport_free_pkt(pkt);
+ }
+
+ vsock_remove_sock(vsk);
+}
+
static void virtio_transport_wait_close(struct sock *sk, long timeout)
{
if (timeout) {
@@ -765,7 +782,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk,
(!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
vsk->close_work_scheduled = false;
- vsock_remove_sock(vsk);
+ virtio_transport_remove_sock(vsk);
/* Release refcnt obtained when we scheduled the timeout */
sock_put(sk);
@@ -828,22 +845,15 @@ static bool virtio_transport_close(struct vsock_sock *vsk)
void virtio_transport_release(struct vsock_sock *vsk)
{
- struct virtio_vsock_sock *vvs = vsk->trans;
- struct virtio_vsock_pkt *pkt, *tmp;
struct sock *sk = &vsk->sk;
bool remove_sock = true;
if (sk->sk_type == SOCK_STREAM)
remove_sock = virtio_transport_close(vsk);
- list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
- }
-
if (remove_sock) {
sock_set_flag(sk, SOCK_DONE);
- vsock_remove_sock(vsk);
+ virtio_transport_remove_sock(vsk);
}
}
EXPORT_SYMBOL_GPL(virtio_transport_release);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 8b65323207db..1c9ecb18b8e6 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -568,8 +568,7 @@ vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
out:
if (err < 0) {
- pr_err("Could not attach to queue pair with %d\n",
- err);
+ pr_err_once("Could not attach to queue pair with %d\n", err);
err = vmci_transport_error_to_vsock_error(err);
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index a71ed664da0a..cd62d4ba87a9 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -30,7 +30,7 @@
#include "xdp_umem.h"
#include "xsk.h"
-#define TX_BATCH_SIZE 16
+#define TX_BATCH_SIZE 32
static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c
index 3f4599c9a202..ef30d2b353b0 100644
--- a/samples/bpf/tracex1_kern.c
+++ b/samples/bpf/tracex1_kern.c
@@ -26,7 +26,7 @@
SEC("kprobe/__netif_receive_skb_core")
int bpf_prog1(struct pt_regs *ctx)
{
- /* attaches to kprobe netif_receive_skb,
+ /* attaches to kprobe __netif_receive_skb_core,
* looks for packets on loobpack device and prints them
*/
char devname[IFNAMSIZ];
@@ -35,7 +35,7 @@ int bpf_prog1(struct pt_regs *ctx)
int len;
/* non-portable! works for the given kernel only */
- skb = (struct sk_buff *) PT_REGS_PARM1(ctx);
+ bpf_probe_read_kernel(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx));
dev = _(skb->dev);
len = _(skb->len);
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 3b261b0f74f0..667aacb9261c 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -213,6 +213,7 @@ vmlinux_link()
gen_btf()
{
local pahole_ver
+ local extra_paholeopt=
if ! [ -x "$(command -v ${PAHOLE})" ]; then
echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available"
@@ -227,8 +228,12 @@ gen_btf()
vmlinux_link ${1}
+ if [ "${pahole_ver}" -ge "121" ]; then
+ extra_paholeopt="${extra_paholeopt} --btf_gen_floats"
+ fi
+
info "BTF" ${2}
- LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1}
+ LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${extra_paholeopt} ${1}
# Create ${2} which contains just .BTF section but no symbols. Add
# SHF_ALLOC because .BTF will be part of the vmlinux image. --strip-all
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 62953bbf68b4..385d5c955cf3 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -71,7 +71,9 @@ static const char *btf_var_linkage_str(__u32 linkage)
case BTF_VAR_STATIC:
return "static";
case BTF_VAR_GLOBAL_ALLOCATED:
- return "global-alloc";
+ return "global";
+ case BTF_VAR_GLOBAL_EXTERN:
+ return "extern";
default:
return "(unknown)";
}
@@ -98,26 +100,28 @@ static const char *btf_str(const struct btf *btf, __u32 off)
return btf__name_by_offset(btf, off) ? : "(invalid)";
}
+static int btf_kind_safe(int kind)
+{
+ return kind <= BTF_KIND_MAX ? kind : BTF_KIND_UNKN;
+}
+
static int dump_btf_type(const struct btf *btf, __u32 id,
const struct btf_type *t)
{
json_writer_t *w = json_wtr;
- int kind, safe_kind;
-
- kind = BTF_INFO_KIND(t->info);
- safe_kind = kind <= BTF_KIND_MAX ? kind : BTF_KIND_UNKN;
+ int kind = btf_kind(t);
if (json_output) {
jsonw_start_object(w);
jsonw_uint_field(w, "id", id);
- jsonw_string_field(w, "kind", btf_kind_str[safe_kind]);
+ jsonw_string_field(w, "kind", btf_kind_str[btf_kind_safe(kind)]);
jsonw_string_field(w, "name", btf_str(btf, t->name_off));
} else {
- printf("[%u] %s '%s'", id, btf_kind_str[safe_kind],
+ printf("[%u] %s '%s'", id, btf_kind_str[btf_kind_safe(kind)],
btf_str(btf, t->name_off));
}
- switch (BTF_INFO_KIND(t->info)) {
+ switch (kind) {
case BTF_KIND_INT: {
__u32 v = *(__u32 *)(t + 1);
const char *enc;
@@ -300,7 +304,8 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
break;
}
case BTF_KIND_DATASEC: {
- const struct btf_var_secinfo *v = (const void *)(t+1);
+ const struct btf_var_secinfo *v = (const void *)(t + 1);
+ const struct btf_type *vt;
__u16 vlen = BTF_INFO_VLEN(t->info);
int i;
@@ -322,6 +327,13 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
} else {
printf("\n\ttype_id=%u offset=%u size=%u",
v->type, v->offset, v->size);
+
+ if (v->type <= btf__get_nr_types(btf)) {
+ vt = btf__type_by_id(btf, v->type);
+ printf(" (%s '%s')",
+ btf_kind_str[btf_kind_safe(btf_kind(vt))],
+ btf_str(btf, vt->name_off));
+ }
}
}
if (json_output)
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index ff3aa0cf3997..f836d115d7d6 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -157,7 +157,7 @@ static int netlink_recv(int sock, __u32 nl_pid, __u32 seq,
if (len == 0)
break;
- for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, (unsigned int)len);
nh = NLMSG_NEXT(nh, len)) {
if (nh->nlmsg_pid != nl_pid) {
ret = -LIBBPF_ERRNO__WRNGPID;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 69902603012c..ec6d85a81744 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -312,6 +312,27 @@ union bpf_iter_link_info {
* *ctx_out*, *data_out* (for example, packet data), result of the
* execution *retval*, and *duration* of the test run.
*
+ * The sizes of the buffers provided as input and output
+ * parameters *ctx_in*, *ctx_out*, *data_in*, and *data_out* must
+ * be provided in the corresponding variables *ctx_size_in*,
+ * *ctx_size_out*, *data_size_in*, and/or *data_size_out*. If any
+ * of these parameters are not provided (i.e. set to NULL), the
+ * corresponding size field must be zero.
+ *
+ * Some program types have particular requirements:
+ *
+ * **BPF_PROG_TYPE_SK_LOOKUP**
+ * *data_in* and *data_out* must be NULL.
+ *
+ * **BPF_PROG_TYPE_XDP**
+ * *ctx_in* and *ctx_out* must be NULL.
+ *
+ * **BPF_PROG_TYPE_RAW_TRACEPOINT**,
+ * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
+ *
+ * *ctx_out*, *data_in* and *data_out* must be NULL.
+ * *repeat* must be zero.
+ *
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
@@ -4061,12 +4082,20 @@ union bpf_attr {
* of new data availability is sent.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
+ * If **0** is specified in *flags*, an adaptive notification
+ * of new data availability is sent.
+ *
+ * An adaptive notification is a notification sent whenever the user-space
+ * process has caught up and consumed all available payloads. In case the user-space
+ * process is still processing a previous payload, then no notification is needed
+ * as it will process the newly added payload automatically.
* Return
* 0 on success, or a negative error in case of failure.
*
* void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
* Description
* Reserve *size* bytes of payload in a ring buffer *ringbuf*.
+ * *flags* must be 0.
* Return
* Valid pointer with *size* bytes of memory available; NULL,
* otherwise.
@@ -4078,6 +4107,10 @@ union bpf_attr {
* of new data availability is sent.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
+ * If **0** is specified in *flags*, an adaptive notification
+ * of new data availability is sent.
+ *
+ * See 'bpf_ringbuf_output()' for the definition of adaptive notification.
* Return
* Nothing. Always succeeds.
*
@@ -4088,6 +4121,10 @@ union bpf_attr {
* of new data availability is sent.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
+ * If **0** is specified in *flags*, an adaptive notification
+ * of new data availability is sent.
+ *
+ * See 'bpf_ringbuf_output()' for the definition of adaptive notification.
* Return
* Nothing. Always succeeds.
*
@@ -4578,7 +4615,7 @@ union bpf_attr {
*
* long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
* Description
- * Check ctx packet size against exceeding MTU of net device (based
+ * Check packet size against exceeding MTU of net device (based
* on *ifindex*). This helper will likely be used in combination
* with helpers that adjust/change the packet size.
*
@@ -4595,6 +4632,14 @@ union bpf_attr {
* against the current net device. This is practical if this isn't
* used prior to redirect.
*
+ * On input *mtu_len* must be a valid pointer, else verifier will
+ * reject BPF program. If the value *mtu_len* is initialized to
+ * zero then the ctx packet size is used. When value *mtu_len* is
+ * provided as input this specifies the L3 length that the MTU check
+ * is done against. Remember XDP and TC length operate at L2, but
+ * this value is L3 as this correlates to MTU and IP-header tot_len
+ * values which are L3 (similar behavior as bpf_fib_lookup).
+ *
* The Linux kernel route table can configure MTUs on a more
* specific per route level, which is not provided by this helper.
* For route level MTU checks use the **bpf_fib_lookup**\ ()
@@ -4619,11 +4664,9 @@ union bpf_attr {
*
* On return *mtu_len* pointer contains the MTU value of the net
* device. Remember the net device configured MTU is the L3 size,
- * which is returned here and XDP and TX length operate at L2.
+ * which is returned here and XDP and TC length operate at L2.
* Helper take this into account for you, but remember when using
- * MTU value in your BPF-code. On input *mtu_len* must be a valid
- * pointer and be initialized (to zero), else verifier will reject
- * BPF program.
+ * MTU value in your BPF-code.
*
* Return
* * 0 on success, and populate MTU value in *mtu_len* pointer.
@@ -4665,6 +4708,33 @@ union bpf_attr {
* Return
* The number of traversed map elements for success, **-EINVAL** for
* invalid **flags**.
+ *
+ * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len)
+ * Description
+ * Outputs a string into the **str** buffer of size **str_size**
+ * based on a format string stored in a read-only map pointed to by
+ * **fmt**.
+ *
+ * Each format specifier in **fmt** corresponds to one u64 element
+ * in the **data** array. For strings and pointers where pointees
+ * are accessed, only the pointer values are stored in the *data*
+ * array. The *data_len* is the size of *data* in bytes.
+ *
+ * Formats **%s** and **%p{i,I}{4,6}** require reading kernel
+ * memory. Reading kernel memory may fail due to either invalid
+ * address or valid address but requiring a major memory fault. If
+ * reading kernel memory fails, the string for **%s** will be an
+ * empty string, and the ip address for **%p{i,I}{4,6}** will be 0.
+ * Not returning error to bpf program is consistent with what
+ * **bpf_trace_printk**\ () does for now.
+ *
+ * Return
+ * The strictly positive length of the formatted string, including
+ * the trailing zero character. If the return value is greater than
+ * **str_size**, **str** contains a truncated string, guaranteed to
+ * be zero-terminated except when **str_size** is 0.
+ *
+ * Or **-EBUSY** if the per-CPU memory copy buffer is busy.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4832,6 +4902,7 @@ union bpf_attr {
FN(sock_from_file), \
FN(check_mtu), \
FN(for_each_map_elem), \
+ FN(snprintf), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5373,6 +5444,8 @@ struct bpf_link_info {
} raw_tracepoint;
struct {
__u32 attach_type;
+ __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */
+ __u32 target_btf_id; /* BTF type id inside the object */
} tracing;
struct {
__u64 cgroup_id;
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index cc2e51c64a54..9720dc0b4605 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -25,9 +25,16 @@
/*
* Helper macro to place programs, maps, license in
* different sections in elf_bpf file. Section names
- * are interpreted by elf_bpf loader
+ * are interpreted by libbpf depending on the context (BPF programs, BPF maps,
+ * extern variables, etc).
+ * To allow use of SEC() with externs (e.g., for extern .maps declarations),
+ * make sure __attribute__((used)) doesn't trigger compilation warning.
*/
-#define SEC(NAME) __attribute__((section(NAME), used))
+#define SEC(name) \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wignored-attributes\"") \
+ __attribute__((section(name), used)) \
+ _Pragma("GCC diagnostic pop") \
/* Avoid 'linux/stddef.h' definition of '__always_inline'. */
#undef __always_inline
@@ -40,6 +47,14 @@
#define __weak __attribute__((weak))
#endif
+/*
+ * Use __hidden attribute to mark a non-static BPF subprogram effectively
+ * static for BPF verifier's verification algorithm purposes, allowing more
+ * extensive and permissive BPF verification process, taking into account
+ * subprogram's caller context.
+ */
+#define __hidden __attribute__((visibility("hidden")))
+
/* When utilizing vmlinux.h with BPF CO-RE, user BPF programs can't include
* any system-level headers (such as stddef.h, linux/version.h, etc), and
* commonly-used macros like NULL and KERNEL_VERSION aren't available through
@@ -51,7 +66,7 @@
#endif
#ifndef KERNEL_VERSION
-#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c))
+#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c)))
#endif
/*
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index f9ef37707888..8c954ebc0c7c 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -413,20 +413,56 @@ typeof(name(0)) name(struct pt_regs *ctx) \
} \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
+#define ___bpf_fill0(arr, p, x) do {} while (0)
+#define ___bpf_fill1(arr, p, x) arr[p] = x
+#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
+#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
+#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
+#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
+#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
+#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
+#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
+#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
+#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
+#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
+#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
+#define ___bpf_fill(arr, args...) \
+ ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
+
/*
* BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
* in a structure.
*/
-#define BPF_SEQ_PRINTF(seq, fmt, args...) \
- ({ \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- static const char ___fmt[] = fmt; \
- unsigned long long ___param[] = { args }; \
- _Pragma("GCC diagnostic pop") \
- int ___ret = bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
- ___param, sizeof(___param)); \
- ___ret; \
- })
+#define BPF_SEQ_PRINTF(seq, fmt, args...) \
+({ \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[___bpf_narg(args)]; \
+ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ ___bpf_fill(___param, args); \
+ _Pragma("GCC diagnostic pop") \
+ \
+ bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
+ ___param, sizeof(___param)); \
+})
+
+/*
+ * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
+ * an array of u64.
+ */
+#define BPF_SNPRINTF(out, out_size, fmt, args...) \
+({ \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[___bpf_narg(args)]; \
+ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ ___bpf_fill(___param, args); \
+ _Pragma("GCC diagnostic pop") \
+ \
+ bpf_snprintf(out, out_size, ___fmt, \
+ ___param, sizeof(___param)); \
+})
#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index d30e67e7e1e5..d57e13a13798 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1605,11 +1605,6 @@ static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
btf->hdr->type_len, UINT_MAX, add_sz);
}
-static __u32 btf_type_info(int kind, int vlen, int kflag)
-{
- return (kflag << 31) | (kind << 24) | vlen;
-}
-
static void btf_type_inc_vlen(struct btf_type *t)
{
t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t));
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7aad78dbb4b4..a1cddd17af7d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -69,8 +69,7 @@
#define __printf(a, b) __attribute__((format(printf, a, b)))
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
-static const struct btf_type *
-skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
+static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
static int __base_pr(enum libbpf_print_level level, const char *format,
va_list args)
@@ -195,7 +194,6 @@ struct reloc_desc {
int insn_idx;
int map_idx;
int sym_off;
- bool processed;
};
struct bpf_sec_def;
@@ -275,6 +273,7 @@ struct bpf_program {
bpf_program_clear_priv_t clear_priv;
bool load;
+ bool mark_btf_static;
enum bpf_prog_type type;
enum bpf_attach_type expected_attach_type;
int prog_ifindex;
@@ -501,8 +500,6 @@ static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
-static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
- size_t off, __u32 sym_type, GElf_Sym *sym);
void bpf_program__unload(struct bpf_program *prog)
{
@@ -643,25 +640,29 @@ static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
const char *sec_name, int sec_idx)
{
+ Elf_Data *symbols = obj->efile.symbols;
struct bpf_program *prog, *progs;
void *data = sec_data->d_buf;
- size_t sec_sz = sec_data->d_size, sec_off, prog_sz;
- int nr_progs, err;
+ size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
+ int nr_progs, err, i;
const char *name;
GElf_Sym sym;
progs = obj->programs;
nr_progs = obj->nr_programs;
+ nr_syms = symbols->d_size / sizeof(GElf_Sym);
sec_off = 0;
- while (sec_off < sec_sz) {
- if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) {
- pr_warn("sec '%s': failed to find program symbol at offset %zu\n",
- sec_name, sec_off);
- return -LIBBPF_ERRNO__FORMAT;
- }
+ for (i = 0; i < nr_syms; i++) {
+ if (!gelf_getsym(symbols, i, &sym))
+ continue;
+ if (sym.st_shndx != sec_idx)
+ continue;
+ if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
+ continue;
prog_sz = sym.st_size;
+ sec_off = sym.st_value;
name = elf_sym_str(obj, sym.st_name);
if (!name) {
@@ -699,10 +700,17 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
if (err)
return err;
+ /* if function is a global/weak symbol, but has hidden
+ * visibility (STV_HIDDEN), mark its BTF FUNC as static to
+ * enable more permissive BPF verification mode with more
+ * outside context available to BPF verifier
+ */
+ if (GELF_ST_BIND(sym.st_info) != STB_LOCAL
+ && GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN)
+ prog->mark_btf_static = true;
+
nr_progs++;
obj->nr_programs = nr_progs;
-
- sec_off += prog_sz;
}
return 0;
@@ -1896,7 +1904,7 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
return 0;
}
-static const struct btf_type *
+const struct btf_type *
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
{
const struct btf_type *t = btf__type_by_id(btf, id);
@@ -1951,16 +1959,11 @@ static const char *__btf_kind_str(__u16 kind)
}
}
-static const char *btf_kind_str(const struct btf_type *t)
+const char *btf_kind_str(const struct btf_type *t)
{
return __btf_kind_str(btf_kind(t));
}
-static enum btf_func_linkage btf_func_linkage(const struct btf_type *t)
-{
- return (enum btf_func_linkage)BTF_INFO_VLEN(t->info);
-}
-
/*
* Fetch integer attribute of BTF map definition. Such attributes are
* represented using a pointer to an array, in which dimensionality of array
@@ -2015,254 +2018,262 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
return bpf_map__set_pin_path(map, buf);
}
-
-static int parse_btf_map_def(struct bpf_object *obj,
- struct bpf_map *map,
- const struct btf_type *def,
- bool strict, bool is_inner,
- const char *pin_root_path)
+int parse_btf_map_def(const char *map_name, struct btf *btf,
+ const struct btf_type *def_t, bool strict,
+ struct btf_map_def *map_def, struct btf_map_def *inner_def)
{
const struct btf_type *t;
const struct btf_member *m;
+ bool is_inner = inner_def == NULL;
int vlen, i;
- vlen = btf_vlen(def);
- m = btf_members(def);
+ vlen = btf_vlen(def_t);
+ m = btf_members(def_t);
for (i = 0; i < vlen; i++, m++) {
- const char *name = btf__name_by_offset(obj->btf, m->name_off);
+ const char *name = btf__name_by_offset(btf, m->name_off);
if (!name) {
- pr_warn("map '%s': invalid field #%d.\n", map->name, i);
+ pr_warn("map '%s': invalid field #%d.\n", map_name, i);
return -EINVAL;
}
if (strcmp(name, "type") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m,
- &map->def.type))
+ if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
return -EINVAL;
- pr_debug("map '%s': found type = %u.\n",
- map->name, map->def.type);
+ map_def->parts |= MAP_DEF_MAP_TYPE;
} else if (strcmp(name, "max_entries") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m,
- &map->def.max_entries))
+ if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
return -EINVAL;
- pr_debug("map '%s': found max_entries = %u.\n",
- map->name, map->def.max_entries);
+ map_def->parts |= MAP_DEF_MAX_ENTRIES;
} else if (strcmp(name, "map_flags") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m,
- &map->def.map_flags))
+ if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
return -EINVAL;
- pr_debug("map '%s': found map_flags = %u.\n",
- map->name, map->def.map_flags);
+ map_def->parts |= MAP_DEF_MAP_FLAGS;
} else if (strcmp(name, "numa_node") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
+ if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
return -EINVAL;
- pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
+ map_def->parts |= MAP_DEF_NUMA_NODE;
} else if (strcmp(name, "key_size") == 0) {
__u32 sz;
- if (!get_map_field_int(map->name, obj->btf, m, &sz))
+ if (!get_map_field_int(map_name, btf, m, &sz))
return -EINVAL;
- pr_debug("map '%s': found key_size = %u.\n",
- map->name, sz);
- if (map->def.key_size && map->def.key_size != sz) {
+ if (map_def->key_size && map_def->key_size != sz) {
pr_warn("map '%s': conflicting key size %u != %u.\n",
- map->name, map->def.key_size, sz);
+ map_name, map_def->key_size, sz);
return -EINVAL;
}
- map->def.key_size = sz;
+ map_def->key_size = sz;
+ map_def->parts |= MAP_DEF_KEY_SIZE;
} else if (strcmp(name, "key") == 0) {
__s64 sz;
- t = btf__type_by_id(obj->btf, m->type);
+ t = btf__type_by_id(btf, m->type);
if (!t) {
pr_warn("map '%s': key type [%d] not found.\n",
- map->name, m->type);
+ map_name, m->type);
return -EINVAL;
}
if (!btf_is_ptr(t)) {
pr_warn("map '%s': key spec is not PTR: %s.\n",
- map->name, btf_kind_str(t));
+ map_name, btf_kind_str(t));
return -EINVAL;
}
- sz = btf__resolve_size(obj->btf, t->type);
+ sz = btf__resolve_size(btf, t->type);
if (sz < 0) {
pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
- map->name, t->type, (ssize_t)sz);
+ map_name, t->type, (ssize_t)sz);
return sz;
}
- pr_debug("map '%s': found key [%u], sz = %zd.\n",
- map->name, t->type, (ssize_t)sz);
- if (map->def.key_size && map->def.key_size != sz) {
+ if (map_def->key_size && map_def->key_size != sz) {
pr_warn("map '%s': conflicting key size %u != %zd.\n",
- map->name, map->def.key_size, (ssize_t)sz);
+ map_name, map_def->key_size, (ssize_t)sz);
return -EINVAL;
}
- map->def.key_size = sz;
- map->btf_key_type_id = t->type;
+ map_def->key_size = sz;
+ map_def->key_type_id = t->type;
+ map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
} else if (strcmp(name, "value_size") == 0) {
__u32 sz;
- if (!get_map_field_int(map->name, obj->btf, m, &sz))
+ if (!get_map_field_int(map_name, btf, m, &sz))
return -EINVAL;
- pr_debug("map '%s': found value_size = %u.\n",
- map->name, sz);
- if (map->def.value_size && map->def.value_size != sz) {
+ if (map_def->value_size && map_def->value_size != sz) {
pr_warn("map '%s': conflicting value size %u != %u.\n",
- map->name, map->def.value_size, sz);
+ map_name, map_def->value_size, sz);
return -EINVAL;
}
- map->def.value_size = sz;
+ map_def->value_size = sz;
+ map_def->parts |= MAP_DEF_VALUE_SIZE;
} else if (strcmp(name, "value") == 0) {
__s64 sz;
- t = btf__type_by_id(obj->btf, m->type);
+ t = btf__type_by_id(btf, m->type);
if (!t) {
pr_warn("map '%s': value type [%d] not found.\n",
- map->name, m->type);
+ map_name, m->type);
return -EINVAL;
}
if (!btf_is_ptr(t)) {
pr_warn("map '%s': value spec is not PTR: %s.\n",
- map->name, btf_kind_str(t));
+ map_name, btf_kind_str(t));
return -EINVAL;
}
- sz = btf__resolve_size(obj->btf, t->type);
+ sz = btf__resolve_size(btf, t->type);
if (sz < 0) {
pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
- map->name, t->type, (ssize_t)sz);
+ map_name, t->type, (ssize_t)sz);
return sz;
}
- pr_debug("map '%s': found value [%u], sz = %zd.\n",
- map->name, t->type, (ssize_t)sz);
- if (map->def.value_size && map->def.value_size != sz) {
+ if (map_def->value_size && map_def->value_size != sz) {
pr_warn("map '%s': conflicting value size %u != %zd.\n",
- map->name, map->def.value_size, (ssize_t)sz);
+ map_name, map_def->value_size, (ssize_t)sz);
return -EINVAL;
}
- map->def.value_size = sz;
- map->btf_value_type_id = t->type;
+ map_def->value_size = sz;
+ map_def->value_type_id = t->type;
+ map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
}
else if (strcmp(name, "values") == 0) {
+ char inner_map_name[128];
int err;
if (is_inner) {
pr_warn("map '%s': multi-level inner maps not supported.\n",
- map->name);
+ map_name);
return -ENOTSUP;
}
if (i != vlen - 1) {
pr_warn("map '%s': '%s' member should be last.\n",
- map->name, name);
+ map_name, name);
return -EINVAL;
}
- if (!bpf_map_type__is_map_in_map(map->def.type)) {
+ if (!bpf_map_type__is_map_in_map(map_def->map_type)) {
pr_warn("map '%s': should be map-in-map.\n",
- map->name);
+ map_name);
return -ENOTSUP;
}
- if (map->def.value_size && map->def.value_size != 4) {
+ if (map_def->value_size && map_def->value_size != 4) {
pr_warn("map '%s': conflicting value size %u != 4.\n",
- map->name, map->def.value_size);
+ map_name, map_def->value_size);
return -EINVAL;
}
- map->def.value_size = 4;
- t = btf__type_by_id(obj->btf, m->type);
+ map_def->value_size = 4;
+ t = btf__type_by_id(btf, m->type);
if (!t) {
pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
- map->name, m->type);
+ map_name, m->type);
return -EINVAL;
}
if (!btf_is_array(t) || btf_array(t)->nelems) {
pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
- map->name);
+ map_name);
return -EINVAL;
}
- t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
- NULL);
+ t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
if (!btf_is_ptr(t)) {
pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
- map->name, btf_kind_str(t));
+ map_name, btf_kind_str(t));
return -EINVAL;
}
- t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+ t = skip_mods_and_typedefs(btf, t->type, NULL);
if (!btf_is_struct(t)) {
pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
- map->name, btf_kind_str(t));
+ map_name, btf_kind_str(t));
return -EINVAL;
}
- map->inner_map = calloc(1, sizeof(*map->inner_map));
- if (!map->inner_map)
- return -ENOMEM;
- map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
- map->inner_map->name = malloc(strlen(map->name) +
- sizeof(".inner") + 1);
- if (!map->inner_map->name)
- return -ENOMEM;
- sprintf(map->inner_map->name, "%s.inner", map->name);
-
- err = parse_btf_map_def(obj, map->inner_map, t, strict,
- true /* is_inner */, NULL);
+ snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
+ err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
if (err)
return err;
+
+ map_def->parts |= MAP_DEF_INNER_MAP;
} else if (strcmp(name, "pinning") == 0) {
__u32 val;
- int err;
if (is_inner) {
- pr_debug("map '%s': inner def can't be pinned.\n",
- map->name);
+ pr_warn("map '%s': inner def can't be pinned.\n", map_name);
return -EINVAL;
}
- if (!get_map_field_int(map->name, obj->btf, m, &val))
+ if (!get_map_field_int(map_name, btf, m, &val))
return -EINVAL;
- pr_debug("map '%s': found pinning = %u.\n",
- map->name, val);
-
- if (val != LIBBPF_PIN_NONE &&
- val != LIBBPF_PIN_BY_NAME) {
+ if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
pr_warn("map '%s': invalid pinning value %u.\n",
- map->name, val);
+ map_name, val);
return -EINVAL;
}
- if (val == LIBBPF_PIN_BY_NAME) {
- err = build_map_pin_path(map, pin_root_path);
- if (err) {
- pr_warn("map '%s': couldn't build pin path.\n",
- map->name);
- return err;
- }
- }
+ map_def->pinning = val;
+ map_def->parts |= MAP_DEF_PINNING;
} else {
if (strict) {
- pr_warn("map '%s': unknown field '%s'.\n",
- map->name, name);
+ pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
return -ENOTSUP;
}
- pr_debug("map '%s': ignoring unknown field '%s'.\n",
- map->name, name);
+ pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
}
}
- if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
- pr_warn("map '%s': map type isn't specified.\n", map->name);
+ if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
+ pr_warn("map '%s': map type isn't specified.\n", map_name);
return -EINVAL;
}
return 0;
}
+static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
+{
+ map->def.type = def->map_type;
+ map->def.key_size = def->key_size;
+ map->def.value_size = def->value_size;
+ map->def.max_entries = def->max_entries;
+ map->def.map_flags = def->map_flags;
+
+ map->numa_node = def->numa_node;
+ map->btf_key_type_id = def->key_type_id;
+ map->btf_value_type_id = def->value_type_id;
+
+ if (def->parts & MAP_DEF_MAP_TYPE)
+ pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
+
+ if (def->parts & MAP_DEF_KEY_TYPE)
+ pr_debug("map '%s': found key [%u], sz = %u.\n",
+ map->name, def->key_type_id, def->key_size);
+ else if (def->parts & MAP_DEF_KEY_SIZE)
+ pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
+
+ if (def->parts & MAP_DEF_VALUE_TYPE)
+ pr_debug("map '%s': found value [%u], sz = %u.\n",
+ map->name, def->value_type_id, def->value_size);
+ else if (def->parts & MAP_DEF_VALUE_SIZE)
+ pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
+
+ if (def->parts & MAP_DEF_MAX_ENTRIES)
+ pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
+ if (def->parts & MAP_DEF_MAP_FLAGS)
+ pr_debug("map '%s': found map_flags = %u.\n", map->name, def->map_flags);
+ if (def->parts & MAP_DEF_PINNING)
+ pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
+ if (def->parts & MAP_DEF_NUMA_NODE)
+ pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
+
+ if (def->parts & MAP_DEF_INNER_MAP)
+ pr_debug("map '%s': found inner map definition.\n", map->name);
+}
+
static int bpf_object__init_user_btf_map(struct bpf_object *obj,
const struct btf_type *sec,
int var_idx, int sec_idx,
const Elf_Data *data, bool strict,
const char *pin_root_path)
{
+ struct btf_map_def map_def = {}, inner_def = {};
const struct btf_type *var, *def;
const struct btf_var_secinfo *vi;
const struct btf_var *var_extra;
const char *map_name;
struct bpf_map *map;
+ int err;
vi = btf_var_secinfos(sec) + var_idx;
var = btf__type_by_id(obj->btf, vi->type);
@@ -2316,7 +2327,35 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
map_name, map->sec_idx, map->sec_offset);
- return parse_btf_map_def(obj, map, def, strict, false, pin_root_path);
+ err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
+ if (err)
+ return err;
+
+ fill_map_from_def(map, &map_def);
+
+ if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
+ err = build_map_pin_path(map, pin_root_path);
+ if (err) {
+ pr_warn("map '%s': couldn't build pin path.\n", map->name);
+ return err;
+ }
+ }
+
+ if (map_def.parts & MAP_DEF_INNER_MAP) {
+ map->inner_map = calloc(1, sizeof(*map->inner_map));
+ if (!map->inner_map)
+ return -ENOMEM;
+ map->inner_map->fd = -1;
+ map->inner_map->sec_idx = sec_idx;
+ map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
+ if (!map->inner_map->name)
+ return -ENOMEM;
+ sprintf(map->inner_map->name, "%s.inner", map_name);
+
+ fill_map_from_def(map->inner_map, &inner_def);
+ }
+
+ return 0;
}
static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
@@ -2618,7 +2657,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
{
struct btf *kern_btf = obj->btf;
bool btf_mandatory, sanitize;
- int err = 0;
+ int i, err = 0;
if (!obj->btf)
return 0;
@@ -2632,6 +2671,38 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
return 0;
}
+ /* Even though some subprogs are global/weak, user might prefer more
+ * permissive BPF verification process that BPF verifier performs for
+ * static functions, taking into account more context from the caller
+ * functions. In such case, they need to mark such subprogs with
+ * __attribute__((visibility("hidden"))) and libbpf will adjust
+ * corresponding FUNC BTF type to be marked as static and trigger more
+ * involved BPF verification process.
+ */
+ for (i = 0; i < obj->nr_programs; i++) {
+ struct bpf_program *prog = &obj->programs[i];
+ struct btf_type *t;
+ const char *name;
+ int j, n;
+
+ if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
+ continue;
+
+ n = btf__get_nr_types(obj->btf);
+ for (j = 1; j <= n; j++) {
+ t = btf_type_by_id(obj->btf, j);
+ if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
+ continue;
+
+ name = btf__str_by_offset(obj->btf, t->name_off);
+ if (strcmp(name, prog->name) != 0)
+ continue;
+
+ t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
+ break;
+ }
+ }
+
sanitize = btf_needs_sanitization(obj);
if (sanitize) {
const void *raw_data;
@@ -2782,26 +2853,6 @@ static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
return data;
}
-static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
- size_t off, __u32 sym_type, GElf_Sym *sym)
-{
- Elf_Data *symbols = obj->efile.symbols;
- size_t n = symbols->d_size / sizeof(GElf_Sym);
- int i;
-
- for (i = 0; i < n; i++) {
- if (!gelf_getsym(symbols, i, sym))
- continue;
- if (sym->st_shndx != sec_idx || sym->st_value != off)
- continue;
- if (GELF_ST_TYPE(sym->st_info) != sym_type)
- continue;
- return 0;
- }
-
- return -ENOENT;
-}
-
static bool is_sec_name_dwarf(const char *name)
{
/* approximation, but the actual list is too long */
@@ -3498,8 +3549,6 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
const char *sym_sec_name;
struct bpf_map *map;
- reloc_desc->processed = false;
-
if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
prog->name, sym_name, insn_idx, insn->code);
@@ -3682,11 +3731,16 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
int err, i, nrels;
const char *sym_name;
__u32 insn_idx;
+ Elf_Scn *scn;
+ Elf_Data *scn_data;
GElf_Sym sym;
GElf_Rel rel;
+ scn = elf_sec_by_idx(obj, sec_idx);
+ scn_data = elf_sec_data(obj, scn);
+
relo_sec_name = elf_sec_str(obj, shdr->sh_name);
- sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ sec_name = elf_sec_name(obj, scn);
if (!relo_sec_name || !sec_name)
return -EINVAL;
@@ -3704,7 +3758,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
return -LIBBPF_ERRNO__FORMAT;
}
- if (rel.r_offset % BPF_INSN_SZ) {
+
+ if (rel.r_offset % BPF_INSN_SZ || rel.r_offset >= scn_data->d_size) {
pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
return -LIBBPF_ERRNO__FORMAT;
@@ -3728,9 +3783,9 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
if (!prog) {
- pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n",
+ pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
relo_sec_name, i, sec_name, insn_idx);
- return -LIBBPF_ERRNO__RELOC;
+ continue;
}
relos = libbpf_reallocarray(prog->reloc_desc,
@@ -3845,6 +3900,14 @@ __u32 bpf_map__max_entries(const struct bpf_map *map)
return map->def.max_entries;
}
+/*
+ * Return map->inner_map when bpf_map_type__is_map_in_map() accepts the type
+ * of @map, and NULL for every other map type.
+ */
+struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
+{
+	return bpf_map_type__is_map_in_map(map->def.type) ? map->inner_map : NULL;
+}
+
int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
{
if (map->fd >= 0)
@@ -6305,13 +6368,11 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
case RELO_LD64:
insn[0].src_reg = BPF_PSEUDO_MAP_FD;
insn[0].imm = obj->maps[relo->map_idx].fd;
- relo->processed = true;
break;
case RELO_DATA:
insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
insn[1].imm = insn[0].imm + relo->sym_off;
insn[0].imm = obj->maps[relo->map_idx].fd;
- relo->processed = true;
break;
case RELO_EXTERN_VAR:
ext = &obj->externs[relo->sym_off];
@@ -6329,13 +6390,11 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
insn[1].imm = ext->ksym.addr >> 32;
}
}
- relo->processed = true;
break;
case RELO_EXTERN_FUNC:
ext = &obj->externs[relo->sym_off];
insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
insn[0].imm = ext->ksym.kernel_btf_id;
- relo->processed = true;
break;
case RELO_SUBPROG_ADDR:
insn[0].src_reg = BPF_PSEUDO_FUNC;
@@ -6621,9 +6680,6 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
* different main programs */
insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
- if (relo)
- relo->processed = true;
-
pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
}
@@ -6716,7 +6772,7 @@ static int
bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
{
struct bpf_program *subprog;
- int i, j, err;
+ int i, err;
/* mark all subprogs as not relocated (yet) within the context of
* current main program
@@ -6727,9 +6783,6 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
continue;
subprog->sub_insn_off = 0;
- for (j = 0; j < subprog->nr_reloc; j++)
- if (subprog->reloc_desc[j].type == RELO_CALL)
- subprog->reloc_desc[j].processed = false;
}
err = bpf_object__reloc_code(obj, prog, prog);
@@ -6976,7 +7029,7 @@ static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id
return false;
}
-static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog)
+static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
{
struct bpf_insn *insn = prog->insns;
enum bpf_func_id func_id;
@@ -9476,6 +9529,7 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
pr_warn("error: inner_map_fd already specified\n");
return -EINVAL;
}
+ zfree(&map->inner_map);
map->inner_map_fd = fd;
return 0;
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index f500621d28e5..bec4e6a6e31d 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -480,6 +480,7 @@ LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd);
+LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map);
LIBBPF_API long libbpf_get_error(const void *ptr);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index f5990f7208ce..b9b29baf1df8 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -359,5 +359,6 @@ LIBBPF_0.4.0 {
bpf_linker__finalize;
bpf_linker__free;
bpf_linker__new;
+ bpf_map__inner_map;
bpf_object__set_kversion;
} LIBBPF_0.3.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 6017902c687e..ee426226928f 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -19,6 +19,7 @@
#pragma GCC poison reallocarray
#include "libbpf.h"
+#include "btf.h"
#ifndef EM_BPF
#define EM_BPF 247
@@ -131,6 +132,50 @@ struct btf;
struct btf_type;
struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id);
+const char *btf_kind_str(const struct btf_type *t);
+const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
+
+/* Interpret the vlen field of @t as a function linkage value. */
+static inline enum btf_func_linkage btf_func_linkage(const struct btf_type *t)
+{
+	int linkage = btf_vlen(t);
+
+	return (enum btf_func_linkage)linkage;
+}
+
+/*
+ * Pack @kind, @vlen and @kflag into a single 32-bit BTF info word:
+ * @kflag is placed at bit 31, @kind starts at bit 24 and @vlen is OR-ed
+ * into the low bits.
+ */
+static inline __u32 btf_type_info(int kind, int vlen, int kflag)
+{
+	/* shift as unsigned: shifting a signed int 1 into bit 31 is undefined */
+	return ((__u32)kflag << 31) | ((__u32)kind << 24) | (__u32)vlen;
+}
+
+/* One bit per part of a map definition; values are meant to be OR-ed. */
+enum map_def_parts {
+	MAP_DEF_MAP_TYPE	= 1 << 0,
+	MAP_DEF_KEY_TYPE	= 1 << 1,
+	MAP_DEF_KEY_SIZE	= 1 << 2,
+	MAP_DEF_VALUE_TYPE	= 1 << 3,
+	MAP_DEF_VALUE_SIZE	= 1 << 4,
+	MAP_DEF_MAX_ENTRIES	= 1 << 5,
+	MAP_DEF_MAP_FLAGS	= 1 << 6,
+	MAP_DEF_NUMA_NODE	= 1 << 7,
+	MAP_DEF_PINNING		= 1 << 8,
+	MAP_DEF_INNER_MAP	= 1 << 9,
+
+	/* all ten bits above set at once */
+	MAP_DEF_ALL		= (1 << 10) - 1,
+};
+
+/*
+ * Map definition attributes in plain-integer form. Instances are passed to
+ * parse_btf_map_def() as its map_def/inner_def arguments.
+ */
+struct btf_map_def {
+ /* MAP_DEF_* bits; presumably records which fields below were present in
+  * the parsed definition -- TODO confirm against parse_btf_map_def()
+  */
+ enum map_def_parts parts;
+ __u32 map_type;
+ /* BTF type ID of the key; 0 is treated as "no key type" by the linker */
+ __u32 key_type_id;
+ __u32 key_size;
+ /* BTF type ID of the value; 0 is treated as "no value type" by the linker */
+ __u32 value_type_id;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 map_flags;
+ __u32 numa_node;
+ __u32 pinning;
+};
+
+int parse_btf_map_def(const char *map_name, struct btf *btf,
+ const struct btf_type *def_t, bool strict,
+ struct btf_map_def *map_def, struct btf_map_def *inner_def);
void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
size_t cur_cnt, size_t max_cnt, size_t add_cnt);
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 46b16cbdcda3..9de084b1c699 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -22,6 +22,8 @@
#include "libbpf_internal.h"
#include "strset.h"
+#define BTF_EXTERN_SEC ".extern"
+
struct src_sec {
const char *sec_name;
/* positional (not necessarily ELF) index in an array of sections */
@@ -74,11 +76,36 @@ struct btf_ext_sec_data {
void *recs;
};
+/*
+ * Bookkeeping for one global (including extern) ELF symbol; the linker keeps
+ * an array of these in bpf_linker.glob_syms.
+ */
+struct glob_sym {
+ /* ELF symbol index */
+ int sym_idx;
+ /* associated section id for .ksyms, .kconfig, etc, but not .extern */
+ int sec_id;
+ /* extern name offset in STRTAB */
+ int name_off;
+ /* optional associated BTF type ID */
+ int btf_id;
+ /* BTF type ID to which VAR/FUNC type is pointing to; used for
+ * rewriting types when extern VAR/FUNC is resolved to a concrete
+ * definition
+ */
+ int underlying_btf_id;
+ /* sec_var index in the corresponding dst_sec, if exists
+  * (add_glob_sym() initializes it to -1)
+  */
+ int var_idx;
+
+ /* extern or resolved/global symbol */
+ bool is_extern;
+ /* weak or strong symbol, never goes back from strong to weak */
+ bool is_weak;
+};
+
struct dst_sec {
char *sec_name;
/* positional (not necessarily ELF) index in an array of sections */
int id;
+ bool ephemeral;
+
/* ELF info */
size_t sec_idx;
Elf_Scn *scn;
@@ -120,22 +147,28 @@ struct bpf_linker {
struct btf *btf;
struct btf_ext *btf_ext;
+
+ /* global (including extern) ELF symbols */
+ int glob_sym_cnt;
+ struct glob_sym *glob_syms;
};
+/* Print a libbpf warning and append the string returned by elf_errmsg(-1). */
#define pr_warn_elf(fmt, ...) \
-do { \
- libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1)); \
-} while (0)
+ libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1))
static int init_output_elf(struct bpf_linker *linker, const char *file);
static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj);
static int linker_sanity_check_elf(struct src_obj *obj);
+static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec);
+static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec);
static int linker_sanity_check_btf(struct src_obj *obj);
static int linker_sanity_check_btf_ext(struct src_obj *obj);
static int linker_fixup_btf(struct src_obj *obj);
static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj);
static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
+ Elf64_Sym *sym, const char *sym_name, int src_sym_idx);
static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj);
static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj);
static int linker_append_btf_ext(struct bpf_linker *linker, struct src_obj *obj);
@@ -282,7 +315,7 @@ static int init_output_elf(struct bpf_linker *linker, const char *file)
/* ELF header */
linker->elf_hdr = elf64_newehdr(linker->elf);
- if (!linker->elf_hdr){
+ if (!linker->elf_hdr) {
pr_warn_elf("failed to create ELF header");
return -EINVAL;
}
@@ -663,8 +696,8 @@ static bool is_pow_of_2(size_t x)
static int linker_sanity_check_elf(struct src_obj *obj)
{
- struct src_sec *sec, *link_sec;
- int i, j, n;
+ struct src_sec *sec;
+ int i, err;
if (!obj->symtab_sec_idx) {
pr_warn("ELF is missing SYMTAB section in %s\n", obj->filename);
@@ -692,43 +725,11 @@ static int linker_sanity_check_elf(struct src_obj *obj)
return -EINVAL;
switch (sec->shdr->sh_type) {
- case SHT_SYMTAB: {
- Elf64_Sym *sym;
-
- if (sec->shdr->sh_entsize != sizeof(Elf64_Sym))
- return -EINVAL;
- if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
- return -EINVAL;
-
- if (!sec->shdr->sh_link || sec->shdr->sh_link >= obj->sec_cnt) {
- pr_warn("ELF SYMTAB section #%zu points to missing STRTAB section #%zu in %s\n",
- sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
- return -EINVAL;
- }
- link_sec = &obj->secs[sec->shdr->sh_link];
- if (link_sec->shdr->sh_type != SHT_STRTAB) {
- pr_warn("ELF SYMTAB section #%zu points to invalid STRTAB section #%zu in %s\n",
- sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
- return -EINVAL;
- }
-
- n = sec->shdr->sh_size / sec->shdr->sh_entsize;
- sym = sec->data->d_buf;
- for (j = 0; j < n; j++, sym++) {
- if (sym->st_shndx
- && sym->st_shndx < SHN_LORESERVE
- && sym->st_shndx >= obj->sec_cnt) {
- pr_warn("ELF sym #%d in section #%zu points to missing section #%zu in %s\n",
- j, sec->sec_idx, (size_t)sym->st_shndx, obj->filename);
- return -EINVAL;
- }
- if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) {
- if (sym->st_value != 0)
- return -EINVAL;
- }
- }
+ case SHT_SYMTAB:
+ err = linker_sanity_check_elf_symtab(obj, sec);
+ if (err)
+ return err;
break;
- }
case SHT_STRTAB:
break;
case SHT_PROGBITS:
@@ -739,87 +740,169 @@ static int linker_sanity_check_elf(struct src_obj *obj)
break;
case SHT_NOBITS:
break;
- case SHT_REL: {
- Elf64_Rel *relo;
- struct src_sec *sym_sec;
+ case SHT_REL:
+ err = linker_sanity_check_elf_relos(obj, sec);
+ if (err)
+ return err;
+ break;
+ case SHT_LLVM_ADDRSIG:
+ break;
+ default:
+ pr_warn("ELF section #%zu (%s) has unrecognized type %zu in %s\n",
+ sec->sec_idx, sec->sec_name, (size_t)sec->shdr->sh_type, obj->filename);
+ return -EINVAL;
+ }
+ }
- if (sec->shdr->sh_entsize != sizeof(Elf64_Rel))
- return -EINVAL;
- if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
- return -EINVAL;
+ return 0;
+}
- /* SHT_REL's sh_link should point to SYMTAB */
- if (sec->shdr->sh_link != obj->symtab_sec_idx) {
- pr_warn("ELF relo section #%zu points to invalid SYMTAB section #%zu in %s\n",
- sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
- return -EINVAL;
- }
+/*
+ * Sanity-check one SYMTAB section of @obj: entry size, the STRTAB it links
+ * to, and then every symbol entry in turn.
+ *
+ * Returns 0 when all checks pass and -EINVAL on the first failed check.
+ */
+static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec)
+{
+ struct src_sec *link_sec;
+ Elf64_Sym *sym;
+ int i, n;
+
+ if (sec->shdr->sh_entsize != sizeof(Elf64_Sym))
+ return -EINVAL;
+ if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
+ return -EINVAL;
+
+ if (!sec->shdr->sh_link || sec->shdr->sh_link >= obj->sec_cnt) {
+ pr_warn("ELF SYMTAB section #%zu points to missing STRTAB section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+ return -EINVAL;
+ }
+ link_sec = &obj->secs[sec->shdr->sh_link];
+ if (link_sec->shdr->sh_type != SHT_STRTAB) {
+ pr_warn("ELF SYMTAB section #%zu points to invalid STRTAB section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+ return -EINVAL;
+ }
- /* SHT_REL's sh_info points to relocated section */
- if (!sec->shdr->sh_info || sec->shdr->sh_info >= obj->sec_cnt) {
- pr_warn("ELF relo section #%zu points to missing section #%zu in %s\n",
- sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+ n = sec->shdr->sh_size / sec->shdr->sh_entsize;
+ sym = sec->data->d_buf;
+ for (i = 0; i < n; i++, sym++) {
+ int sym_type = ELF64_ST_TYPE(sym->st_info);
+ int sym_bind = ELF64_ST_BIND(sym->st_info);
+ int sym_vis = ELF64_ST_VISIBILITY(sym->st_other);
+
+ /* entry #0 has to be all zeroes; nothing else is checked for it */
+ if (i == 0) {
+ if (sym->st_name != 0 || sym->st_info != 0
+ || sym->st_other != 0 || sym->st_shndx != 0
+ || sym->st_value != 0 || sym->st_size != 0) {
+ pr_warn("ELF sym #0 is invalid in %s\n", obj->filename);
return -EINVAL;
}
- link_sec = &obj->secs[sec->shdr->sh_info];
+ continue;
+ }
+ if (sym_bind != STB_LOCAL && sym_bind != STB_GLOBAL && sym_bind != STB_WEAK) {
+ pr_warn("ELF sym #%d in section #%zu has unsupported symbol binding %d\n",
+ i, sec->sec_idx, sym_bind);
+ return -EINVAL;
+ }
+ if (sym_vis != STV_DEFAULT && sym_vis != STV_HIDDEN) {
+ pr_warn("ELF sym #%d in section #%zu has unsupported symbol visibility %d\n",
+ i, sec->sec_idx, sym_vis);
+ return -EINVAL;
+ }
+ /* section index 0 is handled as an extern: it must be untyped,
+  * non-local, and carry zero value and size
+  */
+ if (sym->st_shndx == 0) {
+ if (sym_type != STT_NOTYPE || sym_bind == STB_LOCAL
+ || sym->st_value != 0 || sym->st_size != 0) {
+ pr_warn("ELF sym #%d is invalid extern symbol in %s\n",
+ i, obj->filename);
- /* .rel<secname> -> <secname> pattern is followed */
- if (strncmp(sec->sec_name, ".rel", sizeof(".rel") - 1) != 0
- || strcmp(sec->sec_name + sizeof(".rel") - 1, link_sec->sec_name) != 0) {
- pr_warn("ELF relo section #%zu name has invalid name in %s\n",
- sec->sec_idx, obj->filename);
return -EINVAL;
}
+ continue;
+ }
+ if (sym->st_shndx < SHN_LORESERVE && sym->st_shndx >= obj->sec_cnt) {
+ pr_warn("ELF sym #%d in section #%zu points to missing section #%zu in %s\n",
+ i, sec->sec_idx, (size_t)sym->st_shndx, obj->filename);
+ return -EINVAL;
+ }
+ if (sym_type == STT_SECTION) {
+ if (sym->st_value != 0)
+ return -EINVAL;
+ continue;
+ }
+ }
- /* don't further validate relocations for ignored sections */
- if (link_sec->skipped)
- break;
+ return 0;
+}
- /* relocatable section is data or instructions */
- if (link_sec->shdr->sh_type != SHT_PROGBITS
- && link_sec->shdr->sh_type != SHT_NOBITS) {
- pr_warn("ELF relo section #%zu points to invalid section #%zu in %s\n",
- sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
- return -EINVAL;
- }
+static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec)
+{
+ struct src_sec *link_sec, *sym_sec;
+ Elf64_Rel *relo;
+ int i, n;
- /* check sanity of each relocation */
- n = sec->shdr->sh_size / sec->shdr->sh_entsize;
- relo = sec->data->d_buf;
- sym_sec = &obj->secs[obj->symtab_sec_idx];
- for (j = 0; j < n; j++, relo++) {
- size_t sym_idx = ELF64_R_SYM(relo->r_info);
- size_t sym_type = ELF64_R_TYPE(relo->r_info);
-
- if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) {
- pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
- j, sec->sec_idx, sym_type, obj->filename);
- return -EINVAL;
- }
+ if (sec->shdr->sh_entsize != sizeof(Elf64_Rel))
+ return -EINVAL;
+ if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
+ return -EINVAL;
- if (!sym_idx || sym_idx * sizeof(Elf64_Sym) >= sym_sec->shdr->sh_size) {
- pr_warn("ELF relo #%d in section #%zu points to invalid symbol #%zu in %s\n",
- j, sec->sec_idx, sym_idx, obj->filename);
- return -EINVAL;
- }
+ /* SHT_REL's sh_link should point to SYMTAB */
+ if (sec->shdr->sh_link != obj->symtab_sec_idx) {
+ pr_warn("ELF relo section #%zu points to invalid SYMTAB section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+ return -EINVAL;
+ }
- if (link_sec->shdr->sh_flags & SHF_EXECINSTR) {
- if (relo->r_offset % sizeof(struct bpf_insn) != 0) {
- pr_warn("ELF relo #%d in section #%zu points to missing symbol #%zu in %s\n",
- j, sec->sec_idx, sym_idx, obj->filename);
- return -EINVAL;
- }
- }
- }
- break;
+ /* SHT_REL's sh_info points to relocated section */
+ if (!sec->shdr->sh_info || sec->shdr->sh_info >= obj->sec_cnt) {
+ pr_warn("ELF relo section #%zu points to missing section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+ return -EINVAL;
+ }
+ link_sec = &obj->secs[sec->shdr->sh_info];
+
+ /* .rel<secname> -> <secname> pattern is followed */
+ if (strncmp(sec->sec_name, ".rel", sizeof(".rel") - 1) != 0
+ || strcmp(sec->sec_name + sizeof(".rel") - 1, link_sec->sec_name) != 0) {
+ pr_warn("ELF relo section #%zu name has invalid name in %s\n",
+ sec->sec_idx, obj->filename);
+ return -EINVAL;
+ }
+
+ /* don't further validate relocations for ignored sections */
+ if (link_sec->skipped)
+ return 0;
+
+ /* relocatable section is data or instructions */
+ if (link_sec->shdr->sh_type != SHT_PROGBITS && link_sec->shdr->sh_type != SHT_NOBITS) {
+ pr_warn("ELF relo section #%zu points to invalid section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+ return -EINVAL;
+ }
+
+ /* check sanity of each relocation */
+ n = sec->shdr->sh_size / sec->shdr->sh_entsize;
+ relo = sec->data->d_buf;
+ sym_sec = &obj->secs[obj->symtab_sec_idx];
+ for (i = 0; i < n; i++, relo++) {
+ size_t sym_idx = ELF64_R_SYM(relo->r_info);
+ size_t sym_type = ELF64_R_TYPE(relo->r_info);
+
+ if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) {
+ pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
+ i, sec->sec_idx, sym_type, obj->filename);
+ return -EINVAL;
}
- case SHT_LLVM_ADDRSIG:
- break;
- default:
- pr_warn("ELF section #%zu (%s) has unrecognized type %zu in %s\n",
- sec->sec_idx, sec->sec_name, (size_t)sec->shdr->sh_type, obj->filename);
+
+ if (!sym_idx || sym_idx * sizeof(Elf64_Sym) >= sym_sec->shdr->sh_size) {
+ pr_warn("ELF relo #%d in section #%zu points to invalid symbol #%zu in %s\n",
+ i, sec->sec_idx, sym_idx, obj->filename);
return -EINVAL;
}
+
+ if (link_sec->shdr->sh_flags & SHF_EXECINSTR) {
+ if (relo->r_offset % sizeof(struct bpf_insn) != 0) {
+ pr_warn("ELF relo #%d in section #%zu points to missing symbol #%zu in %s\n",
+ i, sec->sec_idx, sym_idx, obj->filename);
+ return -EINVAL;
+ }
+ }
}
return 0;
@@ -897,6 +980,7 @@ static int init_sec(struct bpf_linker *linker, struct dst_sec *dst_sec, struct s
dst_sec->sec_sz = 0;
dst_sec->sec_idx = 0;
+ dst_sec->ephemeral = src_sec->ephemeral;
/* ephemeral sections are just thin section shells lacking most parts */
if (src_sec->ephemeral)
@@ -904,13 +988,13 @@ static int init_sec(struct bpf_linker *linker, struct dst_sec *dst_sec, struct s
scn = elf_newscn(linker->elf);
if (!scn)
- return -1;
+ return -ENOMEM;
data = elf_newdata(scn);
if (!data)
- return -1;
+ return -ENOMEM;
shdr = elf64_getshdr(scn);
if (!shdr)
- return -1;
+ return -ENOMEM;
dst_sec->scn = scn;
dst_sec->shdr = shdr;
@@ -960,6 +1044,9 @@ static struct dst_sec *find_dst_sec_by_name(struct bpf_linker *linker, const cha
static bool secs_match(struct dst_sec *dst, struct src_sec *src)
{
+ if (dst->ephemeral || src->ephemeral)
+ return true;
+
if (dst->shdr->sh_type != src->shdr->sh_type) {
pr_warn("sec %s types mismatch\n", dst->sec_name);
return false;
@@ -985,13 +1072,33 @@ static bool sec_content_is_same(struct dst_sec *dst_sec, struct src_sec *src_sec
return true;
}
-static int extend_sec(struct dst_sec *dst, struct src_sec *src)
+static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src_sec *src)
{
void *tmp;
- size_t dst_align = dst->shdr->sh_addralign;
- size_t src_align = src->shdr->sh_addralign;
+ size_t dst_align, src_align;
size_t dst_align_sz, dst_final_sz;
+ int err;
+
+ /* Ephemeral source section doesn't contribute anything to ELF
+ * section data.
+ */
+ if (src->ephemeral)
+ return 0;
+
+ /* Some sections (like .maps) can contain both externs (and thus be
+ * ephemeral) and non-externs (map definitions). So it's possible that
+ * it has to be "upgraded" from ephemeral to non-ephemeral when the
+ * first non-ephemeral entity appears. In such case, we add ELF
+ * section, data, etc.
+ */
+ if (dst->ephemeral) {
+ err = init_sec(linker, dst, src);
+ if (err)
+ return err;
+ }
+ dst_align = dst->shdr->sh_addralign;
+ src_align = src->shdr->sh_addralign;
if (dst_align == 0)
dst_align = 1;
if (dst_align < src_align)
@@ -1087,10 +1194,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj
/* record mapped section index */
src_sec->dst_id = dst_sec->id;
- if (src_sec->ephemeral)
- continue;
-
- err = extend_sec(dst_sec, src_sec);
+ err = extend_sec(linker, dst_sec, src_sec);
if (err)
return err;
}
@@ -1101,68 +1205,778 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj
+/*
+ * Walk the SYMTAB of @obj and pass every symbol except #0 to
+ * linker_append_elf_sym(). Allocates obj->sym_map with n + 1 zeroed entries.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, -EINVAL if a symbol
+ * name cannot be fetched, or the error returned by linker_append_elf_sym().
+ */
static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj)
{
struct src_sec *symtab = &obj->secs[obj->symtab_sec_idx];
- Elf64_Sym *sym = symtab->data->d_buf, *dst_sym;
- int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize;
+ Elf64_Sym *sym = symtab->data->d_buf;
+ int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize, err;
int str_sec_idx = symtab->shdr->sh_link;
+ const char *sym_name;
obj->sym_map = calloc(n + 1, sizeof(*obj->sym_map));
if (!obj->sym_map)
return -ENOMEM;
for (i = 0; i < n; i++, sym++) {
- struct src_sec *src_sec = NULL;
- struct dst_sec *dst_sec = NULL;
- const char *sym_name;
- size_t dst_sym_idx;
- int name_off;
-
- /* we already have all-zero initial symbol */
- if (sym->st_name == 0 && sym->st_info == 0 &&
- sym->st_other == 0 && sym->st_shndx == SHN_UNDEF &&
- sym->st_value == 0 && sym->st_size ==0)
+ /* We already validated all-zero symbol #0 and we already
+ * appended it preventively to the final SYMTAB, so skip it.
+ */
+ if (i == 0)
continue;
sym_name = elf_strptr(obj->elf, str_sec_idx, sym->st_name);
if (!sym_name) {
pr_warn("can't fetch symbol name for symbol #%d in '%s'\n", i, obj->filename);
- return -1;
+ return -EINVAL;
+ }
+
+ err = linker_append_elf_sym(linker, obj, sym, sym_name, i);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * Return a pointer to entry @sym_idx of the linker's output SYMTAB data.
+ * The index is not range-checked here.
+ */
+static Elf64_Sym *get_sym_by_idx(struct bpf_linker *linker, size_t sym_idx)
+{
+	Elf64_Sym *dst_syms = linker->secs[linker->symtab_sec_idx].raw_data;
+
+	return dst_syms + sym_idx;
+}
+
+/*
+ * Look up a recorded global symbol by name.
+ *
+ * Returns the first entry of linker->glob_syms whose STRTAB name equals
+ * @sym_name, or NULL when no entry matches.
+ */
+static struct glob_sym *find_glob_sym(struct bpf_linker *linker, const char *sym_name)
+{
+	int idx = 0;
+
+	while (idx < linker->glob_sym_cnt) {
+		struct glob_sym *cand = &linker->glob_syms[idx++];
+		const char *cand_name = strset__data(linker->strtab_strs) + cand->name_off;
+
+		if (!strcmp(cand_name, sym_name))
+			return cand;
+	}
+
+	return NULL;
+}
+
+/*
+ * Grow linker->glob_syms by one slot and return that slot, zero-filled and
+ * with var_idx preset to -1.
+ *
+ * Returns NULL if the reallocation fails; the linker's array and count are
+ * left as they were in that case.
+ */
+static struct glob_sym *add_glob_sym(struct bpf_linker *linker)
+{
+	int new_cnt = linker->glob_sym_cnt + 1;
+	struct glob_sym *grown, *slot;
+
+	grown = libbpf_reallocarray(linker->glob_syms, new_cnt, sizeof(*linker->glob_syms));
+	if (!grown)
+		return NULL;
+
+	linker->glob_syms = grown;
+	linker->glob_sym_cnt = new_cnt;
+
+	slot = &grown[new_cnt - 1];
+	memset(slot, 0, sizeof(*slot));
+	slot->var_idx = -1;
+
+	return slot;
+}
+
+/*
+ * Check whether BTF type @id1 in @btf1 is compatible with BTF type @id2 in
+ * @btf2 for the global symbol @sym_name.
+ *
+ * Modifiers and typedefs are skipped on both sides before each comparison.
+ * With @exact set, struct/union contents are compared field by field. Without
+ * it, struct/union bodies are not inspected and a forward declaration may
+ * stand in for a struct/union of the same name; following a pointer always
+ * switches to this relaxed mode. Every mismatch is reported via pr_warn().
+ *
+ * Returns true if the types are compatible, false otherwise.
+ */
+static bool glob_sym_btf_matches(const char *sym_name, bool exact,
+				 const struct btf *btf1, __u32 id1,
+				 const struct btf *btf2, __u32 id2)
+{
+	const struct btf_type *t1, *t2;
+	bool is_static1, is_static2;
+	const char *n1, *n2;
+	int i, n;
+
+recur:
+	n1 = n2 = NULL;
+	t1 = skip_mods_and_typedefs(btf1, id1, &id1);
+	t2 = skip_mods_and_typedefs(btf2, id2, &id2);
+
+	/* check if only one side is FWD, otherwise handle with common logic */
+	if (!exact && btf_is_fwd(t1) != btf_is_fwd(t2)) {
+		n1 = btf__str_by_offset(btf1, t1->name_off);
+		n2 = btf__str_by_offset(btf2, t2->name_off);
+		if (strcmp(n1, n2) != 0) {
+			pr_warn("global '%s': incompatible forward declaration names '%s' and '%s'\n",
+				sym_name, n1, n2);
+			return false;
 		}
+		/* validate if FWD kind matches concrete kind */
+		if (btf_is_fwd(t1)) {
+			if (btf_kflag(t1) && btf_is_union(t2))
+				return true;
+			if (!btf_kflag(t1) && btf_is_struct(t2))
+				return true;
+			pr_warn("global '%s': incompatible %s forward declaration and concrete kind %s\n",
+				sym_name, btf_kflag(t1) ? "union" : "struct", btf_kind_str(t2));
+		} else {
+			if (btf_kflag(t2) && btf_is_union(t1))
+				return true;
+			if (!btf_kflag(t2) && btf_is_struct(t1))
+				return true;
+			pr_warn("global '%s': incompatible %s forward declaration and concrete kind %s\n",
+				sym_name, btf_kflag(t2) ? "union" : "struct", btf_kind_str(t1));
+		}
+		return false;
+	}
+
+	if (btf_kind(t1) != btf_kind(t2)) {
+		pr_warn("global '%s': incompatible BTF kinds %s and %s\n",
+			sym_name, btf_kind_str(t1), btf_kind_str(t2));
+		return false;
+	}
+
+	/* named kinds have to agree on the name first */
+	switch (btf_kind(t1)) {
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_ENUM:
+	case BTF_KIND_FWD:
+	case BTF_KIND_FUNC:
+	case BTF_KIND_VAR:
+		n1 = btf__str_by_offset(btf1, t1->name_off);
+		n2 = btf__str_by_offset(btf2, t2->name_off);
+		if (strcmp(n1, n2) != 0) {
+			pr_warn("global '%s': incompatible %s names '%s' and '%s'\n",
+				sym_name, btf_kind_str(t1), n1, n2);
+			return false;
+		}
+		break;
+	default:
+		break;
+	}
+
+	switch (btf_kind(t1)) {
+	case BTF_KIND_UNKN: /* void */
+	case BTF_KIND_FWD:
+		return true;
+	case BTF_KIND_INT:
+	case BTF_KIND_FLOAT:
+	case BTF_KIND_ENUM:
+		/* ignore encoding for int and enum values for enum */
+		if (t1->size != t2->size) {
+			/* INT and FLOAT skip the name lookup above, so n1 is
+			 * still NULL for them; fetch it rather than handing
+			 * NULL to %s
+			 */
+			if (!n1)
+				n1 = btf__str_by_offset(btf1, t1->name_off);
+			pr_warn("global '%s': incompatible %s '%s' size %u and %u\n",
+				sym_name, btf_kind_str(t1), n1, t1->size, t2->size);
+			return false;
+		}
+		return true;
+	case BTF_KIND_PTR:
+		/* just validate overall shape of the referenced type, so no
+		 * contents comparison for struct/union, and allow fwd vs
+		 * struct/union
+		 */
+		exact = false;
+		id1 = t1->type;
+		id2 = t2->type;
+		goto recur;
+	case BTF_KIND_ARRAY:
+		/* ignore index type and array size */
+		id1 = btf_array(t1)->type;
+		id2 = btf_array(t2)->type;
+		goto recur;
+	case BTF_KIND_FUNC:
+		/* extern and global linkages are compatible */
+		is_static1 = btf_func_linkage(t1) == BTF_FUNC_STATIC;
+		is_static2 = btf_func_linkage(t2) == BTF_FUNC_STATIC;
+		if (is_static1 != is_static2) {
+			pr_warn("global '%s': incompatible func '%s' linkage\n", sym_name, n1);
+			return false;
+		}
+
+		id1 = t1->type;
+		id2 = t2->type;
+		goto recur;
+	case BTF_KIND_VAR:
+		/* extern and global linkages are compatible */
+		is_static1 = btf_var(t1)->linkage == BTF_VAR_STATIC;
+		is_static2 = btf_var(t2)->linkage == BTF_VAR_STATIC;
+		if (is_static1 != is_static2) {
+			pr_warn("global '%s': incompatible var '%s' linkage\n", sym_name, n1);
+			return false;
+		}
+
+		id1 = t1->type;
+		id2 = t2->type;
+		goto recur;
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m1, *m2;
+
+		/* relaxed mode: matching kind and name is enough */
+		if (!exact)
+			return true;
+
+		if (btf_vlen(t1) != btf_vlen(t2)) {
+			pr_warn("global '%s': incompatible number of %s fields %u and %u\n",
+				sym_name, btf_kind_str(t1), btf_vlen(t1), btf_vlen(t2));
+			return false;
+		}
+
+		n = btf_vlen(t1);
+		m1 = btf_members(t1);
+		m2 = btf_members(t2);
+		for (i = 0; i < n; i++, m1++, m2++) {
+			n1 = btf__str_by_offset(btf1, m1->name_off);
+			n2 = btf__str_by_offset(btf2, m2->name_off);
+			if (strcmp(n1, n2) != 0) {
+				pr_warn("global '%s': incompatible field #%d names '%s' and '%s'\n",
+					sym_name, i, n1, n2);
+				return false;
+			}
+			if (m1->offset != m2->offset) {
+				pr_warn("global '%s': incompatible field #%d ('%s') offsets\n",
+					sym_name, i, n1);
+				return false;
+			}
+			if (!glob_sym_btf_matches(sym_name, exact, btf1, m1->type, btf2, m2->type))
+				return false;
+		}
+
+		return true;
+	}
+	case BTF_KIND_FUNC_PROTO: {
+		const struct btf_param *m1, *m2;
+
+		if (btf_vlen(t1) != btf_vlen(t2)) {
+			pr_warn("global '%s': incompatible number of %s params %u and %u\n",
+				sym_name, btf_kind_str(t1), btf_vlen(t1), btf_vlen(t2));
+			return false;
+		}
+
+		n = btf_vlen(t1);
+		m1 = btf_params(t1);
+		m2 = btf_params(t2);
+		for (i = 0; i < n; i++, m1++, m2++) {
+			/* ignore func arg names */
+			if (!glob_sym_btf_matches(sym_name, exact, btf1, m1->type, btf2, m2->type))
+				return false;
+		}
+
+		/* now check return type as well */
+		id1 = t1->type;
+		id2 = t2->type;
+		goto recur;
+	}
+
+	/* skip_mods_and_typedefs() make this impossible */
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+	/* DATASECs are never compared with each other */
+	case BTF_KIND_DATASEC:
+	default:
+		pr_warn("global '%s': unsupported BTF kind %s\n",
+			sym_name, btf_kind_str(t1));
+		return false;
+	}
+}
+
+/*
+ * Compare two parsed map definitions attribute by attribute.
+ *
+ * @main_def (types resolved in @main_btf) and @extra_def (types resolved in
+ * @extra_btf) must agree on map type, key/value sizes and BTF types,
+ * max_entries, map_flags, numa_node, pinning and on whether an inner map is
+ * present. If an inner map is present, @main_inner_def and @extra_inner_def
+ * are compared the same way under the name "<sym_name>.inner"; the inner
+ * definitions themselves are compared without further nesting.
+ *
+ * Returns true on a full match. On the first mismatch a warning naming the
+ * offending attribute is printed and false is returned.
+ */
+static bool map_defs_match(const char *sym_name,
+			   const struct btf *main_btf,
+			   const struct btf_map_def *main_def,
+			   const struct btf_map_def *main_inner_def,
+			   const struct btf *extra_btf,
+			   const struct btf_map_def *extra_def,
+			   const struct btf_map_def *extra_inner_def)
+{
+	const char *reason;
+
+	if (main_def->map_type != extra_def->map_type) {
+		reason = "type";
+		goto mismatch;
+	}
+
+	/* check key type/size match */
+	if (main_def->key_size != extra_def->key_size) {
+		reason = "key_size";
+		goto mismatch;
+	}
+	if (!!main_def->key_type_id != !!extra_def->key_type_id) {
+		reason = "key type";
+		goto mismatch;
+	}
+	if ((main_def->parts & MAP_DEF_KEY_TYPE)
+	     && !glob_sym_btf_matches(sym_name, true /*exact*/,
+				      main_btf, main_def->key_type_id,
+				      extra_btf, extra_def->key_type_id)) {
+		reason = "key type";
+		goto mismatch;
+	}
+
+	/* validate value type/size match */
+	if (main_def->value_size != extra_def->value_size) {
+		reason = "value_size";
+		goto mismatch;
+	}
+	if (!!main_def->value_type_id != !!extra_def->value_type_id) {
+		reason = "value type";
+		goto mismatch;
+	}
+	if ((main_def->parts & MAP_DEF_VALUE_TYPE)
+	     && !glob_sym_btf_matches(sym_name, true /*exact*/,
+				      main_btf, main_def->value_type_id,
+				      extra_btf, extra_def->value_type_id)) {
+		/* this is the value type check, so report it as such */
+		reason = "value type";
+		goto mismatch;
+	}
+
+	if (main_def->max_entries != extra_def->max_entries) {
+		reason = "max_entries";
+		goto mismatch;
+	}
+	if (main_def->map_flags != extra_def->map_flags) {
+		reason = "map_flags";
+		goto mismatch;
+	}
+	if (main_def->numa_node != extra_def->numa_node) {
+		reason = "numa_node";
+		goto mismatch;
+	}
+	if (main_def->pinning != extra_def->pinning) {
+		reason = "pinning";
+		goto mismatch;
+	}
+
+	if ((main_def->parts & MAP_DEF_INNER_MAP) != (extra_def->parts & MAP_DEF_INNER_MAP)) {
+		reason = "inner map";
+		goto mismatch;
+	}
- if (sym->st_shndx && sym->st_shndx < SHN_LORESERVE) {
- src_sec = &obj->secs[sym->st_shndx];
- if (src_sec->skipped)
+	if (main_def->parts & MAP_DEF_INNER_MAP) {
+		char inner_map_name[128];
+
+		snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", sym_name);
+
+		/* inner definitions carry no nested inner map of their own */
+		return map_defs_match(inner_map_name,
+				      main_btf, main_inner_def, NULL,
+				      extra_btf, extra_inner_def, NULL);
+	}
+
+	return true;
+
+mismatch:
+	pr_warn("global '%s': map %s mismatch\n", sym_name, reason);
+	return false;
+}
+
+/*
+ * Compare the map definition behind an incoming symbol (BTF type @btf_id in
+ * @obj) with the definition already recorded for @glob_sym in the linker's
+ * BTF. Both sides are parsed with parse_btf_map_def() in strict mode.
+ *
+ * Returns true only if both definitions parse and map_defs_match() accepts
+ * them; every failure is reported via pr_warn().
+ */
+static bool glob_map_defs_match(const char *sym_name,
+				struct bpf_linker *linker, struct glob_sym *glob_sym,
+				struct src_obj *obj, Elf64_Sym *sym, int btf_id)
+{
+	struct btf_map_def src_def = {}, src_inner_def = {};
+	struct btf_map_def dst_def = {}, dst_inner_def = {};
+	const struct btf_type *var_t, *def_t;
+
+	/* incoming side: the symbol has to be described by a VAR */
+	var_t = btf__type_by_id(obj->btf, btf_id);
+	if (!btf_is_var(var_t)) {
+		pr_warn("global '%s': invalid map definition type [%d]\n", sym_name, btf_id);
+		return false;
+	}
+	def_t = skip_mods_and_typedefs(obj->btf, var_t->type, NULL);
+	if (parse_btf_map_def(sym_name, obj->btf, def_t, true /*strict*/,
+			      &src_def, &src_inner_def)) {
+		pr_warn("global '%s': invalid map definition\n", sym_name);
+		return false;
+	}
+
+	/* existing side: parse the already recorded definition again */
+	var_t = btf__type_by_id(linker->btf, glob_sym->btf_id);
+	def_t = skip_mods_and_typedefs(linker->btf, var_t->type, NULL);
+	if (parse_btf_map_def(sym_name, linker->btf, def_t, true /*strict*/,
+			      &dst_def, &dst_inner_def)) {
+		/* not expected to trigger, this definition was validated before */
+		pr_warn("global '%s': invalid dst map definition\n", sym_name);
+		return false;
+	}
+
+	/* For now an extern map definition must be complete and identical to
+	 * the concrete one; this requirement may be relaxed later.
+	 */
+	return map_defs_match(sym_name, linker->btf, &dst_def, &dst_inner_def,
+			      obj->btf, &src_def, &src_inner_def);
+}
+
+static bool glob_syms_match(const char *sym_name,
+ struct bpf_linker *linker, struct glob_sym *glob_sym,
+ struct src_obj *obj, Elf64_Sym *sym, size_t sym_idx, int btf_id)
+{
+ const struct btf_type *src_t;
+
+ /* if we are dealing with externs, BTF types describing both global
+ * and extern VARs/FUNCs should be completely present in all files
+ */
+ if (!glob_sym->btf_id || !btf_id) {
+ pr_warn("BTF info is missing for global symbol '%s'\n", sym_name);
+ return false;
+ }
+
+ src_t = btf__type_by_id(obj->btf, btf_id);
+ if (!btf_is_var(src_t) && !btf_is_func(src_t)) {
+ pr_warn("only extern variables and functions are supported, but got '%s' for '%s'\n",
+ btf_kind_str(src_t), sym_name);
+ return false;
+ }
+
+ /* deal with .maps definitions specially */
+ if (glob_sym->sec_id && strcmp(linker->secs[glob_sym->sec_id].sec_name, MAPS_ELF_SEC) == 0)
+ return glob_map_defs_match(sym_name, linker, glob_sym, obj, sym, btf_id);
+
+ if (!glob_sym_btf_matches(sym_name, true /*exact*/,
+ linker->btf, glob_sym->btf_id, obj->btf, btf_id))
+ return false;
+
+ return true;
+}
+
+static bool btf_is_non_static(const struct btf_type *t)
+{
+ return (btf_is_var(t) && btf_var(t)->linkage != BTF_VAR_STATIC)
+ || (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_STATIC);
+}
+
+static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sym_name,
+ int *out_btf_sec_id, int *out_btf_id)
+{
+ int i, j, n = btf__get_nr_types(obj->btf), m, btf_id = 0;
+ const struct btf_type *t;
+ const struct btf_var_secinfo *vi;
+ const char *name;
+
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(obj->btf, i);
+
+ /* some global and extern FUNCs and VARs might not be associated with any
+ * DATASEC, so try to detect them in the same pass
+ */
+ if (btf_is_non_static(t)) {
+ name = btf__str_by_offset(obj->btf, t->name_off);
+ if (strcmp(name, sym_name) != 0)
continue;
- dst_sec = &linker->secs[src_sec->dst_id];
- /* allow only one STT_SECTION symbol per section */
- if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && dst_sec->sec_sym_idx) {
- obj->sym_map[i] = dst_sec->sec_sym_idx;
+ /* remember and still try to find DATASEC */
+ btf_id = i;
+ continue;
+ }
+
+ if (!btf_is_datasec(t))
+ continue;
+
+ vi = btf_var_secinfos(t);
+ for (j = 0, m = btf_vlen(t); j < m; j++, vi++) {
+ t = btf__type_by_id(obj->btf, vi->type);
+ name = btf__str_by_offset(obj->btf, t->name_off);
+
+ if (strcmp(name, sym_name) != 0)
+ continue;
+ if (btf_is_var(t) && btf_var(t)->linkage == BTF_VAR_STATIC)
continue;
+ if (btf_is_func(t) && btf_func_linkage(t) == BTF_FUNC_STATIC)
+ continue;
+
+ if (btf_id && btf_id != vi->type) {
+ pr_warn("global/extern '%s' BTF is ambiguous: both types #%d and #%u match\n",
+ sym_name, btf_id, vi->type);
+ return -EINVAL;
}
+
+ *out_btf_sec_id = i;
+ *out_btf_id = vi->type;
+
+ return 0;
}
+ }
+
+ /* free-floating extern or global FUNC */
+ if (btf_id) {
+ *out_btf_sec_id = 0;
+ *out_btf_id = btf_id;
+ return 0;
+ }
- name_off = strset__add_str(linker->strtab_strs, sym_name);
- if (name_off < 0)
- return name_off;
+ pr_warn("failed to find BTF info for global/extern symbol '%s'\n", sym_name);
+ return -ENOENT;
+}
- dst_sym = add_new_sym(linker, &dst_sym_idx);
- if (!dst_sym)
- return -ENOMEM;
+static struct src_sec *find_src_sec_by_name(struct src_obj *obj, const char *sec_name)
+{
+ struct src_sec *sec;
+ int i;
- dst_sym->st_name = name_off;
- dst_sym->st_info = sym->st_info;
- dst_sym->st_other = sym->st_other;
- dst_sym->st_shndx = src_sec ? dst_sec->sec_idx : sym->st_shndx;
- dst_sym->st_value = (src_sec ? src_sec->dst_off : 0) + sym->st_value;
- dst_sym->st_size = sym->st_size;
+ for (i = 1; i < obj->sec_cnt; i++) {
+ sec = &obj->secs[i];
+
+ if (strcmp(sec->sec_name, sec_name) == 0)
+ return sec;
+ }
+
+ return NULL;
+}
+
+static int complete_extern_btf_info(struct btf *dst_btf, int dst_id,
+ struct btf *src_btf, int src_id)
+{
+ struct btf_type *dst_t = btf_type_by_id(dst_btf, dst_id);
+ struct btf_type *src_t = btf_type_by_id(src_btf, src_id);
+ struct btf_param *src_p, *dst_p;
+ const char *s;
+ int i, n, off;
+
+ /* We already made sure that source and destination types (FUNC or
+ * VAR) match in terms of types and argument names.
+ */
+ if (btf_is_var(dst_t)) {
+ btf_var(dst_t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ return 0;
+ }
+
+ dst_t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_GLOBAL, 0);
+
+ /* now onto FUNC_PROTO types */
+ src_t = btf_type_by_id(src_btf, src_t->type);
+ dst_t = btf_type_by_id(dst_btf, dst_t->type);
+
+ /* Fill in all the argument names, which for extern FUNCs are missing.
+ * We'll end up with two copies of FUNCs/VARs for externs, but that
+ * will be taken care of by BTF dedup at the very end.
+ * It might be that BTF types for extern in one file have less/more BTF
+ * information (e.g., FWD instead of full STRUCT/UNION information),
+ * but that should be (in most cases, subject to BTF dedup rules)
+ * handled and resolved by BTF dedup algorithm as well, so we won't
+ * worry about it. Our only job is to make sure that argument names
+ * are populated on both sides, otherwise BTF dedup will pedantically
+ * consider them different.
+ */
+ src_p = btf_params(src_t);
+ dst_p = btf_params(dst_t);
+ for (i = 0, n = btf_vlen(dst_t); i < n; i++, src_p++, dst_p++) {
+ if (!src_p->name_off)
+ continue;
+
+ /* src_btf has more complete info, so add name to dst_btf */
+ s = btf__str_by_offset(src_btf, src_p->name_off);
+ off = btf__add_str(dst_btf, s);
+ if (off < 0)
+ return off;
+ dst_p->name_off = off;
+ }
+ return 0;
+}
+
+static void sym_update_bind(Elf64_Sym *sym, int sym_bind)
+{
+ sym->st_info = ELF64_ST_INFO(sym_bind, ELF64_ST_TYPE(sym->st_info));
+}
+
+static void sym_update_type(Elf64_Sym *sym, int sym_type)
+{
+ sym->st_info = ELF64_ST_INFO(ELF64_ST_BIND(sym->st_info), sym_type);
+}
+
+static void sym_update_visibility(Elf64_Sym *sym, int sym_vis)
+{
+ /* libelf doesn't provide setters for ST_VISIBILITY,
+ * but it is stored in the lower 2 bits of st_other
+ */
+ sym->st_other &= ~0x03; /* clear old visibility bits, keep the rest of st_other */
+ sym->st_other |= sym_vis; /* install new visibility (callers pass an STV_* value) */
+}
+
+static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
+ Elf64_Sym *sym, const char *sym_name, int src_sym_idx)
+{
+ struct src_sec *src_sec = NULL;
+ struct dst_sec *dst_sec = NULL;
+ struct glob_sym *glob_sym = NULL;
+ int name_off, sym_type, sym_bind, sym_vis, err;
+ int btf_sec_id = 0, btf_id = 0;
+ size_t dst_sym_idx;
+ Elf64_Sym *dst_sym;
+ bool sym_is_extern;
+
+ sym_type = ELF64_ST_TYPE(sym->st_info);
+ sym_bind = ELF64_ST_BIND(sym->st_info);
+ sym_vis = ELF64_ST_VISIBILITY(sym->st_other);
+ sym_is_extern = sym->st_shndx == SHN_UNDEF;
+
+ if (sym_is_extern) {
+ if (!obj->btf) {
+ pr_warn("externs without BTF info are not supported\n");
+ return -ENOTSUP;
+ }
+ } else if (sym->st_shndx < SHN_LORESERVE) {
+ src_sec = &obj->secs[sym->st_shndx];
+ if (src_sec->skipped)
+ return 0;
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ /* allow only one STT_SECTION symbol per section */
+ if (sym_type == STT_SECTION && dst_sec->sec_sym_idx) {
+ obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx;
+ return 0;
+ }
+ }
+
+ if (sym_bind == STB_LOCAL)
+ goto add_sym;
+
+ /* find matching BTF info */
+ err = find_glob_sym_btf(obj, sym, sym_name, &btf_sec_id, &btf_id);
+ if (err)
+ return err;
+
+ if (sym_is_extern && btf_sec_id) {
+ const char *sec_name = NULL;
+ const struct btf_type *t;
+
+ t = btf__type_by_id(obj->btf, btf_sec_id);
+ sec_name = btf__str_by_offset(obj->btf, t->name_off);
+
+ /* Clang puts unannotated extern vars into
+ * '.extern' BTF DATASEC. Treat them the same
+ * as unannotated extern funcs (which are
+ * currently not put into any DATASECs).
+ * Those don't have associated src_sec/dst_sec.
+ */
+ if (strcmp(sec_name, BTF_EXTERN_SEC) != 0) {
+ src_sec = find_src_sec_by_name(obj, sec_name);
+ if (!src_sec) {
+ pr_warn("failed to find matching ELF sec '%s'\n", sec_name);
+ return -ENOENT;
+ }
+ dst_sec = &linker->secs[src_sec->dst_id];
+ }
+ }
+
+ glob_sym = find_glob_sym(linker, sym_name);
+ if (glob_sym) {
+ /* Preventively resolve to existing symbol. This is
+ * needed for further relocation symbol remapping in
+ * the next step of linking.
+ */
+ obj->sym_map[src_sym_idx] = glob_sym->sym_idx;
+
+ /* If both symbols are non-externs, at least one of
+ * them has to be STB_WEAK, otherwise they are in
+ * a conflict with each other.
+ */
+ if (!sym_is_extern && !glob_sym->is_extern
+ && !glob_sym->is_weak && sym_bind != STB_WEAK) {
+ pr_warn("conflicting non-weak symbol #%d (%s) definition in '%s'\n",
+ src_sym_idx, sym_name, obj->filename);
+ return -EINVAL;
+ }
- obj->sym_map[i] = dst_sym_idx;
+ if (!glob_syms_match(sym_name, linker, glob_sym, obj, sym, src_sym_idx, btf_id))
+ return -EINVAL;
+
+ dst_sym = get_sym_by_idx(linker, glob_sym->sym_idx);
- if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && dst_sym) {
- dst_sec->sec_sym_idx = dst_sym_idx;
- dst_sym->st_value = 0;
+ /* If new symbol is strong, then force dst_sym to be strong as
+ * well; this way a mix of weak and non-weak extern
+ * definitions will end up being strong.
+ */
+ if (sym_bind == STB_GLOBAL) {
+ /* We still need to preserve type (NOTYPE or
+ * OBJECT/FUNC, depending on whether the symbol is
+ * extern or not)
+ */
+ sym_update_bind(dst_sym, STB_GLOBAL);
+ glob_sym->is_weak = false;
}
+ /* Non-default visibility is "contaminating", with stricter
+ * visibility overwriting more permissive ones, even if more
+ * permissive visibility comes from just an extern definition.
+ * Currently only STV_DEFAULT and STV_HIDDEN are allowed and
+ * ensured by ELF symbol sanity checks above.
+ */
+ if (sym_vis > ELF64_ST_VISIBILITY(dst_sym->st_other))
+ sym_update_visibility(dst_sym, sym_vis);
+
+ /* If the new symbol is extern, then regardless if
+ * existing symbol is extern or resolved global, just
+ * keep the existing one untouched.
+ */
+ if (sym_is_extern)
+ return 0;
+
+ /* If existing symbol is a strong resolved symbol, bail out,
+ * because we lost the resolution battle and have nothing to
+ * contribute. We already checked above that there is no
+ * strong-strong conflict. We also already tightened binding
+ * and visibility, so nothing else to contribute at that point.
+ */
+ if (!glob_sym->is_extern && sym_bind == STB_WEAK)
+ return 0;
+
+ /* At this point, new symbol is strong non-extern,
+ * so overwrite glob_sym with new symbol information.
+ * Preserve binding and visibility.
+ */
+ sym_update_type(dst_sym, sym_type);
+ dst_sym->st_shndx = dst_sec->sec_idx;
+ dst_sym->st_value = src_sec->dst_off + sym->st_value;
+ dst_sym->st_size = sym->st_size;
+
+ /* see comment below about dst_sec->id vs dst_sec->sec_idx */
+ glob_sym->sec_id = dst_sec->id;
+ glob_sym->is_extern = false;
+
+ if (complete_extern_btf_info(linker->btf, glob_sym->btf_id,
+ obj->btf, btf_id))
+ return -EINVAL;
+
+ /* request updating VAR's/FUNC's underlying BTF type when appending BTF type */
+ glob_sym->underlying_btf_id = 0;
+
+ obj->sym_map[src_sym_idx] = glob_sym->sym_idx;
+ return 0;
+ }
+
+add_sym:
+ name_off = strset__add_str(linker->strtab_strs, sym_name);
+ if (name_off < 0)
+ return name_off;
+
+ dst_sym = add_new_sym(linker, &dst_sym_idx);
+ if (!dst_sym)
+ return -ENOMEM;
+
+ dst_sym->st_name = name_off;
+ dst_sym->st_info = sym->st_info;
+ dst_sym->st_other = sym->st_other;
+ dst_sym->st_shndx = dst_sec ? dst_sec->sec_idx : sym->st_shndx;
+ dst_sym->st_value = (src_sec ? src_sec->dst_off : 0) + sym->st_value;
+ dst_sym->st_size = sym->st_size;
+
+ obj->sym_map[src_sym_idx] = dst_sym_idx;
+
+ if (sym_type == STT_SECTION && dst_sym) {
+ dst_sec->sec_sym_idx = dst_sym_idx;
+ dst_sym->st_value = 0;
+ }
+
+ if (sym_bind != STB_LOCAL) {
+ glob_sym = add_glob_sym(linker);
+ if (!glob_sym)
+ return -ENOMEM;
+
+ glob_sym->sym_idx = dst_sym_idx;
+ /* we use dst_sec->id (and not dst_sec->sec_idx), because
+ * ephemeral sections (.kconfig, .ksyms, etc) don't have
+ * sec_idx (as they don't have corresponding ELF section), but
+ * still have id. .extern doesn't have even ephemeral section
+ * associated with it, so dst_sec->id == dst_sec->sec_idx == 0.
+ */
+ glob_sym->sec_id = dst_sec ? dst_sec->id : 0;
+ glob_sym->name_off = name_off;
+ /* we will fill btf_id in during BTF merging step */
+ glob_sym->btf_id = 0;
+ glob_sym->is_extern = sym_is_extern;
+ glob_sym->is_weak = sym_bind == STB_WEAK;
}
return 0;
@@ -1200,7 +2014,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
return err;
}
} else if (!secs_match(dst_sec, src_sec)) {
- pr_warn("Secs %s are not compatible\n", src_sec->sec_name);
+ pr_warn("sections %s are not compatible\n", src_sec->sec_name);
return -1;
}
@@ -1212,7 +2026,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
dst_sec->shdr->sh_info = dst_linked_sec->sec_idx;
src_sec->dst_id = dst_sec->id;
- err = extend_sec(dst_sec, src_sec);
+ err = extend_sec(linker, dst_sec, src_sec);
if (err)
return err;
@@ -1265,21 +2079,6 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
return 0;
}
-static struct src_sec *find_src_sec_by_name(struct src_obj *obj, const char *sec_name)
-{
- struct src_sec *sec;
- int i;
-
- for (i = 1; i < obj->sec_cnt; i++) {
- sec = &obj->secs[i];
-
- if (strcmp(sec->sec_name, sec_name) == 0)
- return sec;
- }
-
- return NULL;
-}
-
static Elf64_Sym *find_sym_by_name(struct src_obj *obj, size_t sec_idx,
int sym_type, const char *sym_name)
{
@@ -1334,12 +2133,32 @@ static int linker_fixup_btf(struct src_obj *obj)
t->size = sec->shdr->sh_size;
} else {
/* BTF can have some sections that are not represented
- * in ELF, e.g., .kconfig and .ksyms, which are used
- * for special extern variables. Here we'll
- * pre-create "section shells" for them to be able to
- * keep track of extra per-section metadata later
- * (e.g., BTF variables).
+ * in ELF, e.g., .kconfig, .ksyms, .extern, which are used
+ * for special extern variables.
+ *
+ * For all but one such special (ephemeral)
+ * sections, we pre-create "section shells" to be able
+ * to keep track of extra per-section metadata later
+ * (e.g., those BTF extern variables).
+ *
+ * .extern is even more special, though, because it
+ * contains extern variables that need to be resolved
+ * by static linker, not libbpf and kernel. When such
+ * externs are resolved, we are going to remove them
+ * from .extern BTF section and might end up not
+ * needing it at all. Each resolved extern should have
+ * matching non-extern VAR/FUNC in other sections.
+ *
+ * We do support leaving some of the externs
+ * unresolved, though, to support cases of building
+ * libraries, which will later be linked against final
+ * BPF applications. So if at finalization we still
+ * see unresolved externs, we'll create .extern
+ * section on our own.
*/
+ if (strcmp(sec_name, BTF_EXTERN_SEC) == 0)
+ continue;
+
sec = add_src_sec(obj, sec_name);
if (!sec)
return -ENOMEM;
@@ -1379,6 +2198,13 @@ static int linker_fixup_btf(struct src_obj *obj)
static int remap_type_id(__u32 *type_id, void *ctx)
{
int *id_map = ctx;
+ int new_id = id_map[*type_id];
+
+ /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. */
+ if (new_id == 0 && *type_id != 0) {
+ pr_warn("failed to find new ID mapping for original BTF type ID %u\n", *type_id);
+ return -EINVAL;
+ }
*type_id = id_map[*type_id];
@@ -1389,6 +2215,7 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
{
const struct btf_type *t;
int i, j, n, start_id, id;
+ const char *name;
if (!obj->btf)
return 0;
@@ -1401,12 +2228,44 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
return -ENOMEM;
for (i = 1; i <= n; i++) {
+ struct glob_sym *glob_sym = NULL;
+
t = btf__type_by_id(obj->btf, i);
/* DATASECs are handled specially below */
if (btf_kind(t) == BTF_KIND_DATASEC)
continue;
+ if (btf_is_non_static(t)) {
+ /* there should be glob_sym already */
+ name = btf__str_by_offset(obj->btf, t->name_off);
+ glob_sym = find_glob_sym(linker, name);
+
+ /* VARs without corresponding glob_sym are those that
+ * belong to skipped/deduplicated sections (i.e.,
+ * license and version), so just skip them
+ */
+ if (!glob_sym)
+ continue;
+
+ /* linker_append_elf_sym() might have requested
+ * updating underlying type ID, if extern was resolved
+ * to strong symbol or weak got upgraded to non-weak
+ */
+ if (glob_sym->underlying_btf_id == 0)
+ glob_sym->underlying_btf_id = -t->type;
+
+ /* globals from previous object files that match our
+ * VAR/FUNC already have a corresponding associated
+ * BTF type, so just make sure to use it
+ */
+ if (glob_sym->btf_id) {
+ /* reuse existing BTF type for global var/func */
+ obj->btf_type_map[i] = glob_sym->btf_id;
+ continue;
+ }
+ }
+
id = btf__add_type(linker->btf, obj->btf, t);
if (id < 0) {
pr_warn("failed to append BTF type #%d from file '%s'\n", i, obj->filename);
@@ -1414,6 +2273,12 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
}
obj->btf_type_map[i] = id;
+
+ /* record just appended BTF type for var/func */
+ if (glob_sym) {
+ glob_sym->btf_id = id;
+ glob_sym->underlying_btf_id = -t->type;
+ }
}
/* remap all the types except DATASECs */
@@ -1425,6 +2290,22 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
return -EINVAL;
}
+ /* Rewrite VAR/FUNC underlying types (i.e., FUNC's FUNC_PROTO and VAR's
+ * actual type), if necessary
+ */
+ for (i = 0; i < linker->glob_sym_cnt; i++) {
+ struct glob_sym *glob_sym = &linker->glob_syms[i];
+ struct btf_type *glob_t;
+
+ if (glob_sym->underlying_btf_id >= 0)
+ continue;
+
+ glob_sym->underlying_btf_id = obj->btf_type_map[-glob_sym->underlying_btf_id];
+
+ glob_t = btf_type_by_id(linker->btf, glob_sym->btf_id);
+ glob_t->type = glob_sym->underlying_btf_id;
+ }
+
/* append DATASEC info */
for (i = 1; i < obj->sec_cnt; i++) {
struct src_sec *src_sec;
@@ -1452,6 +2333,42 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
n = btf_vlen(t);
for (j = 0; j < n; j++, src_var++) {
void *sec_vars = dst_sec->sec_vars;
+ int new_id = obj->btf_type_map[src_var->type];
+ struct glob_sym *glob_sym = NULL;
+
+ t = btf_type_by_id(linker->btf, new_id);
+ if (btf_is_non_static(t)) {
+ name = btf__str_by_offset(linker->btf, t->name_off);
+ glob_sym = find_glob_sym(linker, name);
+ if (glob_sym->sec_id != dst_sec->id) {
+ pr_warn("global '%s': section mismatch %d vs %d\n",
+ name, glob_sym->sec_id, dst_sec->id);
+ return -EINVAL;
+ }
+ }
+
+ /* If there is already a member (VAR or FUNC) mapped
+ * to the same type, don't add a duplicate entry.
+ * This will happen when multiple object files define
+ * the same extern VARs/FUNCs.
+ */
+ if (glob_sym && glob_sym->var_idx >= 0) {
+ __s64 sz;
+
+ dst_var = &dst_sec->sec_vars[glob_sym->var_idx];
+ /* Because underlying BTF type might have
+ * changed, so might its size have changed, so
+ * re-calculate and update it in sec_var.
+ */
+ sz = btf__resolve_size(linker->btf, glob_sym->underlying_btf_id);
+ if (sz < 0) {
+ pr_warn("global '%s': failed to resolve size of underlying type: %d\n",
+ name, (int)sz);
+ return -EINVAL;
+ }
+ dst_var->size = sz;
+ continue;
+ }
sec_vars = libbpf_reallocarray(sec_vars,
dst_sec->sec_var_cnt + 1,
@@ -1466,6 +2383,9 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
dst_var->type = obj->btf_type_map[src_var->type];
dst_var->size = src_var->size;
dst_var->offset = src_sec->dst_off + src_var->offset;
+
+ if (glob_sym)
+ glob_sym->var_idx = dst_sec->sec_var_cnt - 1;
}
}
@@ -1895,7 +2815,7 @@ static int finalize_btf_ext(struct bpf_linker *linker)
hdr->func_info_len = funcs_sz;
hdr->line_info_off = funcs_sz;
hdr->line_info_len = lines_sz;
- hdr->core_relo_off = funcs_sz + lines_sz;;
+ hdr->core_relo_off = funcs_sz + lines_sz;
hdr->core_relo_len = core_relos_sz;
if (funcs_sz) {
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index a402f32a145c..91130648d8e6 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -39,8 +39,6 @@ EXTRA_WARNINGS += -Wundef
EXTRA_WARNINGS += -Wwrite-strings
EXTRA_WARNINGS += -Wformat
-CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?)
-
# Makefiles suck: This macro sets a default value of $(2) for the
# variable named by $(1), unless the variable has been set by
# environment or command line. This is necessary for CC and AR
@@ -52,12 +50,22 @@ define allow-override
$(eval $(1) = $(2)))
endef
+ifneq ($(LLVM),)
+$(call allow-override,CC,clang)
+$(call allow-override,AR,llvm-ar)
+$(call allow-override,LD,ld.lld)
+$(call allow-override,CXX,clang++)
+$(call allow-override,STRIP,llvm-strip)
+else
# Allow setting various cross-compile vars or setting CROSS_COMPILE as a prefix.
$(call allow-override,CC,$(CROSS_COMPILE)gcc)
$(call allow-override,AR,$(CROSS_COMPILE)ar)
$(call allow-override,LD,$(CROSS_COMPILE)ld)
$(call allow-override,CXX,$(CROSS_COMPILE)g++)
$(call allow-override,STRIP,$(CROSS_COMPILE)strip)
+endif
+
+CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?)
ifneq ($(LLVM),)
HOSTAR ?= llvm-ar
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 6448c626498f..283e5ad8385e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -21,13 +21,18 @@ endif
BPF_GCC ?= $(shell command -v bpf-gcc;)
SAN_CFLAGS ?=
-CFLAGS += -g -Og -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \
+CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
-I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) \
-Dbpf_prog_load=bpf_prog_test_load \
-Dbpf_load_program=bpf_test_load_program
LDLIBS += -lcap -lelf -lz -lrt -lpthread
+# Silence some warnings when compiled with clang
+ifneq ($(LLVM),)
+CFLAGS += -Wno-unused-command-line-argument
+endif
+
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_verifier_log test_dev_cgroup \
@@ -182,7 +187,6 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
cp $(SCRATCH_DIR)/runqslower $@
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
-$(TEST_GEN_FILES): docs
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
@@ -201,10 +205,12 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
CC=$(HOSTCC) LD=$(HOSTLD) \
- EXTRA_CFLAGS='-g -Og' \
+ EXTRA_CFLAGS='-g -O0' \
OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install
+all: docs
+
docs:
$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
-f Makefile.docs \
@@ -219,7 +225,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
../../../include/uapi/linux/bpf.h \
| $(INCLUDE_DIR) $(BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
- EXTRA_CFLAGS='-g -Og' \
+ EXTRA_CFLAGS='-g -O0' \
DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
ifneq ($(BPFOBJ),$(HOST_BPFOBJ))
@@ -227,7 +233,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
../../../include/uapi/linux/bpf.h \
| $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \
- EXTRA_CFLAGS='-g -Og' \
+ EXTRA_CFLAGS='-g -O0' \
OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \
DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
endif
@@ -303,9 +309,15 @@ endef
SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
-LINKED_SKELS := test_static_linked.skel.h
+LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
+ linked_vars.skel.h linked_maps.skel.h
test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
+linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o
+linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o
+linked_maps.skel.h-deps := linked_maps1.o linked_maps2.o
+
+LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
# Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
@@ -325,7 +337,7 @@ TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))
TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS))
TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
- $$(filter-out $(SKEL_BLACKLIST), \
+ $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\
$$(TRUNNER_BPF_SRCS)))
TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
@@ -481,7 +493,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
# Make sure we are able to include and link libbpf against c++.
$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
$(call msg,CXX,,$@)
- $(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
+ $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
# Benchmark runner
$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 65fe318d1e71..3353778c30f8 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -193,3 +193,12 @@ Without it, the error from compiling bpf selftests looks like:
libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2
__ https://reviews.llvm.org/D93563
+
+Clang dependencies for static linking tests
+===========================================
+
+linked_vars, linked_maps, and linked_funcs tests depend on `Clang fix`__ to
+generate valid BTF information for weak variables. Please make sure you use
+Clang that contains the fix.
+
+__ https://reviews.llvm.org/D100362
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 37e1f303fc11..5192305159ec 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -44,3 +44,5 @@ CONFIG_SECURITYFS=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_READ_POLICY=y
CONFIG_BLK_DEV_LOOP=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_DYNAMIC_FTRACE=y
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 74c45d557a2b..2d3590cfb5e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -147,6 +147,7 @@ static void test_task_stack(void)
return;
do_dummy_read(skel->progs.dump_task_stack);
+ do_dummy_read(skel->progs.get_task_user_stacks);
bpf_iter_task_stack__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 5c0448910426..63990842d20f 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -58,42 +58,73 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
test_cb cb)
{
struct bpf_object *obj = NULL, *tgt_obj;
+ __u32 retval, tgt_prog_id, info_len;
+ struct bpf_prog_info prog_info = {};
struct bpf_program **prog = NULL;
struct bpf_link **link = NULL;
- __u32 duration = 0, retval;
int err, tgt_fd, i;
+ struct btf *btf;
err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
&tgt_obj, &tgt_fd);
- if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
- target_obj_file, err, errno))
+ if (!ASSERT_OK(err, "tgt_prog_load"))
return;
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
.attach_prog_fd = tgt_fd,
);
+ info_len = sizeof(prog_info);
+ err = bpf_obj_get_info_by_fd(tgt_fd, &prog_info, &info_len);
+ if (!ASSERT_OK(err, "tgt_fd_get_info"))
+ goto close_prog;
+
+ tgt_prog_id = prog_info.id;
+ btf = bpf_object__btf(tgt_obj);
+
link = calloc(sizeof(struct bpf_link *), prog_cnt);
+ if (!ASSERT_OK_PTR(link, "link_ptr"))
+ goto close_prog;
+
prog = calloc(sizeof(struct bpf_program *), prog_cnt);
- if (CHECK(!link || !prog, "alloc_memory", "failed to alloc memory"))
+ if (!ASSERT_OK_PTR(prog, "prog_ptr"))
goto close_prog;
obj = bpf_object__open_file(obj_file, &opts);
- if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
- "failed to open %s: %ld\n", obj_file,
- PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
goto close_prog;
err = bpf_object__load(obj);
- if (CHECK(err, "obj_load", "err %d\n", err))
+ if (!ASSERT_OK(err, "obj_load"))
goto close_prog;
for (i = 0; i < prog_cnt; i++) {
+ struct bpf_link_info link_info;
+ char *tgt_name;
+ __s32 btf_id;
+
+ tgt_name = strstr(prog_name[i], "/");
+ if (!ASSERT_OK_PTR(tgt_name, "tgt_name"))
+ goto close_prog;
+ btf_id = btf__find_by_name_kind(btf, tgt_name + 1, BTF_KIND_FUNC);
+
prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]);
- if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name[i]))
+ if (!ASSERT_OK_PTR(prog[i], prog_name[i]))
goto close_prog;
+
link[i] = bpf_program__attach_trace(prog[i]);
- if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
+ if (!ASSERT_OK_PTR(link[i], "attach_trace"))
goto close_prog;
+
+ info_len = sizeof(link_info);
+ memset(&link_info, 0, sizeof(link_info));
+ err = bpf_obj_get_info_by_fd(bpf_link__fd(link[i]),
+ &link_info, &info_len);
+ ASSERT_OK(err, "link_fd_get_info");
+ ASSERT_EQ(link_info.tracing.attach_type,
+ bpf_program__get_expected_attach_type(prog[i]),
+ "link_attach_type");
+ ASSERT_EQ(link_info.tracing.target_obj_id, tgt_prog_id, "link_tgt_obj_id");
+ ASSERT_EQ(link_info.tracing.target_btf_id, btf_id, "link_tgt_btf_id");
}
if (cb) {
@@ -106,10 +137,9 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
goto close_prog;
err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ NULL, NULL, &retval, NULL);
+ ASSERT_OK(err, "prog_run");
+ ASSERT_EQ(retval, 0, "prog_run_ret");
if (check_data_map(obj, prog_cnt, false))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
index 6c4d42a2386f..ccc7e8a34ab6 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
@@ -39,7 +39,7 @@ void test_fexit_sleep(void)
goto cleanup;
cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel);
- if (CHECK(cpid == -1, "clone", strerror(errno)))
+ if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno)))
goto cleanup;
/* wait until first sys_nanosleep ends and second sys_nanosleep starts */
@@ -65,7 +65,7 @@ void test_fexit_sleep(void)
/* kill the thread to unwind sys_nanosleep stack through the trampoline */
kill(cpid, 9);
- if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno)))
+ if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno)))
goto cleanup;
if (CHECK(WEXITSTATUS(wstatus) != 0, "exitstatus", "failed"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
new file mode 100644
index 000000000000..e9916f2817ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_funcs.skel.h"
+
+void test_linked_funcs(void)
+{
+ int err;
+ struct linked_funcs *skel;
+
+ skel = linked_funcs__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->rodata->my_tid = syscall(SYS_gettid);
+ skel->bss->syscall_id = SYS_getpgid;
+
+ err = linked_funcs__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = linked_funcs__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger: both handlers only react to this thread's SYS_getpgid call */
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->output_val1, 2000 + 2000, "output_val1");
+ ASSERT_EQ(skel->bss->output_ctx1, SYS_getpgid, "output_ctx1");
+ ASSERT_EQ(skel->bss->output_weak1, 42, "output_weak1");
+
+ ASSERT_EQ(skel->bss->output_val2, 2 * 1000 + 2 * (2 * 1000), "output_val2");
+ ASSERT_EQ(skel->bss->output_ctx2, SYS_getpgid, "output_ctx2");
+ /* output_weak2 stays 0: the weak set_output_weak() that writes it loses */
+ ASSERT_EQ(skel->bss->output_weak2, 0, "output_weak2");
+
+cleanup:
+ linked_funcs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_maps.c b/tools/testing/selftests/bpf/prog_tests/linked_maps.c
new file mode 100644
index 000000000000..85dcaaaf2775
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_maps.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_maps.skel.h"
+
+void test_linked_maps(void)
+{
+ int err;
+ struct linked_maps *skel;
+
+ skel = linked_maps__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ err = linked_maps__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger: sys_enter handlers fill the maps, sys_exit handlers read them */
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->output_first1, 2000, "output_first1");
+ ASSERT_EQ(skel->bss->output_second1, 2, "output_second1");
+ ASSERT_EQ(skel->bss->output_weak1, 2, "output_weak1");
+
+cleanup:
+ linked_maps__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_vars.c b/tools/testing/selftests/bpf/prog_tests/linked_vars.c
new file mode 100644
index 000000000000..267166abe4c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_vars.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_vars.skel.h"
+
+void test_linked_vars(void)
+{
+ int err;
+ struct linked_vars *skel;
+
+ skel = linked_vars__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->input_bss1 = 1000;
+ skel->bss->input_bss2 = 2000;
+ skel->bss->input_bss_weak = 3000;
+
+ err = linked_vars__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = linked_vars__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger raw_tp/sys_enter handlers that sum up the linked variables */
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->output_bss1, 1000 + 2000 + 3000, "output_bss1");
+ ASSERT_EQ(skel->bss->output_bss2, 1000 + 2000 + 3000, "output_bss2");
+ /* 10 comes from "winner" input_data_weak in first obj file */
+ ASSERT_EQ(skel->bss->output_data1, 1 + 2 + 10, "output_data1");
+ ASSERT_EQ(skel->bss->output_data2, 1 + 2 + 10, "output_data2");
+ /* 100 comes from "winner" input_rodata_weak in first obj file */
+ ASSERT_EQ(skel->bss->output_rodata1, 11 + 22 + 100, "output_rodata1");
+ ASSERT_EQ(skel->bss->output_rodata2, 11 + 22 + 100, "output_rodata2");
+
+cleanup:
+ linked_vars__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
index c230a573c373..4972f92205c7 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_ptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
@@ -12,11 +12,22 @@ void test_map_ptr(void)
__u32 duration = 0, retval;
char buf[128];
int err;
+ int page_size = getpagesize();
- skel = map_ptr_kern__open_and_load();
- if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
+ skel = map_ptr_kern__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
+ err = bpf_map__set_max_entries(skel->maps.m_ringbuf, page_size);
+ if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+ goto cleanup;
+
+ err = map_ptr_kern__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ skel->bss->page_size = page_size;
+
err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4,
sizeof(pkt_v4), buf, NULL, &retval, NULL);
diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
index 9c3c5c0f068f..37b002ca1167 100644
--- a/tools/testing/selftests/bpf/prog_tests/mmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -29,22 +29,36 @@ void test_mmap(void)
struct test_mmap *skel;
__u64 val = 0;
- skel = test_mmap__open_and_load();
- if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+ skel = test_mmap__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ err = bpf_map__set_max_entries(skel->maps.rdonly_map, page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ /* at least 4 pages of data */
+ err = bpf_map__set_max_entries(skel->maps.data_map,
+ 4 * (page_size / sizeof(u64)));
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ err = test_mmap__load(skel);
+ if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+ goto cleanup;
+
bss_map = skel->maps.bss;
data_map = skel->maps.data_map;
data_map_fd = bpf_map__fd(data_map);
rdmap_fd = bpf_map__fd(skel->maps.rdonly_map);
- tmp1 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0);
+ tmp1 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0);
if (CHECK(tmp1 != MAP_FAILED, "rdonly_write_mmap", "unexpected success\n")) {
- munmap(tmp1, 4096);
+ munmap(tmp1, page_size);
goto cleanup;
}
/* now double-check if it's mmap()'able at all */
- tmp1 = mmap(NULL, 4096, PROT_READ, MAP_SHARED, rdmap_fd, 0);
+ tmp1 = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rdmap_fd, 0);
if (CHECK(tmp1 == MAP_FAILED, "rdonly_read_mmap", "failed: %d\n", errno))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index 31a3114906e2..2535788e135f 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -68,10 +68,10 @@ static void test_ns_current_pid_tgid_new_ns(void)
cpid = clone(test_current_pid_tgid, child_stack + STACK_SIZE,
CLONE_NEWPID | SIGCHLD, NULL);
- if (CHECK(cpid == -1, "clone", strerror(errno)))
+ if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno)))
return;
- if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno)))
+ if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno)))
return;
if (CHECK(WEXITSTATUS(wstatus) != 0, "newns_pidtgid", "failed"))
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index fddbc5db5d6a..de78617f6550 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -87,11 +87,20 @@ void test_ringbuf(void)
pthread_t thread;
long bg_ret = -1;
int err, cnt;
+ int page_size = getpagesize();
- skel = test_ringbuf__open_and_load();
- if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ skel = test_ringbuf__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ err = bpf_map__set_max_entries(skel->maps.ringbuf, page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ err = test_ringbuf__load(skel);
+ if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+ goto cleanup;
+
/* only trigger BPF program for current process */
skel->bss->pid = getpid();
@@ -110,9 +119,9 @@ void test_ringbuf(void)
CHECK(skel->bss->avail_data != 3 * rec_sz,
"err_avail_size", "exp %ld, got %ld\n",
3L * rec_sz, skel->bss->avail_data);
- CHECK(skel->bss->ring_size != 4096,
+ CHECK(skel->bss->ring_size != page_size,
"err_ring_size", "exp %ld, got %ld\n",
- 4096L, skel->bss->ring_size);
+ (long)page_size, skel->bss->ring_size);
CHECK(skel->bss->cons_pos != 0,
"err_cons_pos", "exp %ld, got %ld\n",
0L, skel->bss->cons_pos);
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index d37161e59bb2..cef63e703924 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -41,13 +41,42 @@ static int process_sample(void *ctx, void *data, size_t len)
void test_ringbuf_multi(void)
{
struct test_ringbuf_multi *skel;
- struct ring_buffer *ringbuf;
+ struct ring_buffer *ringbuf = NULL;
int err;
+ int page_size = getpagesize();
+ int proto_fd = -1;
- skel = test_ringbuf_multi__open_and_load();
- if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ skel = test_ringbuf_multi__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ err = bpf_map__set_max_entries(skel->maps.ringbuf1, page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ err = bpf_map__set_max_entries(skel->maps.ringbuf2, page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ err = bpf_map__set_max_entries(bpf_map__inner_map(skel->maps.ringbuf_arr), page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0);
+ if (CHECK(proto_fd == -1, "bpf_create_map", "bpf_create_map failed\n"))
+ goto cleanup;
+
+ err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd);
+ if (CHECK(err != 0, "bpf_map__set_inner_map_fd", "bpf_map__set_inner_map_fd failed\n"))
+ goto cleanup;
+
+ err = test_ringbuf_multi__load(skel);
+ if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+ goto cleanup;
+
+ close(proto_fd);
+ proto_fd = -1;
+
/* only trigger BPF program for current process */
skel->bss->pid = getpid();
@@ -97,6 +126,8 @@ void test_ringbuf_multi(void)
2L, skel->bss->total);
cleanup:
+ if (proto_fd >= 0)
+ close(proto_fd);
ring_buffer__free(ringbuf);
test_ringbuf_multi__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
new file mode 100644
index 000000000000..a958c22aec75
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <test_progs.h>
+#include "test_snprintf.skel.h"
+#include "test_snprintf_single.skel.h"
+
+#define EXP_NUM_OUT "-8 9 96 -424242 1337 DABBAD00"
+#define EXP_NUM_RET sizeof(EXP_NUM_OUT)
+
+#define EXP_IP_OUT "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001"
+#define EXP_IP_RET sizeof(EXP_IP_OUT)
+
+/* The third specifier, %pB, depends on compiler inlining so don't check it */
+#define EXP_SYM_OUT "schedule schedule+0x0/"
+#define MIN_SYM_RET sizeof(EXP_SYM_OUT)
+
+/* The third specifier, %p, is a hashed pointer which changes on every reboot */
+#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
+#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
+
+#define EXP_STR_OUT "str1 longstr"
+#define EXP_STR_RET sizeof(EXP_STR_OUT)
+
+#define EXP_OVER_OUT "%over"
+#define EXP_OVER_RET 10
+
+#define EXP_PAD_OUT " 4 000"
+#define EXP_PAD_RET 900007
+
+#define EXP_NO_ARG_OUT "simple case"
+#define EXP_NO_ARG_RET 12
+
+#define EXP_NO_BUF_RET 29
+
+void test_snprintf_positive(void)
+{
+ char exp_addr_out[] = EXP_ADDR_OUT;
+ char exp_sym_out[] = EXP_SYM_OUT;
+ struct test_snprintf *skel;
+
+ skel = test_snprintf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ if (!ASSERT_OK(test_snprintf__attach(skel), "skel_attach"))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ ASSERT_STREQ(skel->bss->num_out, EXP_NUM_OUT, "num_out");
+ ASSERT_EQ(skel->bss->num_ret, EXP_NUM_RET, "num_ret");
+
+ ASSERT_STREQ(skel->bss->ip_out, EXP_IP_OUT, "ip_out");
+ ASSERT_EQ(skel->bss->ip_ret, EXP_IP_RET, "ip_ret");
+
+ ASSERT_OK(memcmp(skel->bss->sym_out, exp_sym_out,
+ sizeof(exp_sym_out) - 1), "sym_out");
+ ASSERT_LT(MIN_SYM_RET, skel->bss->sym_ret, "sym_ret");
+
+ ASSERT_OK(memcmp(skel->bss->addr_out, exp_addr_out,
+ sizeof(exp_addr_out) - 1), "addr_out");
+ ASSERT_EQ(skel->bss->addr_ret, EXP_ADDR_RET, "addr_ret");
+
+ ASSERT_STREQ(skel->bss->str_out, EXP_STR_OUT, "str_out");
+ ASSERT_EQ(skel->bss->str_ret, EXP_STR_RET, "str_ret");
+
+ ASSERT_STREQ(skel->bss->over_out, EXP_OVER_OUT, "over_out");
+ ASSERT_EQ(skel->bss->over_ret, EXP_OVER_RET, "over_ret");
+
+ ASSERT_STREQ(skel->bss->pad_out, EXP_PAD_OUT, "pad_out");
+ ASSERT_EQ(skel->bss->pad_ret, EXP_PAD_RET, "pad_ret");
+
+ ASSERT_STREQ(skel->bss->noarg_out, EXP_NO_ARG_OUT, "no_arg_out");
+ ASSERT_EQ(skel->bss->noarg_ret, EXP_NO_ARG_RET, "no_arg_ret");
+
+ ASSERT_EQ(skel->bss->nobuf_ret, EXP_NO_BUF_RET, "no_buf_ret");
+
+cleanup:
+ test_snprintf__destroy(skel);
+}
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+/* Loads an eBPF object calling bpf_snprintf with up to 10 characters of fmt */
+static int load_single_snprintf(char *fmt)
+{
+ struct test_snprintf_single *skel;
+ int ret;
+
+ skel = test_snprintf_single__open();
+ if (!skel)
+ return -EINVAL;
+
+ memcpy(skel->rodata->fmt, fmt, min(strlen(fmt) + 1, 10));
+
+ ret = test_snprintf_single__load(skel);
+ test_snprintf_single__destroy(skel);
+
+ return ret;
+}
+
+void test_snprintf_negative(void)
+{
+ ASSERT_OK(load_single_snprintf("valid %d"), "valid usage");
+
+ ASSERT_ERR(load_single_snprintf("0123456789"), "no terminating zero");
+ ASSERT_ERR(load_single_snprintf("%d %d"), "too many specifiers");
+ ASSERT_ERR(load_single_snprintf("%pi5"), "invalid specifier 1");
+ ASSERT_ERR(load_single_snprintf("%a"), "invalid specifier 2");
+ ASSERT_ERR(load_single_snprintf("%"), "invalid specifier 3");
+ ASSERT_ERR(load_single_snprintf("%12345678"), "invalid specifier 4");
+ ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
+ ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
+ ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
+}
+
+void test_snprintf(void)
+{
+ if (test__start_subtest("snprintf_positive"))
+ test_snprintf_positive();
+ if (test__start_subtest("snprintf_negative"))
+ test_snprintf_negative();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index d5b44b135c00..4b937e5dbaca 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -3,6 +3,7 @@
#include "cgroup_helpers.h"
#include <linux/tcp.h>
+#include "sockopt_sk.skel.h"
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
@@ -191,60 +192,30 @@ err:
return -1;
}
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
+static void run_test(int cgroup_fd)
{
- enum bpf_attach_type attach_type;
- enum bpf_prog_type prog_type;
- struct bpf_program *prog;
- int err;
+ struct sockopt_sk *skel;
- err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
- if (err) {
- log_err("Failed to deduct types for %s BPF program", title);
- return -1;
- }
+ skel = sockopt_sk__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
- prog = bpf_object__find_program_by_title(obj, title);
- if (!prog) {
- log_err("Failed to find %s BPF program", title);
- return -1;
- }
-
- err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
- attach_type, 0);
- if (err) {
- log_err("Failed to attach %s BPF program", title);
- return -1;
- }
-
- return 0;
-}
-
-static void run_test(int cgroup_fd)
-{
- struct bpf_prog_load_attr attr = {
- .file = "./sockopt_sk.o",
- };
- struct bpf_object *obj;
- int ignored;
- int err;
-
- err = bpf_prog_load_xattr(&attr, &obj, &ignored);
- if (CHECK_FAIL(err))
- return;
+ skel->bss->page_size = getpagesize();
- err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
- if (CHECK_FAIL(err))
- goto close_bpf_object;
+ skel->links._setsockopt =
+ bpf_program__attach_cgroup(skel->progs._setsockopt, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links._setsockopt, "setsockopt_link"))
+ goto cleanup;
- err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
- if (CHECK_FAIL(err))
- goto close_bpf_object;
+ skel->links._getsockopt =
+ bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links._getsockopt, "getsockopt_link"))
+ goto cleanup;
- CHECK_FAIL(getsetsockopt());
+ ASSERT_OK(getsetsockopt(), "getsetsockopt");
-close_bpf_object:
- bpf_object__close(obj);
+cleanup:
+ sockopt_sk__destroy(skel);
}
void test_sockopt_sk(void)
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
index 50e59a2e142e..43c36f5f7649 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -35,3 +35,30 @@ int dump_task_stack(struct bpf_iter__task *ctx)
return 0;
}
+
+SEC("iter/task")
+int get_task_user_stacks(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ uint64_t buf_sz = 0;
+ int64_t res;
+
+ if (task == (void *)0)
+ return 0;
+
+ res = bpf_get_task_stack(task, entries,
+ MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, BPF_F_USER_STACK);
+ if (res <= 0)
+ return 0;
+
+ buf_sz += res;
+
+ /* If the verifier doesn't refine bpf_get_task_stack res, and instead
+ * assumes res is entirely unknown, this program will fail to load as
+ * the verifier will believe that max buf_sz value allows reading
+ * past the end of entries in bpf_seq_write call
+ */
+ bpf_seq_write(seq, &entries, buf_sz);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c
new file mode 100644
index 000000000000..b964ec1390c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* weak and shared between two files */
+const volatile int my_tid __weak;
+long syscall_id __weak;
+
+int output_val1;
+int output_ctx1;
+int output_weak1;
+
+/* same "subprog" name in all files, but it's ok because they all are static */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 1;
+}
+
+/* Global functions can't be void */
+int set_output_val1(int x)
+{
+ output_val1 = x + subprog(x);
+ return x;
+}
+
+/* This function can't be verified as global, as it assumes raw_tp/sys_enter
+ * context and accesses syscall id (second argument). So we mark it as
+ * __hidden, so that libbpf will mark it as static in the final object file,
+ * right before verifying it in the kernel.
+ *
+ * But we don't mark it as __hidden here, rather at extern site. __hidden is
+ * "contaminating" visibility, so it will get propagated from either extern or
+ * actual definition (including from the losing __weak definition).
+ */
+void set_output_ctx1(__u64 *ctx)
+{
+ output_ctx1 = ctx[1]; /* long id, same as in BPF_PROG below */
+}
+
+/* this weak instance should win because it's the first one */
+__weak int set_output_weak(int x)
+{
+ output_weak1 = x;
+ return x;
+}
+
+extern int set_output_val2(int x);
+
+/* here we'll force set_output_ctx2() to be __hidden in the final obj file */
+__hidden extern void set_output_ctx2(__u64 *ctx);
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler1, struct pt_regs *regs, long id)
+{
+ if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
+ return 0;
+
+ set_output_val2(1000);
+ set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */
+
+ /* keep input value the same across both files to avoid dependency on
+ * handler call order; differentiate by output_weak1 vs output_weak2.
+ */
+ set_output_weak(42);
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c
new file mode 100644
index 000000000000..575e958e60b7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* weak and shared between both files */
+const volatile int my_tid __weak;
+long syscall_id __weak;
+
+int output_val2;
+int output_ctx2;
+int output_weak2; /* should stay zero */
+
+/* same "subprog" name in all files, but it's ok because they all are static */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 2;
+}
+
+/* Global functions can't be void */
+int set_output_val2(int x)
+{
+ output_val2 = 2 * x + 2 * subprog(x);
+ return 2 * x;
+}
+
+/* This function can't be verified as global, as it assumes raw_tp/sys_enter
+ * context and accesses syscall id (second argument). So we mark it as
+ * __hidden, so that libbpf will mark it as static in the final object file,
+ * right before verifying it in the kernel.
+ *
+ * But we don't mark it as __hidden here, rather at extern site. __hidden is
+ * "contaminating" visibility, so it will get propagated from either extern or
+ * actual definition (including from the losing __weak definition).
+ */
+void set_output_ctx2(__u64 *ctx)
+{
+ output_ctx2 = ctx[1]; /* long id, same as in BPF_PROG below */
+}
+
+/* this weak instance should lose, because it will be processed second */
+__weak int set_output_weak(int x)
+{
+ output_weak2 = x;
+ return 2 * x;
+}
+
+extern int set_output_val1(int x);
+
+/* here we'll force set_output_ctx1() to be __hidden in the final obj file */
+__hidden extern void set_output_ctx1(__u64 *ctx);
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler2, struct pt_regs *regs, long id)
+{
+ if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
+ return 0;
+
+ set_output_val1(2000);
+ set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */
+
+ /* keep input value the same across both files to avoid dependency on
+ * handler call order; differentiate by output_weak1 vs output_weak2.
+ */
+ set_output_weak(42);
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_maps1.c b/tools/testing/selftests/bpf/progs/linked_maps1.c
new file mode 100644
index 000000000000..52291515cc72
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_maps1.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct my_key { long x; };
+struct my_value { long x; };
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct my_key);
+ __type(value, struct my_value);
+ __uint(max_entries, 16);
+} map1 SEC(".maps");
+
+/* Matches map2 definition in linked_maps2.c. Order of the attributes doesn't
+ * matter.
+ */
+typedef struct {
+ __uint(max_entries, 8);
+ __type(key, int);
+ __type(value, int);
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+} map2_t;
+
+extern map2_t map2 SEC(".maps");
+
+/* This should be the winning map definition, but we have no way of verifying,
+ * so we just make sure that it links and works without errors
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 16);
+} map_weak __weak SEC(".maps");
+
+int output_first1;
+int output_second1;
+int output_weak1;
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler_enter1)
+{
+ /* update values with key = 1 */
+ int key = 1, val = 1;
+ struct my_key key_struct = { .x = 1 };
+ struct my_value val_struct = { .x = 1000 };
+
+ bpf_map_update_elem(&map1, &key_struct, &val_struct, 0);
+ bpf_map_update_elem(&map2, &key, &val, 0);
+ bpf_map_update_elem(&map_weak, &key, &val, 0);
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int BPF_PROG(handler_exit1)
+{
+ /* lookup values with key = 2, set in another file */
+ int key = 2, *val;
+ struct my_key key_struct = { .x = 2 };
+ struct my_value *value_struct;
+
+ value_struct = bpf_map_lookup_elem(&map1, &key_struct);
+ if (value_struct)
+ output_first1 = value_struct->x;
+
+ val = bpf_map_lookup_elem(&map2, &key);
+ if (val)
+ output_second1 = *val;
+
+ val = bpf_map_lookup_elem(&map_weak, &key);
+ if (val)
+ output_weak1 = *val;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_maps2.c b/tools/testing/selftests/bpf/progs/linked_maps2.c
new file mode 100644
index 000000000000..0693687474ed
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_maps2.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* modifiers and typedefs are ignored when comparing key/value types */
+typedef struct my_key { long x; } key_type;
+typedef struct my_value { long x; } value_type;
+
+extern struct {
+ __uint(max_entries, 16);
+ __type(key, key_type);
+ __type(value, value_type);
+ __uint(type, BPF_MAP_TYPE_HASH);
+} map1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 8);
+} map2 SEC(".maps");
+
+/* this definition will lose, but it has to exactly match the winner */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 16);
+} map_weak __weak SEC(".maps");
+
+int output_first2;
+int output_second2;
+int output_weak2;
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler_enter2)
+{
+ /* update values with key = 2 */
+ int key = 2, val = 2;
+ key_type key_struct = { .x = 2 };
+ value_type val_struct = { .x = 2000 };
+
+ bpf_map_update_elem(&map1, &key_struct, &val_struct, 0);
+ bpf_map_update_elem(&map2, &key, &val, 0);
+ bpf_map_update_elem(&map_weak, &key, &val, 0);
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int BPF_PROG(handler_exit2)
+{
+ /* lookup values with key = 1, set in another file */
+ int key = 1, *val;
+ key_type key_struct = { .x = 1 };
+ value_type *value_struct;
+
+ value_struct = bpf_map_lookup_elem(&map1, &key_struct);
+ if (value_struct)
+ output_first2 = value_struct->x;
+
+ val = bpf_map_lookup_elem(&map2, &key);
+ if (val)
+ output_second2 = *val;
+
+ val = bpf_map_lookup_elem(&map_weak, &key);
+ if (val)
+ output_weak2 = *val;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_vars1.c b/tools/testing/selftests/bpf/progs/linked_vars1.c
new file mode 100644
index 000000000000..ef9e9d0bb0ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_vars1.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern int LINUX_KERNEL_VERSION __kconfig;
+/* this weak extern will become strong due to the other file's strong extern */
+extern bool CONFIG_BPF_SYSCALL __kconfig __weak;
+extern const void bpf_link_fops __ksym __weak;
+
+int input_bss1;
+int input_data1 = 1;
+const volatile int input_rodata1 = 11;
+
+int input_bss_weak __weak;
+/* these two definitions should win */
+int input_data_weak __weak = 10;
+const volatile int input_rodata_weak __weak = 100;
+
+extern int input_bss2;
+extern int input_data2;
+extern const int input_rodata2;
+
+int output_bss1;
+int output_data1;
+int output_rodata1;
+
+long output_sink1;
+
+static __noinline int get_bss_res(void)
+{
+ /* just make sure all the relocations work against .text as well */
+ return input_bss1 + input_bss2 + input_bss_weak;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler1)
+{
+ output_bss1 = get_bss_res();
+ output_data1 = input_data1 + input_data2 + input_data_weak;
+ output_rodata1 = input_rodata1 + input_rodata2 + input_rodata_weak;
+
+ /* make sure we actually use above special externs, otherwise compiler
+ * will optimize them out
+ */
+ output_sink1 = LINUX_KERNEL_VERSION
+ + CONFIG_BPF_SYSCALL
+ + (long)&bpf_link_fops;
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_vars2.c b/tools/testing/selftests/bpf/progs/linked_vars2.c
new file mode 100644
index 000000000000..e4f5bd388a3c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_vars2.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern int LINUX_KERNEL_VERSION __kconfig;
+/* when an extern is defined as both strong and weak, resulting symbol will be strong */
+extern bool CONFIG_BPF_SYSCALL __kconfig;
+extern const void __start_BTF __ksym;
+
+int input_bss2;
+int input_data2 = 2;
+const volatile int input_rodata2 = 22;
+
+int input_bss_weak __weak;
+/* these two weak variables should lose */
+int input_data_weak __weak = 20;
+const volatile int input_rodata_weak __weak = 200;
+
+extern int input_bss1;
+extern int input_data1;
+extern const int input_rodata1;
+
+int output_bss2;
+int output_data2;
+int output_rodata2;
+
+int output_sink2;
+
+static __noinline int get_data_res(void)
+{
+ /* just make sure all the relocations work against .text as well */
+ return input_data1 + input_data2 + input_data_weak;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler2)
+{
+ output_bss2 = input_bss1 + input_bss2 + input_bss_weak;
+ output_data2 = get_data_res();
+ output_rodata2 = input_rodata1 + input_rodata2 + input_rodata_weak;
+
+ /* make sure we actually use above special externs, otherwise compiler
+ * will optimize them out
+ */
+ output_sink2 = LINUX_KERNEL_VERSION
+ + CONFIG_BPF_SYSCALL
+ + (long)&__start_BTF;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index d8850bc6a9f1..d1d304c980f0 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -12,6 +12,7 @@ _Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND");
enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC;
__u32 g_line = 0;
+int page_size = 0; /* userspace should set it */
#define VERIFY_TYPE(type, func) ({ \
g_map_type = type; \
@@ -635,7 +636,6 @@ struct bpf_ringbuf_map {
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
- __uint(max_entries, 1 << 12);
} m_ringbuf SEC(".maps");
static inline int check_ringbuf(void)
@@ -643,7 +643,7 @@ static inline int check_ringbuf(void)
struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf;
struct bpf_map *map = (struct bpf_map *)&m_ringbuf;
- VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12));
+ VERIFY(check(&ringbuf->map, map, 0, 0, page_size));
return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
index fdb4bf4408fa..eeaf6e75c9a2 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
@@ -8,18 +8,6 @@ int _version SEC("version") = 1;
SEC("sk_msg1")
int bpf_prog1(struct sk_msg_md *msg)
{
- void *data_end = (void *)(long) msg->data_end;
- void *data = (void *)(long) msg->data;
-
- char *d;
-
- if (data + 8 > data_end)
- return SK_DROP;
-
- bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data);
- d = (char *)data;
- bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]);
-
return SK_PASS;
}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index d3597f81e6e9..8acdb99b5959 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -6,11 +6,8 @@
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
-#ifndef PAGE_SIZE
-#define PAGE_SIZE 4096
-#endif
+int page_size = 0; /* userspace should set it */
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
@@ -90,7 +87,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
* program can only see the first PAGE_SIZE
* bytes of data.
*/
- if (optval_end - optval != PAGE_SIZE)
+ if (optval_end - optval != page_size)
return 0; /* EPERM, unexpected data size */
return 1;
@@ -161,7 +158,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
/* Original optlen is larger than PAGE_SIZE. */
- if (ctx->optlen != PAGE_SIZE * 2)
+ if (ctx->optlen != page_size * 2)
return 0; /* EPERM, unexpected data size */
if (optval + 1 > optval_end)
@@ -175,7 +172,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
* program can only see the first PAGE_SIZE
* bytes of data.
*/
- if (optval_end - optval != PAGE_SIZE)
+ if (optval_end - optval != page_size)
return 0; /* EPERM, unexpected data size */
return 1;
diff --git a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c
index 4eb42cff5fe9..5a5cc19a15bf 100644
--- a/tools/testing/selftests/bpf/progs/test_mmap.c
+++ b/tools/testing/selftests/bpf/progs/test_mmap.c
@@ -9,7 +9,6 @@ char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 4096);
__uint(map_flags, BPF_F_MMAPABLE | BPF_F_RDONLY_PROG);
__type(key, __u32);
__type(value, char);
@@ -17,7 +16,6 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 512 * 4); /* at least 4 pages of data */
__uint(map_flags, BPF_F_MMAPABLE);
__type(key, __u32);
__type(value, __u64);
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
index 8ba9959b036b..6b3f288b7c63 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -15,7 +15,6 @@ struct sample {
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
- __uint(max_entries, 1 << 12);
} ringbuf SEC(".maps");
/* inputs */
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
index edf3b6953533..197b86546dca 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
@@ -15,7 +15,6 @@ struct sample {
struct ringbuf_map {
__uint(type, BPF_MAP_TYPE_RINGBUF);
- __uint(max_entries, 1 << 12);
} ringbuf1 SEC(".maps"),
ringbuf2 SEC(".maps");
@@ -31,6 +30,17 @@ struct {
},
};
+struct { /* map-in-map definition whose inner maps have the struct ringbuf_map layout */
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1); /* room for a single inner map */
+ __type(key, int);
+ __array(values, struct ringbuf_map);
+} ringbuf_hash SEC(".maps") = {
+ .values = {
+ [0] = &ringbuf1, /* entry 0 is statically initialised to ringbuf1 */
+ },
+};
+
/* inputs */
int pid = 0;
int target_ring = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c
new file mode 100644
index 000000000000..951a0301c553
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_snprintf.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char num_out[64] = {}; /* output buffer of the integer-format case in handler() */
+long num_ret = 0; /* BPF_SNPRINTF() result for num_out */
+
+char ip_out[64] = {}; /* output buffer of the IP-address case */
+long ip_ret = 0; /* BPF_SNPRINTF() result for ip_out */
+
+char sym_out[64] = {}; /* output buffer of the symbol-lookup case */
+long sym_ret = 0; /* BPF_SNPRINTF() result for sym_out */
+
+char addr_out[64] = {}; /* output buffer of the kernel-pointer case */
+long addr_ret = 0; /* BPF_SNPRINTF() result for addr_out */
+
+char str_out[64] = {}; /* output buffer of the string-embedding case */
+long str_ret = 0; /* BPF_SNPRINTF() result for str_out */
+
+char over_out[6] = {}; /* only 6 bytes: used by the overflow case */
+long over_ret = 0; /* BPF_SNPRINTF() result for over_out */
+
+char pad_out[10] = {}; /* only 10 bytes: used by the padding case */
+long pad_ret = 0; /* BPF_SNPRINTF() result for pad_out */
+
+char noarg_out[64] = {}; /* output buffer of the no-argument case */
+long noarg_ret = 0; /* BPF_SNPRINTF() result for noarg_out */
+
+long nobuf_ret = 0; /* BPF_SNPRINTF() result of the NULL-buffer case; there is no matching output buffer */
+
+extern const void schedule __ksym; /* only its address is used, as the argument of the symbol-lookup case */
+
+SEC("raw_tp/sys_enter") /* program is placed in the "raw_tp/sys_enter" section */
+int handler(const void *ctx) /* ctx is never read: every case below formats fixed inputs into the globals */
+{
+ /* Convenient values to pretty-print: 127.0.0.1, 15 zero bytes followed by 1, and two short strings */
+ const __u8 ex_ipv4[] = {127, 0, 0, 1};
+ const __u8 ex_ipv6[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ static const char str1[] = "str1";
+ static const char longstr[] = "longstr";
+
+ /* Integer types: six conversions fed six constants (signed, unsigned, hex, long and long long) */
+ num_ret = BPF_SNPRINTF(num_out, sizeof(num_out),
+ "%d %u %x %li %llu %lX",
+ -8, 9, 150, -424242, 1337, 0xDABBAD00);
+ /* IP addresses: the byte arrays are passed by address to one IPv4 and one IPv6 specifier */
+ ip_ret = BPF_SNPRINTF(ip_out, sizeof(ip_out), "%pi4 %pI6",
+ &ex_ipv4, &ex_ipv6);
+ /* Symbol lookup formatting: the address of schedule through %ps, %pS and %pB */
+ sym_ret = BPF_SNPRINTF(sym_out, sizeof(sym_out), "%ps %pS %pB",
+ &schedule, &schedule, &schedule);
+ /* Kernel pointers: a zero value for %pK, then the same constant for %px and %p */
+ addr_ret = BPF_SNPRINTF(addr_out, sizeof(addr_out), "%pK %px %p",
+ 0, 0xFFFF00000ADD4E55, 0xFFFF00000ADD4E55);
+ /* Strings embedding: a plain %s, then a %s carrying flag and width characters */
+ str_ret = BPF_SNPRINTF(str_out, sizeof(str_out), "%s %+05s",
+ str1, longstr);
+ /* Overflow: the format text is longer than the 6-byte over_out */
+ over_ret = BPF_SNPRINTF(over_out, sizeof(over_out), "%%overflow");
+ /* Padding of fixed width numbers; NOTE(review): the second width looks deliberately far larger than the 10-byte pad_out - confirm */
+ pad_ret = BPF_SNPRINTF(pad_out, sizeof(pad_out), "%5d %0900000X", 4, 4);
+ /* No args: a format string without any conversion */
+ noarg_ret = BPF_SNPRINTF(noarg_out, sizeof(noarg_out), "simple case");
+ /* No buffer: NULL output with size 0, so only the return value is recorded */
+ nobuf_ret = BPF_SNPRINTF(NULL, 0, "only interested in length %d", 60);
+
+ return 0; /* always 0; results are reported through the globals only */
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf_single.c b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
new file mode 100644
index 000000000000..402adaf344f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* Left zero-initialised here; the format string is filled from the userspace such that loading fails */
+static const char fmt[10];
+
+SEC("raw_tp/sys_enter") /* program is placed in the "raw_tp/sys_enter" section */
+int handler(const void *ctx) /* ctx is never read */
+{
+ unsigned long long arg = 42; /* the single value handed to the helper */
+
+ bpf_snprintf(NULL, 0, fmt, &arg, sizeof(arg)); /* no output buffer; the return value is discarded */
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index ba6eadfec565..e7b673117436 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -396,7 +396,7 @@ int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
SEC("geneve_set_tunnel")
int _geneve_set_tunnel(struct __sk_buff *skb)
{
- int ret, ret2;
+ int ret;
struct bpf_tunnel_key key;
struct geneve_opt gopt;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index e87c8546230e..ee7e3b45182a 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -210,7 +210,7 @@ extern int test__join_cgroup(const char *path);
#define ASSERT_ERR_PTR(ptr, name) ({ \
static int duration = 0; \
const void *___res = (ptr); \
- bool ___ok = IS_ERR(___res) \
+ bool ___ok = IS_ERR(___res); \
CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \
___ok; \
})
diff --git a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
index 69b048cf46d9..3e024c891178 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
@@ -42,3 +42,46 @@
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
+{
+ "bpf_get_task_stack return R0 range is refined",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), // r6 = 64-bit load from r1 + 0
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_6, 0), // ctx->meta->seq
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1, 8), // ctx->task
+ BPF_LD_MAP_FD(BPF_REG_1, 0), // fixup_map_array_48b
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), // store 0 at r10 - 8
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), // r2 = r10 - 8, the slot just zeroed
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), // r0 != 0: skip the early exit below
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(), // lookup returned 0: exit with r0 = 0
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2), // r7 (task) != 0: skip the early exit below
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(), // task is 0: exit with r0 = 0
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), // r1 = task
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), // r2 = pointer returned by the map lookup
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), // keep buf for seq_write
+ BPF_MOV64_IMM(BPF_REG_3, 48), // r3 = 48
+ BPF_MOV64_IMM(BPF_REG_4, 0), // r4 = 0
+ BPF_EMIT_CALL(BPF_FUNC_get_task_stack),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 0, 2), // signed r0 > 0: continue to seq_write
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(), // r0 <= 0: exit with r0 = 0
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), // r1 = seq loaded at the top
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_9), // r2 = saved buf
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), // r3 = value returned by get_task_stack
+ BPF_EMIT_CALL(BPF_FUNC_seq_write),
+
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_ITER,
+ .kfunc = "task",
+ .runs = -1, // Don't run, just load
+ .fixup_map_array_48b = { 3 }, // 3 is the position of the BPF_LD_MAP_FD entry in .insns
+},
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index a5ce26d548e4..9a41d8bb9ff1 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -1,6 +1,10 @@
# This mimics the top-level Makefile. We do it explicitly here so that this
# Makefile can operate with or without the kbuild infrastructure.
+ifneq ($(LLVM),)
+CC := clang
+else
CC := $(CROSS_COMPILE)gcc
+endif
ifeq (0,$(MAKELEVEL))
ifeq ($(OUTPUT),)