aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/bpf_devel_QA.rst30
-rw-r--r--Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml102
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/arm/boot/dts/uniphier-pxs2.dtsi2
-rw-r--r--arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi2
-rw-r--r--arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi4
-rw-r--r--drivers/net/bonding/bond_main.c7
-rw-r--r--drivers/net/ethernet/amd/pcnet32.c3
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c152
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h23
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c8
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c74
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h1
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c10
-rw-r--r--drivers/net/ethernet/davicom/dm9000.c1
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c276
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h193
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c293
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c256
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c14
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_devlink.c1
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c35
-rw-r--r--drivers/net/ethernet/sfc/ef10.c3
-rw-r--r--drivers/net/ethernet/sfc/farch.c16
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c12
-rw-r--r--drivers/net/ethernet/ti/davinci_emac.c4
-rw-r--r--drivers/net/ethernet/xscale/Kconfig1
-rw-r--r--drivers/net/ethernet/xscale/ixp4xx_eth.c215
-rw-r--r--drivers/net/geneve.c4
-rw-r--r--drivers/net/hyperv/netvsc_drv.c14
-rw-r--r--drivers/net/macvlan.c19
-rw-r--r--drivers/net/phy/intel-xway.c21
-rw-r--r--drivers/net/phy/marvell.c52
-rw-r--r--drivers/net/usb/r8152.c14
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c7
-rw-r--r--include/linux/bpf-cgroup.h1
-rw-r--r--include/linux/bpf.h23
-rw-r--r--include/linux/bpf_verifier.h9
-rw-r--r--include/linux/mlx5/eswitch.h11
-rw-r--r--include/linux/mlx5/vport.h8
-rw-r--r--include/linux/netfilter/nfnetlink.h33
-rw-r--r--include/linux/netfilter/x_tables.h16
-rw-r--r--include/linux/netfilter_arp/arp_tables.h8
-rw-r--r--include/linux/netfilter_bridge/ebtables.h9
-rw-r--r--include/linux/netfilter_ipv4/ip_tables.h11
-rw-r--r--include/linux/netfilter_ipv6/ip6_tables.h11
-rw-r--r--include/linux/platform_data/eth_ixp4xx.h2
-rw-r--r--include/linux/skmsg.h5
-rw-r--r--include/net/devlink.h5
-rw-r--r--include/net/netfilter/ipv4/nf_defrag_ipv4.h3
-rw-r--r--include/net/netfilter/ipv6/nf_defrag_ipv6.h3
-rw-r--r--include/net/netfilter/nf_nat.h2
-rw-r--r--include/net/netfilter/nf_tables.h8
-rw-r--r--include/net/netns/ipv4.h10
-rw-r--r--include/net/netns/ipv6.h9
-rw-r--r--include/net/netns/x_tables.h8
-rw-r--r--include/net/sock.h5
-rw-r--r--include/net/tcp.h2
-rw-r--r--include/net/udp.h2
-rw-r--r--include/uapi/linux/bpf.h67
-rw-r--r--include/uapi/linux/if_link.h1
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h4
-rw-r--r--kernel/bpf/core.c7
-rw-r--r--kernel/bpf/helpers.c306
-rw-r--r--kernel/bpf/inode.c2
-rw-r--r--kernel/bpf/syscall.c3
-rw-r--r--kernel/bpf/verifier.c84
-rw-r--r--kernel/trace/bpf_trace.c373
-rw-r--r--net/8021q/vlan.c3
-rw-r--r--net/8021q/vlan.h4
-rw-r--r--net/bridge/netfilter/ebt_limit.c4
-rw-r--r--net/bridge/netfilter/ebt_mark.c4
-rw-r--r--net/bridge/netfilter/ebt_mark_m.c4
-rw-r--r--net/bridge/netfilter/ebtable_broute.c10
-rw-r--r--net/bridge/netfilter/ebtable_filter.c26
-rw-r--r--net/bridge/netfilter/ebtable_nat.c27
-rw-r--r--net/bridge/netfilter/ebtables.c54
-rw-r--r--net/core/dev.c14
-rw-r--r--net/core/devlink.c11
-rw-r--r--net/core/neighbour.c4
-rw-r--r--net/core/sock_map.c5
-rw-r--r--net/ipv4/netfilter/arp_tables.c73
-rw-r--r--net/ipv4/netfilter/arptable_filter.c17
-rw-r--r--net/ipv4/netfilter/ip_tables.c86
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c8
-rw-r--r--net/ipv4/netfilter/iptable_filter.c17
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c23
-rw-r--r--net/ipv4/netfilter/iptable_nat.c59
-rw-r--r--net/ipv4/netfilter/iptable_raw.c17
-rw-r--r--net/ipv4/netfilter/iptable_security.c17
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c30
-rw-r--r--net/ipv4/tcp_bpf.c3
-rw-r--r--net/ipv4/udp_bpf.c5
-rw-r--r--net/ipv6/netfilter/ip6_tables.c84
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c17
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c24
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c58
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c17
-rw-r--r--net/ipv6/netfilter/ip6table_security.c17
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c29
-rw-r--r--net/mptcp/protocol.c25
-rw-r--r--net/netfilter/Kconfig10
-rw-r--r--net/netfilter/ipset/ip_set_core.c165
-rw-r--r--net/netfilter/nf_conntrack_netlink.c302
-rw-r--r--net/netfilter/nf_conntrack_proto.c8
-rw-r--r--net/netfilter/nf_log_syslog.c1
-rw-r--r--net/netfilter/nf_nat_core.c37
-rw-r--r--net/netfilter/nf_nat_proto.c38
-rw-r--r--net/netfilter/nf_tables_api.c663
-rw-r--r--net/netfilter/nf_tables_offload.c10
-rw-r--r--net/netfilter/nfnetlink.c58
-rw-r--r--net/netfilter/nfnetlink_acct.c80
-rw-r--r--net/netfilter/nfnetlink_cthelper.c57
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c146
-rw-r--r--net/netfilter/nfnetlink_log.c42
-rw-r--r--net/netfilter/nfnetlink_osf.c21
-rw-r--r--net/netfilter/nfnetlink_queue.c86
-rw-r--r--net/netfilter/nft_chain_filter.c5
-rw-r--r--net/netfilter/nft_compat.c33
-rw-r--r--net/netfilter/nft_dynset.c5
-rw-r--r--net/netfilter/nft_socket.c48
-rw-r--r--net/netfilter/nft_tproxy.c24
-rw-r--r--net/netfilter/x_tables.c34
-rw-r--r--net/netfilter/xt_TPROXY.c13
-rw-r--r--net/netfilter/xt_limit.c6
-rw-r--r--net/netfilter/xt_socket.c14
-rw-r--r--net/openvswitch/meter.c4
-rw-r--r--net/qrtr/mhi.c8
-rw-r--r--net/sched/sch_taprio.c6
-rw-r--r--net/vmw_vsock/virtio_transport_common.c28
-rw-r--r--net/vmw_vsock/vmci_transport.c3
-rw-r--r--net/xdp/xsk.c2
-rw-r--r--samples/bpf/tracex1_kern.c4
-rwxr-xr-xscripts/link-vmlinux.sh7
-rw-r--r--tools/bpf/bpftool/btf.c30
-rw-r--r--tools/bpf/bpftool/net.c2
-rw-r--r--tools/include/uapi/linux/bpf.h83
-rw-r--r--tools/lib/bpf/bpf_helpers.h21
-rw-r--r--tools/lib/bpf/bpf_tracing.h58
-rw-r--r--tools/lib/bpf/btf.c5
-rw-r--r--tools/lib/bpf/libbpf.c396
-rw-r--r--tools/lib/bpf/libbpf.h1
-rw-r--r--tools/lib/bpf/libbpf.map1
-rw-r--r--tools/lib/bpf/libbpf_internal.h45
-rw-r--r--tools/lib/bpf/linker.c1272
-rw-r--r--tools/scripts/Makefile.include12
-rw-r--r--tools/testing/selftests/bpf/Makefile28
-rw-r--r--tools/testing/selftests/bpf/README.rst9
-rw-r--r--tools/testing/selftests/bpf/config2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c58
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_sleep.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_funcs.c42
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_maps.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_vars.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_ptr.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mmap.c24
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf.c125
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_sk.c65
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c27
-rw-r--r--tools/testing/selftests/bpf/progs/linked_funcs1.c73
-rw-r--r--tools/testing/selftests/bpf/progs/linked_funcs2.c73
-rw-r--r--tools/testing/selftests/bpf/progs/linked_maps1.c82
-rw-r--r--tools/testing/selftests/bpf/progs/linked_maps2.c76
-rw-r--r--tools/testing/selftests/bpf/progs/linked_vars1.c54
-rw-r--r--tools/testing/selftests/bpf/progs/linked_vars2.c55
-rw-r--r--tools/testing/selftests/bpf/progs/map_ptr_kern.c4
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c12
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_sk.c11
-rw-r--r--tools/testing/selftests/bpf/progs/test_mmap.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_multi.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf.c73
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf_single.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_tunnel_kern.c2
-rw-r--r--tools/testing/selftests/bpf/test_progs.h2
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_get_stack.c43
-rw-r--r--tools/testing/selftests/lib.mk4
193 files changed, 6005 insertions, 2830 deletions
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 2ed89abbf9a4..253496af8fef 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -29,7 +29,7 @@ list:
This may also include issues related to XDP, BPF tracing, etc.
Given netdev has a high volume of traffic, please also add the BPF
-maintainers to Cc (from kernel MAINTAINERS_ file):
+maintainers to Cc (from kernel ``MAINTAINERS`` file):
* Alexei Starovoitov <[email protected]>
* Daniel Borkmann <[email protected]>
@@ -234,11 +234,11 @@ be subject to change.
Q: samples/bpf preference vs selftests?
---------------------------------------
-Q: When should I add code to `samples/bpf/`_ and when to BPF kernel
-selftests_ ?
+Q: When should I add code to ``samples/bpf/`` and when to BPF kernel
+selftests_?
A: In general, we prefer additions to BPF kernel selftests_ rather than
-`samples/bpf/`_. The rationale is very simple: kernel selftests are
+``samples/bpf/``. The rationale is very simple: kernel selftests are
regularly run by various bots to test for kernel regressions.
The more test cases we add to BPF selftests, the better the coverage
@@ -246,9 +246,9 @@ and the less likely it is that those could accidentally break. It is
not that BPF kernel selftests cannot demo how a specific feature can
be used.
-That said, `samples/bpf/`_ may be a good place for people to get started,
+That said, ``samples/bpf/`` may be a good place for people to get started,
so it might be advisable that simple demos of features could go into
-`samples/bpf/`_, but advanced functional and corner-case testing rather
+``samples/bpf/``, but advanced functional and corner-case testing rather
into kernel selftests.
If your sample looks like a test case, then go for BPF kernel selftests
@@ -449,6 +449,19 @@ from source at
https://github.com/acmel/dwarves
+pahole starts to use libbpf definitions and APIs since v1.13 after the
+commit 21507cd3e97b ("pahole: add libbpf as submodule under lib/bpf").
+It works well with the git repository because the libbpf submodule will
+use "git submodule update --init --recursive" to update.
+
+Unfortunately, the default github release source code does not contain
+libbpf submodule source code and this will cause build issues, the tarball
+from https://git.kernel.org/pub/scm/devel/pahole/pahole.git/ is same with
+github, you can get the source tarball with corresponding libbpf submodule
+codes from
+
+https://fedorapeople.org/~acme/dwarves
+
Some distros have pahole version 1.16 packaged already, e.g.
Fedora, Gentoo.
@@ -645,10 +658,9 @@ when:
.. Links
.. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/
-.. _MAINTAINERS: ../../MAINTAINERS
.. _netdev-FAQ: ../networking/netdev-FAQ.rst
-.. _samples/bpf/: ../../samples/bpf/
-.. _selftests: ../../tools/testing/selftests/bpf/
+.. _selftests:
+ https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
.. _Documentation/dev-tools/kselftest.rst:
https://www.kernel.org/doc/html/latest/dev-tools/kselftest.html
.. _Documentation/bpf/btf.rst: btf.rst
diff --git a/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml b/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml
new file mode 100644
index 000000000000..f2e91d1bf7d7
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2018 Linaro Ltd.
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/net/intel,ixp4xx-ethernet.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Intel IXP4xx ethernet
+
+allOf:
+ - $ref: "ethernet-controller.yaml#"
+
+maintainers:
+ - Linus Walleij <[email protected]>
+
+description: |
+ The Intel IXP4xx ethernet makes use of the IXP4xx NPE (Network
+ Processing Engine) and the IXP4xx Queue Manager to process
+ the ethernet frames. It can optionally contain an MDIO bus to
+ talk to PHYs.
+
+properties:
+ compatible:
+ const: intel,ixp4xx-ethernet
+
+ reg:
+ maxItems: 1
+ description: Ethernet MMIO address range
+
+ queue-rx:
+ $ref: '/schemas/types.yaml#/definitions/phandle-array'
+ maxItems: 1
+ description: phandle to the RX queue on the NPE
+
+ queue-txready:
+ $ref: '/schemas/types.yaml#/definitions/phandle-array'
+ maxItems: 1
+ description: phandle to the TX READY queue on the NPE
+
+ phy-mode: true
+
+ phy-handle: true
+
+ intel,npe-handle:
+ $ref: '/schemas/types.yaml#/definitions/phandle-array'
+ maxItems: 1
+ description: phandle to the NPE this ethernet instance is using
+ and the instance to use in the second cell
+
+ mdio:
+ type: object
+ $ref: "mdio.yaml#"
+ description: optional node for embedded MDIO controller
+
+required:
+ - compatible
+ - reg
+ - queue-rx
+ - queue-txready
+ - intel,npe-handle
+
+additionalProperties: false
+
+examples:
+ - |
+ npe: npe@c8006000 {
+ compatible = "intel,ixp4xx-network-processing-engine";
+ reg = <0xc8006000 0x1000>, <0xc8007000 0x1000>, <0xc8008000 0x1000>;
+ };
+
+ ethernet@c8009000 {
+ compatible = "intel,ixp4xx-ethernet";
+ reg = <0xc8009000 0x1000>;
+ status = "disabled";
+ queue-rx = <&qmgr 4>;
+ queue-txready = <&qmgr 21>;
+ intel,npe-handle = <&npe 1>;
+ phy-mode = "rgmii";
+ phy-handle = <&phy1>;
+ };
+
+ ethernet@c800c000 {
+ compatible = "intel,ixp4xx-ethernet";
+ reg = <0xc800c000 0x1000>;
+ status = "disabled";
+ queue-rx = <&qmgr 3>;
+ queue-txready = <&qmgr 20>;
+ intel,npe-handle = <&npe 2>;
+ phy-mode = "rgmii";
+ phy-handle = <&phy2>;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy1: ethernet-phy@1 {
+ reg = <1>;
+ };
+ phy2: ethernet-phy@2 {
+ reg = <2>;
+ };
+ };
+ };
diff --git a/MAINTAINERS b/MAINTAINERS
index c3c8fa572580..0d85ae9e61e2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8532,7 +8532,6 @@ IBM Power SRIOV Virtual NIC Device Driver
M: Dany Madden <[email protected]>
M: Sukadev Bhattiprolu <[email protected]>
R: Thomas Falcon <[email protected]>
-R: Lijun Pan <[email protected]>
S: Supported
F: drivers/net/ethernet/ibm/ibmvnic.*
diff --git a/arch/arm/boot/dts/uniphier-pxs2.dtsi b/arch/arm/boot/dts/uniphier-pxs2.dtsi
index b0b15c97306b..e81e5937a60a 100644
--- a/arch/arm/boot/dts/uniphier-pxs2.dtsi
+++ b/arch/arm/boot/dts/uniphier-pxs2.dtsi
@@ -583,7 +583,7 @@
clocks = <&sys_clk 6>;
reset-names = "ether";
resets = <&sys_rst 6>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
local-mac-address = [00 00 00 00 00 00];
socionext,syscon-phy-mode = <&soc_glue 0>;
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
index a87b8a678719..8f2c1c1e2c64 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
@@ -734,7 +734,7 @@
clocks = <&sys_clk 6>;
reset-names = "ether";
resets = <&sys_rst 6>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
local-mac-address = [00 00 00 00 00 00];
socionext,syscon-phy-mode = <&soc_glue 0>;
diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
index 0e52dadf54b3..be97da132258 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
@@ -564,7 +564,7 @@
clocks = <&sys_clk 6>;
reset-names = "ether";
resets = <&sys_rst 6>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
local-mac-address = [00 00 00 00 00 00];
socionext,syscon-phy-mode = <&soc_glue 0>;
@@ -585,7 +585,7 @@
clocks = <&sys_clk 7>;
reset-names = "ether";
resets = <&sys_rst 7>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
local-mac-address = [00 00 00 00 00 00];
socionext,syscon-phy-mode = <&soc_glue 1>;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d5ca38aa8aa9..20bbda1b36e1 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4391,9 +4391,7 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
int agg_id = 0;
int ret = 0;
-#ifdef CONFIG_LOCKDEP
- WARN_ON(lockdep_is_held(&bond->mode_lock));
-#endif
+ might_sleep();
usable_slaves = kzalloc(struct_size(usable_slaves, arr,
bond->slave_cnt), GFP_KERNEL);
@@ -4406,7 +4404,9 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
struct ad_info ad_info;
+ spin_lock_bh(&bond->mode_lock);
if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
+ spin_unlock_bh(&bond->mode_lock);
pr_debug("bond_3ad_get_active_agg_info failed\n");
/* No active aggragator means it's not safe to use
* the previous array.
@@ -4414,6 +4414,7 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
bond_reset_slave_arr(bond);
goto out;
}
+ spin_unlock_bh(&bond->mode_lock);
agg_id = ad_info.aggregator_id;
}
bond_for_each_slave(bond, slave, iter) {
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index f78daba60b35..aa412506832d 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -2853,8 +2853,7 @@ static void pcnet32_check_media(struct net_device *dev, int verbose)
netif_info(lp, link, dev, "link down\n");
}
if (lp->phycount > 1) {
- curr_link = pcnet32_check_otherphy(dev);
- prev_link = 0;
+ pcnet32_check_otherphy(dev);
}
} else if (verbose || !prev_link) {
netif_carrier_on(dev);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index e15d454e33f0..39ac9e2f5118 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -122,7 +122,10 @@ enum board_idx {
NETXTREME_E_VF,
NETXTREME_C_VF,
NETXTREME_S_VF,
+ NETXTREME_C_VF_HV,
+ NETXTREME_E_VF_HV,
NETXTREME_E_P5_VF,
+ NETXTREME_E_P5_VF_HV,
};
/* indexed by enum above */
@@ -170,7 +173,10 @@ static const struct {
[NETXTREME_E_VF] = { "Broadcom NetXtreme-E Ethernet Virtual Function" },
[NETXTREME_C_VF] = { "Broadcom NetXtreme-C Ethernet Virtual Function" },
[NETXTREME_S_VF] = { "Broadcom NetXtreme-S Ethernet Virtual Function" },
+ [NETXTREME_C_VF_HV] = { "Broadcom NetXtreme-C Virtual Function for Hyper-V" },
+ [NETXTREME_E_VF_HV] = { "Broadcom NetXtreme-E Virtual Function for Hyper-V" },
[NETXTREME_E_P5_VF] = { "Broadcom BCM5750X NetXtreme-E Ethernet Virtual Function" },
+ [NETXTREME_E_P5_VF_HV] = { "Broadcom BCM5750X NetXtreme-E Virtual Function for Hyper-V" },
};
static const struct pci_device_id bnxt_pci_tbl[] = {
@@ -222,15 +228,25 @@ static const struct pci_device_id bnxt_pci_tbl[] = {
{ PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 },
#ifdef CONFIG_BNXT_SRIOV
{ PCI_VDEVICE(BROADCOM, 0x1606), .driver_data = NETXTREME_E_VF },
+ { PCI_VDEVICE(BROADCOM, 0x1607), .driver_data = NETXTREME_E_VF_HV },
+ { PCI_VDEVICE(BROADCOM, 0x1608), .driver_data = NETXTREME_E_VF_HV },
{ PCI_VDEVICE(BROADCOM, 0x1609), .driver_data = NETXTREME_E_VF },
+ { PCI_VDEVICE(BROADCOM, 0x16bd), .driver_data = NETXTREME_E_VF_HV },
{ PCI_VDEVICE(BROADCOM, 0x16c1), .driver_data = NETXTREME_E_VF },
+ { PCI_VDEVICE(BROADCOM, 0x16c2), .driver_data = NETXTREME_C_VF_HV },
+ { PCI_VDEVICE(BROADCOM, 0x16c3), .driver_data = NETXTREME_C_VF_HV },
+ { PCI_VDEVICE(BROADCOM, 0x16c4), .driver_data = NETXTREME_E_VF_HV },
+ { PCI_VDEVICE(BROADCOM, 0x16c5), .driver_data = NETXTREME_E_VF_HV },
{ PCI_VDEVICE(BROADCOM, 0x16cb), .driver_data = NETXTREME_C_VF },
{ PCI_VDEVICE(BROADCOM, 0x16d3), .driver_data = NETXTREME_E_VF },
{ PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = NETXTREME_E_VF },
{ PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = NETXTREME_C_VF },
{ PCI_VDEVICE(BROADCOM, 0x16e5), .driver_data = NETXTREME_C_VF },
+ { PCI_VDEVICE(BROADCOM, 0x16e6), .driver_data = NETXTREME_C_VF_HV },
{ PCI_VDEVICE(BROADCOM, 0x1806), .driver_data = NETXTREME_E_P5_VF },
{ PCI_VDEVICE(BROADCOM, 0x1807), .driver_data = NETXTREME_E_P5_VF },
+ { PCI_VDEVICE(BROADCOM, 0x1808), .driver_data = NETXTREME_E_P5_VF_HV },
+ { PCI_VDEVICE(BROADCOM, 0x1809), .driver_data = NETXTREME_E_P5_VF_HV },
{ PCI_VDEVICE(BROADCOM, 0xd800), .driver_data = NETXTREME_S_VF },
#endif
{ 0 }
@@ -265,7 +281,8 @@ static struct workqueue_struct *bnxt_pf_wq;
static bool bnxt_vf_pciid(enum board_idx idx)
{
return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF ||
- idx == NETXTREME_S_VF || idx == NETXTREME_E_P5_VF);
+ idx == NETXTREME_S_VF || idx == NETXTREME_C_VF_HV ||
+ idx == NETXTREME_E_VF_HV || idx == NETXTREME_E_P5_VF);
}
#define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID)
@@ -358,6 +375,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct pci_dev *pdev = bp->pdev;
struct bnxt_tx_ring_info *txr;
struct bnxt_sw_tx_bd *tx_buf;
+ __le32 lflags = 0;
i = skb_get_queue_mapping(skb);
if (unlikely(i >= bp->tx_nr_rings)) {
@@ -399,6 +417,11 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
vlan_tag_flags |= 1 << TX_BD_CFA_META_TPID_SHIFT;
}
+ if (unlikely(skb->no_fcs)) {
+ lflags |= cpu_to_le32(TX_BD_FLAGS_NO_CRC);
+ goto normal_tx;
+ }
+
if (free_size == bp->tx_ring_size && length <= bp->tx_push_thresh) {
struct tx_push_buffer *tx_push_buf = txr->tx_push;
struct tx_push_bd *tx_push = &tx_push_buf->push_bd;
@@ -500,7 +523,7 @@ normal_tx:
txbd1 = (struct tx_bd_ext *)
&txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
- txbd1->tx_bd_hsize_lflags = 0;
+ txbd1->tx_bd_hsize_lflags = lflags;
if (skb_is_gso(skb)) {
u32 hdr_len;
@@ -512,14 +535,14 @@ normal_tx:
hdr_len = skb_transport_offset(skb) +
tcp_hdrlen(skb);
- txbd1->tx_bd_hsize_lflags = cpu_to_le32(TX_BD_FLAGS_LSO |
+ txbd1->tx_bd_hsize_lflags |= cpu_to_le32(TX_BD_FLAGS_LSO |
TX_BD_FLAGS_T_IPID |
(hdr_len << (TX_BD_HSIZE_SHIFT - 1)));
length = skb_shinfo(skb)->gso_size;
txbd1->tx_bd_mss = cpu_to_le32(length);
length += hdr_len;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
- txbd1->tx_bd_hsize_lflags =
+ txbd1->tx_bd_hsize_lflags |=
cpu_to_le32(TX_BD_FLAGS_TCP_UDP_CHKSUM);
txbd1->tx_bd_mss = 0;
}
@@ -1732,14 +1755,16 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
cons = rxcmp->rx_cmp_opaque;
if (unlikely(cons != rxr->rx_next_cons)) {
- int rc1 = bnxt_discard_rx(bp, cpr, raw_cons, rxcmp);
+ int rc1 = bnxt_discard_rx(bp, cpr, &tmp_raw_cons, rxcmp);
/* 0xffff is forced error, don't print it */
if (rxr->rx_next_cons != 0xffff)
netdev_warn(bp->dev, "RX cons %x != expected cons %x\n",
cons, rxr->rx_next_cons);
bnxt_sched_reset(bp, rxr);
- return rc1;
+ if (rc1)
+ return rc1;
+ goto next_rx_no_prod_no_len;
}
rx_buf = &rxr->rx_buf_ring[cons];
data = rx_buf->data;
@@ -4145,7 +4170,7 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init)
bnxt_free_ntp_fltrs(bp, irq_re_init);
if (irq_re_init) {
bnxt_free_ring_stats(bp);
- if (!(bp->fw_cap & BNXT_FW_CAP_PORT_STATS_NO_RESET) ||
+ if (!(bp->phy_flags & BNXT_PHY_FL_PORT_STATS_NO_RESET) ||
test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
bnxt_free_port_stats(bp);
bnxt_free_ring_grps(bp);
@@ -8340,11 +8365,11 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp)
#endif
}
-/* Allow PF and VF with default VLAN to be in promiscuous mode */
+/* Allow PF, trusted VFs and VFs with default VLAN to be in promiscuous mode */
static bool bnxt_promisc_ok(struct bnxt *bp)
{
#ifdef CONFIG_BNXT_SRIOV
- if (BNXT_VF(bp) && !bp->vf.vlan)
+ if (BNXT_VF(bp) && !bp->vf.vlan && !bnxt_is_trusted_vf(bp, &bp->vf))
return false;
#endif
return true;
@@ -8441,7 +8466,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
if (bp->dev->flags & IFF_BROADCAST)
vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_BCAST;
- if ((bp->dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp))
+ if (bp->dev->flags & IFF_PROMISC)
vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
if (bp->dev->flags & IFF_ALLMULTI) {
@@ -9075,8 +9100,9 @@ static char *bnxt_report_fec(struct bnxt_link_info *link_info)
static void bnxt_report_link(struct bnxt *bp)
{
if (bp->link_info.link_up) {
- const char *duplex;
+ const char *signal = "";
const char *flow_ctrl;
+ const char *duplex;
u32 speed;
u16 fec;
@@ -9098,9 +9124,24 @@ static void bnxt_report_link(struct bnxt *bp)
flow_ctrl = "ON - receive";
else
flow_ctrl = "none";
- netdev_info(bp->dev, "NIC Link is Up, %u Mbps %s duplex, Flow control: %s\n",
- speed, duplex, flow_ctrl);
- if (bp->flags & BNXT_FLAG_EEE_CAP)
+ if (bp->link_info.phy_qcfg_resp.option_flags &
+ PORT_PHY_QCFG_RESP_OPTION_FLAGS_SIGNAL_MODE_KNOWN) {
+ u8 sig_mode = bp->link_info.active_fec_sig_mode &
+ PORT_PHY_QCFG_RESP_SIGNAL_MODE_MASK;
+ switch (sig_mode) {
+ case PORT_PHY_QCFG_RESP_SIGNAL_MODE_NRZ:
+ signal = "(NRZ) ";
+ break;
+ case PORT_PHY_QCFG_RESP_SIGNAL_MODE_PAM4:
+ signal = "(PAM4) ";
+ break;
+ default:
+ break;
+ }
+ }
+ netdev_info(bp->dev, "NIC Link is Up, %u Mbps %s%s duplex, Flow control: %s\n",
+ speed, signal, duplex, flow_ctrl);
+ if (bp->phy_flags & BNXT_PHY_FL_EEE_CAP)
netdev_info(bp->dev, "EEE is %s\n",
bp->eee.eee_active ? "active" :
"not active");
@@ -9132,10 +9173,6 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
struct hwrm_port_phy_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
struct bnxt_link_info *link_info = &bp->link_info;
- bp->flags &= ~BNXT_FLAG_EEE_CAP;
- if (bp->test_info)
- bp->test_info->flags &= ~(BNXT_TEST_FL_EXT_LPBK |
- BNXT_TEST_FL_AN_PHY_LPBK);
if (bp->hwrm_spec_code < 0x10201)
return 0;
@@ -9146,31 +9183,17 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
if (rc)
goto hwrm_phy_qcaps_exit;
+ bp->phy_flags = resp->flags;
if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED) {
struct ethtool_eee *eee = &bp->eee;
u16 fw_speeds = le16_to_cpu(resp->supported_speeds_eee_mode);
- bp->flags |= BNXT_FLAG_EEE_CAP;
eee->supported = _bnxt_fw_to_ethtool_adv_spds(fw_speeds, 0);
bp->lpi_tmr_lo = le32_to_cpu(resp->tx_lpi_timer_low) &
PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_LOW_MASK;
bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) &
PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK;
}
- if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_EXTERNAL_LPBK_SUPPORTED) {
- if (bp->test_info)
- bp->test_info->flags |= BNXT_TEST_FL_EXT_LPBK;
- }
- if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_AUTONEG_LPBK_SUPPORTED) {
- if (bp->test_info)
- bp->test_info->flags |= BNXT_TEST_FL_AN_PHY_LPBK;
- }
- if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_SHARED_PHY_CFG_SUPPORTED) {
- if (BNXT_PF(bp))
- bp->fw_cap |= BNXT_FW_CAP_SHARED_PORT_CFG;
- }
- if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET)
- bp->fw_cap |= BNXT_FW_CAP_PORT_STATS_NO_RESET;
if (bp->hwrm_spec_code >= 0x10a01) {
if (bnxt_phy_qcaps_no_speed(resp)) {
@@ -9261,7 +9284,7 @@ int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
PORT_PHY_QCFG_RESP_PHY_ADDR_MASK;
link_info->module_status = resp->module_status;
- if (bp->flags & BNXT_FLAG_EEE_CAP) {
+ if (bp->phy_flags & BNXT_PHY_FL_EEE_CAP) {
struct ethtool_eee *eee = &bp->eee;
u16 fw_speeds;
@@ -9497,7 +9520,8 @@ static int bnxt_hwrm_shutdown_link(struct bnxt *bp)
if (!BNXT_SINGLE_PF(bp))
return 0;
- if (pci_num_vf(bp->pdev))
+ if (pci_num_vf(bp->pdev) &&
+ !(bp->phy_flags & BNXT_PHY_FL_FW_MANAGED_LKDN))
return 0;
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1);
@@ -9782,7 +9806,9 @@ static ssize_t bnxt_show_temp(struct device *dev,
if (!rc)
len = sprintf(buf, "%u\n", resp->temp * 1000); /* display millidegree */
mutex_unlock(&bp->hwrm_cmd_lock);
- return rc ?: len;
+ if (rc)
+ return rc;
+ return len;
}
static SENSOR_DEVICE_ATTR(temp1_input, 0444, bnxt_show_temp, NULL, 0);
@@ -9839,7 +9865,7 @@ static bool bnxt_eee_config_ok(struct bnxt *bp)
struct ethtool_eee *eee = &bp->eee;
struct bnxt_link_info *link_info = &bp->link_info;
- if (!(bp->flags & BNXT_FLAG_EEE_CAP))
+ if (!(bp->phy_flags & BNXT_PHY_FL_EEE_CAP))
return true;
if (eee->eee_enabled) {
@@ -10486,7 +10512,7 @@ static void bnxt_set_rx_mode(struct net_device *dev)
CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST |
CFA_L2_SET_RX_MASK_REQ_MASK_BCAST);
- if ((dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp))
+ if (dev->flags & IFF_PROMISC)
mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
uc_update = bnxt_uc_list_updated(bp);
@@ -10562,6 +10588,9 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp)
}
skip_uc:
+ if ((vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS) &&
+ !bnxt_promisc_ok(bp))
+ vnic->rx_mask &= ~CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
if (rc && vnic->mc_list_count) {
netdev_info(bp->dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n",
@@ -10756,6 +10785,40 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
return rc;
}
+static netdev_features_t bnxt_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ struct bnxt *bp;
+ __be16 udp_port;
+ u8 l4_proto = 0;
+
+ features = vlan_features_check(skb, features);
+ if (!skb->encapsulation)
+ return features;
+
+ switch (vlan_get_protocol(skb)) {
+ case htons(ETH_P_IP):
+ l4_proto = ip_hdr(skb)->protocol;
+ break;
+ case htons(ETH_P_IPV6):
+ l4_proto = ipv6_hdr(skb)->nexthdr;
+ break;
+ default:
+ return features;
+ }
+
+ if (l4_proto != IPPROTO_UDP)
+ return features;
+
+ bp = netdev_priv(dev);
+ /* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */
+ udp_port = udp_hdr(skb)->dest;
+ if (udp_port == bp->vxlan_port || udp_port == bp->nge_port)
+ return features;
+ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words,
u32 *reg_buf)
{
@@ -12263,10 +12326,13 @@ static int bnxt_udp_tunnel_sync(struct net_device *netdev, unsigned int table)
unsigned int cmd;
udp_tunnel_nic_get_port(netdev, table, 0, &ti);
- if (ti.type == UDP_TUNNEL_TYPE_VXLAN)
+ if (ti.type == UDP_TUNNEL_TYPE_VXLAN) {
+ bp->vxlan_port = ti.port;
cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN;
- else
+ } else {
+ bp->nge_port = ti.port;
cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE;
+ }
if (ti.port)
return bnxt_hwrm_tunnel_dst_port_alloc(bp, ti.port, cmd);
@@ -12366,6 +12432,7 @@ static const struct net_device_ops bnxt_netdev_ops = {
.ndo_change_mtu = bnxt_change_mtu,
.ndo_fix_features = bnxt_fix_features,
.ndo_set_features = bnxt_set_features,
+ .ndo_features_check = bnxt_features_check,
.ndo_tx_timeout = bnxt_tx_timeout,
#ifdef CONFIG_BNXT_SRIOV
.ndo_get_vf_config = bnxt_get_vf_config,
@@ -12434,12 +12501,17 @@ static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt)
int rc = 0;
struct bnxt_link_info *link_info = &bp->link_info;
+ bp->phy_flags = 0;
rc = bnxt_hwrm_phy_qcaps(bp);
if (rc) {
netdev_err(bp->dev, "Probe phy can't get phy capabilities (rc: %x)\n",
rc);
return rc;
}
+ if (bp->phy_flags & BNXT_PHY_FL_NO_FCS)
+ bp->dev->priv_flags |= IFF_SUPP_NOFCS;
+ else
+ bp->dev->priv_flags &= ~IFF_SUPP_NOFCS;
if (!fw_dflt)
return 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 29061c577baa..24d2ad6a8740 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1341,9 +1341,6 @@ struct bnxt_led_info {
struct bnxt_test_info {
u8 offline_mask;
- u8 flags;
-#define BNXT_TEST_FL_EXT_LPBK 0x1
-#define BNXT_TEST_FL_AN_PHY_LPBK 0x2
u16 timeout;
char string[BNXT_MAX_TEST][ETH_GSTRING_LEN];
};
@@ -1693,7 +1690,6 @@ struct bnxt {
#define BNXT_FLAG_SHARED_RINGS 0x200
#define BNXT_FLAG_PORT_STATS 0x400
#define BNXT_FLAG_UDP_RSS_CAP 0x800
- #define BNXT_FLAG_EEE_CAP 0x1000
#define BNXT_FLAG_NEW_RSS_CAP 0x2000
#define BNXT_FLAG_WOL_CAP 0x4000
#define BNXT_FLAG_ROCEV1_CAP 0x8000
@@ -1720,8 +1716,10 @@ struct bnxt {
#define BNXT_NPAR(bp) ((bp)->port_partition_type)
#define BNXT_MH(bp) ((bp)->flags & BNXT_FLAG_MULTI_HOST)
#define BNXT_SINGLE_PF(bp) (BNXT_PF(bp) && !BNXT_NPAR(bp) && !BNXT_MH(bp))
+#define BNXT_SH_PORT_CFG_OK(bp) (BNXT_PF(bp) && \
+ ((bp)->phy_flags & BNXT_PHY_FL_SHARED_PORT_CFG))
#define BNXT_PHY_CFG_ABLE(bp) ((BNXT_SINGLE_PF(bp) || \
- ((bp)->fw_cap & BNXT_FW_CAP_SHARED_PORT_CFG)) && \
+ BNXT_SH_PORT_CFG_OK(bp)) && \
(bp)->link_info.phy_state == BNXT_PHY_STATE_ENABLED)
#define BNXT_CHIP_TYPE_NITRO_A0(bp) ((bp)->flags & BNXT_FLAG_CHIP_NITRO_A0)
#define BNXT_RX_PAGE_MODE(bp) ((bp)->flags & BNXT_FLAG_RX_PAGE_MODE)
@@ -1871,11 +1869,9 @@ struct bnxt {
#define BNXT_FW_CAP_EXT_STATS_SUPPORTED 0x00040000
#define BNXT_FW_CAP_ERR_RECOVER_RELOAD 0x00100000
#define BNXT_FW_CAP_HOT_RESET 0x00200000
- #define BNXT_FW_CAP_SHARED_PORT_CFG 0x00400000
#define BNXT_FW_CAP_VLAN_RX_STRIP 0x01000000
#define BNXT_FW_CAP_VLAN_TX_INSERT 0x02000000
#define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED 0x04000000
- #define BNXT_FW_CAP_PORT_STATS_NO_RESET 0x10000000
#define BNXT_FW_CAP_RING_MONITOR 0x40000000
#define BNXT_NEW_RM(bp) ((bp)->fw_cap & BNXT_FW_CAP_NEW_RM)
@@ -1918,6 +1914,8 @@ struct bnxt {
u16 vxlan_fw_dst_port_id;
u16 nge_fw_dst_port_id;
+ __be16 vxlan_port;
+ __be16 nge_port;
u8 port_partition_type;
u8 port_count;
u16 br_mode;
@@ -2010,6 +2008,17 @@ struct bnxt {
u32 lpi_tmr_lo;
u32 lpi_tmr_hi;
+ /* copied from flags in hwrm_port_phy_qcaps_output */
+ u8 phy_flags;
+#define BNXT_PHY_FL_EEE_CAP PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED
+#define BNXT_PHY_FL_EXT_LPBK PORT_PHY_QCAPS_RESP_FLAGS_EXTERNAL_LPBK_SUPPORTED
+#define BNXT_PHY_FL_AN_PHY_LPBK PORT_PHY_QCAPS_RESP_FLAGS_AUTONEG_LPBK_SUPPORTED
+#define BNXT_PHY_FL_SHARED_PORT_CFG PORT_PHY_QCAPS_RESP_FLAGS_SHARED_PHY_CFG_SUPPORTED
+#define BNXT_PHY_FL_PORT_STATS_NO_RESET PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET
+#define BNXT_PHY_FL_NO_PHY_LPBK PORT_PHY_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED
+#define BNXT_PHY_FL_FW_MANAGED_LKDN PORT_PHY_QCAPS_RESP_FLAGS_FW_MANAGED_LINK_DOWN
+#define BNXT_PHY_FL_NO_FCS PORT_PHY_QCAPS_RESP_FLAGS_NO_FCS
+
u8 num_tests;
struct bnxt_test_info *test_info;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 3b66e300c962..c664ec52ebcf 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -2912,7 +2912,7 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata)
if (!BNXT_PHY_CFG_ABLE(bp))
return -EOPNOTSUPP;
- if (!(bp->flags & BNXT_FLAG_EEE_CAP))
+ if (!(bp->phy_flags & BNXT_PHY_FL_EEE_CAP))
return -EOPNOTSUPP;
mutex_lock(&bp->link_lock);
@@ -2963,7 +2963,7 @@ static int bnxt_get_eee(struct net_device *dev, struct ethtool_eee *edata)
{
struct bnxt *bp = netdev_priv(dev);
- if (!(bp->flags & BNXT_FLAG_EEE_CAP))
+ if (!(bp->phy_flags & BNXT_PHY_FL_EEE_CAP))
return -EOPNOTSUPP;
*edata = bp->eee;
@@ -3215,7 +3215,7 @@ static int bnxt_disable_an_for_lpbk(struct bnxt *bp,
int rc;
if (!link_info->autoneg ||
- (bp->test_info->flags & BNXT_TEST_FL_AN_PHY_LPBK))
+ (bp->phy_flags & BNXT_PHY_FL_AN_PHY_LPBK))
return 0;
rc = bnxt_query_force_speeds(bp, &fw_advertising);
@@ -3416,7 +3416,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
}
if ((etest->flags & ETH_TEST_FL_EXTERNAL_LB) &&
- (bp->test_info->flags & BNXT_TEST_FL_EXT_LPBK))
+ (bp->phy_flags & BNXT_PHY_FL_EXT_LPBK))
do_ext_lpbk = true;
if (etest->flags & ETH_TEST_FL_OFFLINE) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index a217316228f4..eb00a219aa51 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -49,10 +49,6 @@ static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp,
static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id)
{
- if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
- netdev_err(bp->dev, "vf ndo called though PF is down\n");
- return -EINVAL;
- }
if (!bp->pf.active_vfs) {
netdev_err(bp->dev, "vf ndo called though sriov is disabled\n");
return -EINVAL;
@@ -113,7 +109,7 @@ static int bnxt_hwrm_func_qcfg_flags(struct bnxt *bp, struct bnxt_vf_info *vf)
int rc;
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
- req.fid = cpu_to_le16(vf->fw_fid);
+ req.fid = cpu_to_le16(BNXT_PF(bp) ? vf->fw_fid : 0xffff);
mutex_lock(&bp->hwrm_cmd_lock);
rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc) {
@@ -125,9 +121,9 @@ static int bnxt_hwrm_func_qcfg_flags(struct bnxt *bp, struct bnxt_vf_info *vf)
return 0;
}
-static bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf)
+bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf)
{
- if (!(bp->fw_cap & BNXT_FW_CAP_TRUSTED_VF))
+ if (BNXT_PF(bp) && !(bp->fw_cap & BNXT_FW_CAP_TRUSTED_VF))
return !!(vf->flags & BNXT_VF_TRUST);
bnxt_hwrm_func_qcfg_flags(bp, vf);
@@ -1120,10 +1116,38 @@ void bnxt_hwrm_exec_fwd_req(struct bnxt *bp)
}
}
+int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict)
+{
+ struct hwrm_func_vf_cfg_input req = {0};
+ int rc = 0;
+
+ if (!BNXT_VF(bp))
+ return 0;
+
+ if (bp->hwrm_spec_code < 0x10202) {
+ if (is_valid_ether_addr(bp->vf.mac_addr))
+ rc = -EADDRNOTAVAIL;
+ goto mac_done;
+ }
+ bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1);
+ req.enables = cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
+ memcpy(req.dflt_mac_addr, mac, ETH_ALEN);
+ rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+mac_done:
+ if (rc && strict) {
+ rc = -EADDRNOTAVAIL;
+ netdev_warn(bp->dev, "VF MAC address %pM not approved by the PF\n",
+ mac);
+ return rc;
+ }
+ return 0;
+}
+
void bnxt_update_vf_mac(struct bnxt *bp)
{
struct hwrm_func_qcaps_input req = {0};
struct hwrm_func_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
+ bool inform_pf = false;
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCAPS, -1, -1);
req.fid = cpu_to_le16(0xffff);
@@ -1139,42 +1163,24 @@ void bnxt_update_vf_mac(struct bnxt *bp)
* default but the stored zero MAC will allow the VF user to change
* the random MAC address using ndo_set_mac_address() if he wants.
*/
- if (!ether_addr_equal(resp->mac_address, bp->vf.mac_addr))
+ if (!ether_addr_equal(resp->mac_address, bp->vf.mac_addr)) {
memcpy(bp->vf.mac_addr, resp->mac_address, ETH_ALEN);
+ /* This means we are now using our own MAC address, let
+ * the PF know about this MAC address.
+ */
+ if (!is_valid_ether_addr(bp->vf.mac_addr))
+ inform_pf = true;
+ }
/* overwrite netdev dev_addr with admin VF MAC */
if (is_valid_ether_addr(bp->vf.mac_addr))
memcpy(bp->dev->dev_addr, bp->vf.mac_addr, ETH_ALEN);
update_vf_mac_exit:
mutex_unlock(&bp->hwrm_cmd_lock);
+ if (inform_pf)
+ bnxt_approve_mac(bp, bp->dev->dev_addr, false);
}
-int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict)
-{
- struct hwrm_func_vf_cfg_input req = {0};
- int rc = 0;
-
- if (!BNXT_VF(bp))
- return 0;
-
- if (bp->hwrm_spec_code < 0x10202) {
- if (is_valid_ether_addr(bp->vf.mac_addr))
- rc = -EADDRNOTAVAIL;
- goto mac_done;
- }
- bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1);
- req.enables = cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
- memcpy(req.dflt_mac_addr, mac, ETH_ALEN);
- rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-mac_done:
- if (rc && strict) {
- rc = -EADDRNOTAVAIL;
- netdev_warn(bp->dev, "VF MAC address %pM not approved by the PF\n",
- mac);
- return rc;
- }
- return 0;
-}
#else
int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
index 629641bf6fc5..995535e4c11b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
@@ -34,6 +34,7 @@ int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16);
int bnxt_set_vf_bw(struct net_device *, int, int, int);
int bnxt_set_vf_link_state(struct net_device *, int, int);
int bnxt_set_vf_spoofchk(struct net_device *, int, bool);
+bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf);
int bnxt_set_vf_trust(struct net_device *dev, int vf_id, bool trust);
int bnxt_sriov_configure(struct pci_dev *pdev, int num_vfs);
int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset);
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
index a3f5b80888e5..ef3f1e92632f 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
@@ -33,7 +33,6 @@ static int chcr_get_nfrags_to_send(struct sk_buff *skb, u32 start, u32 len)
if (unlikely(start < skb_linear_data_len)) {
frag_size = min(len, skb_linear_data_len - start);
- start = 0;
} else {
start -= skb_linear_data_len;
@@ -873,10 +872,10 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info,
}
/* update receive window */
if (first_wr || tx_info->prev_win != tcp_win) {
- pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos,
- TCB_RCV_WND_W,
- TCB_RCV_WND_V(TCB_RCV_WND_M),
- TCB_RCV_WND_V(tcp_win), 0);
+ chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos,
+ TCB_RCV_WND_W,
+ TCB_RCV_WND_V(TCB_RCV_WND_M),
+ TCB_RCV_WND_V(tcp_win), 0);
tx_info->prev_win = tcp_win;
cpl++;
}
@@ -1485,7 +1484,6 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info,
wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR));
wr->flowid_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16));
wr->cookie = 0;
- pos += sizeof(*wr);
/* ULP_TXPKT */
ulptx = (struct ulp_txpkt *)(wr + 1);
ulptx->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) |
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index e7f7121821be..2a8bf53c2f75 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1522,7 +1522,6 @@ dm9000_probe(struct platform_device *pdev)
if (ret) {
dev_err(db->dev, "irq %d cannot set wakeup (%d)\n",
db->irq_wake, ret);
- ret = 0;
} else {
irq_set_irq_wake(db->irq_wake, 0);
db->wake_supported = 1;
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index f08c420a5803..2768c78528a5 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -431,7 +431,8 @@ static void prestera_port_handle_event(struct prestera_switch *sw,
netif_carrier_on(port->dev);
if (!delayed_work_pending(caching_dw))
queue_delayed_work(prestera_wq, caching_dw, 0);
- } else {
+ } else if (netif_running(port->dev) &&
+ netif_carrier_ok(port->dev)) {
netif_carrier_off(port->dev);
if (delayed_work_pending(caching_dw))
cancel_delayed_work(caching_dw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
index 3e19b1721303..0399a396d166 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
@@ -96,7 +96,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
}
if (!vport->egress.acl) {
- vport->egress.acl = esw_acl_table_create(esw, vport->vport,
+ vport->egress.acl = esw_acl_table_create(esw, vport,
MLX5_FLOW_NAMESPACE_ESW_EGRESS,
table_size);
if (IS_ERR(vport->egress.acl)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
index 26b37a0f8762..505bf811984a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
@@ -148,7 +148,7 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport)
esw_acl_egress_vlan_grp_destroy(vport);
}
-static bool esw_acl_egress_needed(const struct mlx5_eswitch *esw, u16 vport_num)
+static bool esw_acl_egress_needed(struct mlx5_eswitch *esw, u16 vport_num)
{
return mlx5_eswitch_is_vf_vport(esw, vport_num) || mlx5_esw_is_sf_vport(esw, vport_num);
}
@@ -171,7 +171,7 @@ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport
table_size++;
if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
table_size++;
- vport->egress.acl = esw_acl_table_create(esw, vport->vport,
+ vport->egress.acl = esw_acl_table_create(esw, vport,
MLX5_FLOW_NAMESPACE_ESW_EGRESS, table_size);
if (IS_ERR(vport->egress.acl)) {
err = PTR_ERR(vport->egress.acl);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
index 4a369669e51e..45b839116212 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
@@ -6,14 +6,14 @@
#include "helper.h"
struct mlx5_flow_table *
-esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size)
+esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_core_dev *dev = esw->dev;
struct mlx5_flow_namespace *root_ns;
struct mlx5_flow_table *acl;
int acl_supported;
- int vport_index;
+ u16 vport_num;
int err;
acl_supported = (ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS) ?
@@ -23,11 +23,11 @@ esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size)
if (!acl_supported)
return ERR_PTR(-EOPNOTSUPP);
+ vport_num = vport->vport;
esw_debug(dev, "Create vport[%d] %s ACL table\n", vport_num,
ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress");
- vport_index = mlx5_eswitch_vport_num_to_index(esw, vport_num);
- root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport_index);
+ root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport->index);
if (!root_ns) {
esw_warn(dev, "Failed to get E-Switch root namespace for vport (%d)\n",
vport_num);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
index 8dc4cab66a71..a47063fab57e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
@@ -8,7 +8,7 @@
/* General acl helper functions */
struct mlx5_flow_table *
-esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size);
+esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size);
/* Egress acl helper functions */
void esw_acl_egress_table_destroy(struct mlx5_vport *vport);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
index d64fad2823e7..f75b86abaf1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
@@ -177,7 +177,7 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
}
if (!vport->ingress.acl) {
- vport->ingress.acl = esw_acl_table_create(esw, vport->vport,
+ vport->ingress.acl = esw_acl_table_create(esw, vport,
MLX5_FLOW_NAMESPACE_ESW_INGRESS,
table_size);
if (IS_ERR(vport->ingress.acl)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
index 548c005ea633..39e948bc1204 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
@@ -7,7 +7,7 @@
#include "ofld.h"
static bool
-esw_acl_ingress_prio_tag_enabled(const struct mlx5_eswitch *esw,
+esw_acl_ingress_prio_tag_enabled(struct mlx5_eswitch *esw,
const struct mlx5_vport *vport)
{
return (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
@@ -255,7 +255,7 @@ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw,
if (esw_acl_ingress_prio_tag_enabled(esw, vport))
num_ftes++;
- vport->ingress.acl = esw_acl_table_create(esw, vport->vport,
+ vport->ingress.acl = esw_acl_table_create(esw, vport,
MLX5_FLOW_NAMESPACE_ESW_INGRESS,
num_ftes);
if (IS_ERR(vport->ingress.acl)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index 7bfc84238b3d..1703384eca95 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -14,8 +14,7 @@ mlx5_esw_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_i
memcpy(ppid->id, &parent_id, sizeof(parent_id));
}
-static bool
-mlx5_esw_devlink_port_supported(const struct mlx5_eswitch *esw, u16 vport_num)
+static bool mlx5_esw_devlink_port_supported(struct mlx5_eswitch *esw, u16 vport_num)
{
return vport_num == MLX5_VPORT_UPLINK ||
(mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) ||
@@ -124,7 +123,7 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1
}
int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
- u16 vport_num, u32 sfnum)
+ u16 vport_num, u32 controller, u32 sfnum)
{
struct mlx5_core_dev *dev = esw->dev;
struct netdev_phys_item_id ppid = {};
@@ -142,7 +141,7 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p
mlx5_esw_get_port_parent_id(dev, &ppid);
memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len);
dl_port->attrs.switch_id.id_len = ppid.id_len;
- devlink_port_attrs_pci_sf_set(dl_port, 0, pfnum, sfnum);
+ devlink_port_attrs_pci_sf_set(dl_port, controller, pfnum, sfnum, !!controller);
devlink = priv_to_devlink(dev);
dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num);
err = devlink_port_register(devlink, dl_port, dl_port_index);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
index 8ab1224653a4..d9041b16611d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -216,7 +216,8 @@ static void esw_destroy_legacy_table(struct mlx5_eswitch *esw)
int esw_legacy_enable(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
- int ret, i;
+ unsigned long i;
+ int ret;
ret = esw_create_legacy_table(esw);
if (ret)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 1bb229ecd43b..570f2280823c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -88,20 +88,17 @@ struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink)
struct mlx5_vport *__must_check
mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num)
{
- u16 idx;
+ struct mlx5_vport *vport;
if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager))
return ERR_PTR(-EPERM);
- idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
-
- if (idx > esw->total_vports - 1) {
- esw_debug(esw->dev, "vport out of range: num(0x%x), idx(0x%x)\n",
- vport_num, idx);
+ vport = xa_load(&esw->vports, vport_num);
+ if (!vport) {
+ esw_debug(esw->dev, "vport out of range: num(0x%x)\n", vport_num);
return ERR_PTR(-EINVAL);
}
-
- return &esw->vports[idx];
+ return vport;
}
static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
@@ -345,9 +342,10 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw,
{
u8 *mac = vaddr->node.addr;
struct mlx5_vport *vport;
- u16 i, vport_num;
+ unsigned long i;
+ u16 vport_num;
- mlx5_esw_for_all_vports(esw, i, vport) {
+ mlx5_esw_for_each_vport(esw, i, vport) {
struct hlist_head *vport_hash = vport->mc_list;
struct vport_addr *iter_vaddr =
l2addr_hash_find(vport_hash,
@@ -1175,7 +1173,7 @@ static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw)
static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
- int i;
+ unsigned long i;
mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
memset(&vport->qos, 0, sizeof(vport->qos));
@@ -1213,20 +1211,25 @@ void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num)
void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs)
{
- int i;
+ struct mlx5_vport *vport;
+ unsigned long i;
- mlx5_esw_for_each_vf_vport_num_reverse(esw, i, num_vfs)
- mlx5_eswitch_unload_vport(esw, i);
+ mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
+ if (!vport->enabled)
+ continue;
+ mlx5_eswitch_unload_vport(esw, vport->vport);
+ }
}
int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
enum mlx5_eswitch_vport_event enabled_events)
{
+ struct mlx5_vport *vport;
+ unsigned long i;
int err;
- int i;
- mlx5_esw_for_each_vf_vport_num(esw, i, num_vfs) {
- err = mlx5_eswitch_load_vport(esw, i, enabled_events);
+ mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
+ err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events);
if (err)
goto vf_err;
}
@@ -1234,7 +1237,7 @@ int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
return 0;
vf_err:
- mlx5_eswitch_unload_vf_vports(esw, i - 1);
+ mlx5_eswitch_unload_vf_vports(esw, num_vfs);
return err;
}
@@ -1563,24 +1566,161 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
up_write(&esw->mode_lock);
}
+static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out)
+{
+ u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
+ u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {};
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+ MLX5_SET(query_hca_cap_in, in, function_id, MLX5_VPORT_PF);
+ MLX5_SET(query_hca_cap_in, in, other_function, true);
+ return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
+}
+
+int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id)
+
+{
+ int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ void *query_ctx;
+ void *hca_caps;
+ int err;
+
+ if (!mlx5_core_is_ecpf(dev)) {
+ *max_sfs = 0;
+ return 0;
+ }
+
+ query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+ if (!query_ctx)
+ return -ENOMEM;
+
+ err = mlx5_query_hca_cap_host_pf(dev, query_ctx);
+ if (err)
+ goto out_free;
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+ *max_sfs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_sf);
+ *sf_base_id = MLX5_GET(cmd_hca_cap, hca_caps, sf_base_id);
+
+out_free:
+ kfree(query_ctx);
+ return err;
+}
+
+static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, struct mlx5_core_dev *dev,
+ int index, u16 vport_num)
+{
+ struct mlx5_vport *vport;
+ int err;
+
+ vport = kzalloc(sizeof(*vport), GFP_KERNEL);
+ if (!vport)
+ return -ENOMEM;
+
+ vport->dev = esw->dev;
+ vport->vport = vport_num;
+ vport->index = index;
+ vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+ INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler);
+ err = xa_insert(&esw->vports, vport_num, vport, GFP_KERNEL);
+ if (err)
+ goto insert_err;
+
+ esw->total_vports++;
+ return 0;
+
+insert_err:
+ kfree(vport);
+ return err;
+}
+
+static void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ xa_erase(&esw->vports, vport->vport);
+ kfree(vport);
+}
+
+static void mlx5_esw_vports_cleanup(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+
+ mlx5_esw_for_each_vport(esw, i, vport)
+ mlx5_esw_vport_free(esw, vport);
+ xa_destroy(&esw->vports);
+}
+
+static int mlx5_esw_vports_init(struct mlx5_eswitch *esw)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ u16 max_host_pf_sfs;
+ u16 base_sf_num;
+ int idx = 0;
+ int err;
+ int i;
+
+ xa_init(&esw->vports);
+
+ err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_PF);
+ if (err)
+ goto err;
+ if (esw->first_host_vport == MLX5_VPORT_PF)
+ xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN);
+ idx++;
+
+ for (i = 0; i < mlx5_core_max_vfs(dev); i++) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, idx);
+ if (err)
+ goto err;
+ xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF);
+ xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN);
+ idx++;
+ }
+ base_sf_num = mlx5_sf_start_function_id(dev);
+ for (i = 0; i < mlx5_sf_max_functions(dev); i++) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i);
+ if (err)
+ goto err;
+ xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF);
+ idx++;
+ }
+
+ err = mlx5_esw_sf_max_hpf_functions(dev, &max_host_pf_sfs, &base_sf_num);
+ if (err)
+ goto err;
+ for (i = 0; i < max_host_pf_sfs; i++) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i);
+ if (err)
+ goto err;
+ xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF);
+ idx++;
+ }
+
+ if (mlx5_ecpf_vport_exists(dev)) {
+ err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_ECPF);
+ if (err)
+ goto err;
+ idx++;
+ }
+ err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_UPLINK);
+ if (err)
+ goto err;
+ return 0;
+
+err:
+ mlx5_esw_vports_cleanup(esw);
+ return err;
+}
+
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
{
struct mlx5_eswitch *esw;
- struct mlx5_vport *vport;
- int total_vports;
- int err, i;
+ int err;
if (!MLX5_VPORT_MANAGER(dev))
return 0;
- total_vports = mlx5_eswitch_get_total_vports(dev);
-
- esw_info(dev,
- "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
- total_vports,
- MLX5_MAX_UC_PER_VPORT(dev),
- MLX5_MAX_MC_PER_VPORT(dev));
-
esw = kzalloc(sizeof(*esw), GFP_KERNEL);
if (!esw)
return -ENOMEM;
@@ -1595,18 +1735,13 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
goto abort;
}
- esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport),
- GFP_KERNEL);
- if (!esw->vports) {
- err = -ENOMEM;
+ err = mlx5_esw_vports_init(esw);
+ if (err)
goto abort;
- }
-
- esw->total_vports = total_vports;
err = esw_offloads_init_reps(esw);
if (err)
- goto abort;
+ goto reps_err;
mutex_init(&esw->offloads.encap_tbl_lock);
hash_init(esw->offloads.encap_tbl);
@@ -1619,25 +1754,25 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
mutex_init(&esw->state_lock);
init_rwsem(&esw->mode_lock);
- mlx5_esw_for_all_vports(esw, i, vport) {
- vport->vport = mlx5_eswitch_index_to_vport_num(esw, i);
- vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
- vport->dev = dev;
- INIT_WORK(&vport->vport_change_handler,
- esw_vport_change_handler);
- }
-
esw->enabled_vports = 0;
esw->mode = MLX5_ESWITCH_NONE;
esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
dev->priv.eswitch = esw;
BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
+
+ esw_info(dev,
+ "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
+ esw->total_vports,
+ MLX5_MAX_UC_PER_VPORT(dev),
+ MLX5_MAX_MC_PER_VPORT(dev));
return 0;
+
+reps_err:
+ mlx5_esw_vports_cleanup(esw);
abort:
if (esw->work_queue)
destroy_workqueue(esw->work_queue);
- kfree(esw->vports);
kfree(esw);
return err;
}
@@ -1659,7 +1794,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
mutex_destroy(&esw->offloads.encap_tbl_lock);
mutex_destroy(&esw->offloads.decap_tbl_lock);
esw_offloads_cleanup_reps(esw);
- kfree(esw->vports);
+ mlx5_esw_vports_cleanup(esw);
kfree(esw);
}
@@ -1718,8 +1853,29 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
return err;
}
+static bool mlx5_esw_check_port_type(struct mlx5_eswitch *esw, u16 vport_num, xa_mark_t mark)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return false;
+
+ return xa_get_mark(&esw->vports, vport_num, mark);
+}
+
+bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_VF);
+}
+
+bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+ return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_SF);
+}
+
static bool
-is_port_function_supported(const struct mlx5_eswitch *esw, u16 vport_num)
+is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num)
{
return vport_num == MLX5_VPORT_PF ||
mlx5_eswitch_is_vf_vport(esw, vport_num) ||
@@ -1891,9 +2047,9 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
struct mlx5_vport *evport;
u32 max_guarantee = 0;
- int i;
+ unsigned long i;
- mlx5_esw_for_all_vports(esw, i, evport) {
+ mlx5_esw_for_each_vport(esw, i, evport) {
if (!evport->enabled || evport->qos.min_rate < max_guarantee)
continue;
max_guarantee = evport->qos.min_rate;
@@ -1911,11 +2067,11 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw)
struct mlx5_vport *evport;
u32 vport_max_rate;
u32 vport_min_rate;
+ unsigned long i;
u32 bw_share;
int err;
- int i;
- mlx5_esw_for_all_vports(esw, i, evport) {
+ mlx5_esw_for_each_vport(esw, i, evport) {
if (!evport->enabled)
continue;
vport_min_rate = evport->qos.min_rate;
@@ -2205,3 +2361,19 @@ void mlx5_esw_unlock(struct mlx5_eswitch *esw)
{
up_write(&esw->mode_lock);
}
+
+/**
+ * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
+ *
+ * @dev: Pointer to core device
+ *
+ * mlx5_eswitch_get_total_vports returns total number of eswitch vports.
+ */
+u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
+{
+ struct mlx5_eswitch *esw;
+
+ esw = dev->priv.eswitch;
+ return mlx5_esw_allowed(esw) ? esw->total_vports : 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index b289d756a7e4..64ccb2bc0b58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -176,6 +176,7 @@ struct mlx5_vport {
u16 vport;
bool enabled;
enum mlx5_eswitch_vport_event enabled_events;
+ int index;
struct devlink_port *dl_port;
};
@@ -228,7 +229,7 @@ struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
- struct mlx5_eswitch_rep *vport_reps;
+ struct xarray vport_reps;
struct list_head peer_flows;
struct mutex peer_mutex;
struct mutex encap_tbl_lock; /* protects encap_tbl */
@@ -278,7 +279,7 @@ struct mlx5_eswitch {
struct esw_mc_addr mc_promisc;
/* end of legacy */
struct workqueue_struct *work_queue;
- struct mlx5_vport *vports;
+ struct xarray vports;
u32 flags;
int total_vports;
int enabled_vports;
@@ -545,94 +546,11 @@ static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev)
MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF;
}
-static inline int mlx5_esw_sf_start_idx(const struct mlx5_eswitch *esw)
-{
- /* PF and VF vports indices start from 0 to max_vfs */
- return MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev);
-}
-
-static inline int mlx5_esw_sf_end_idx(const struct mlx5_eswitch *esw)
-{
- return mlx5_esw_sf_start_idx(esw) + mlx5_sf_max_functions(esw->dev);
-}
-
-static inline int
-mlx5_esw_sf_vport_num_to_index(const struct mlx5_eswitch *esw, u16 vport_num)
-{
- return vport_num - mlx5_sf_start_function_id(esw->dev) +
- MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev);
-}
-
-static inline u16
-mlx5_esw_sf_vport_index_to_num(const struct mlx5_eswitch *esw, int idx)
-{
- return mlx5_sf_start_function_id(esw->dev) + idx -
- (MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev));
-}
-
-static inline bool
-mlx5_esw_is_sf_vport(const struct mlx5_eswitch *esw, u16 vport_num)
-{
- return mlx5_sf_supported(esw->dev) &&
- vport_num >= mlx5_sf_start_function_id(esw->dev) &&
- (vport_num < (mlx5_sf_start_function_id(esw->dev) +
- mlx5_sf_max_functions(esw->dev)));
-}
-
static inline bool mlx5_eswitch_is_funcs_handler(const struct mlx5_core_dev *dev)
{
return mlx5_core_is_ecpf_esw_manager(dev);
}
-static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw)
-{
- /* Uplink always locate at the last element of the array.*/
- return esw->total_vports - 1;
-}
-
-static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw)
-{
- return esw->total_vports - 2;
-}
-
-static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw,
- u16 vport_num)
-{
- if (vport_num == MLX5_VPORT_ECPF) {
- if (!mlx5_ecpf_vport_exists(esw->dev))
- esw_warn(esw->dev, "ECPF vport doesn't exist!\n");
- return mlx5_eswitch_ecpf_idx(esw);
- }
-
- if (vport_num == MLX5_VPORT_UPLINK)
- return mlx5_eswitch_uplink_idx(esw);
-
- if (mlx5_esw_is_sf_vport(esw, vport_num))
- return mlx5_esw_sf_vport_num_to_index(esw, vport_num);
-
- /* PF and VF vports start from 0 to max_vfs */
- return vport_num;
-}
-
-static inline u16 mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw,
- int index)
-{
- if (index == mlx5_eswitch_ecpf_idx(esw) &&
- mlx5_ecpf_vport_exists(esw->dev))
- return MLX5_VPORT_ECPF;
-
- if (index == mlx5_eswitch_uplink_idx(esw))
- return MLX5_VPORT_UPLINK;
-
- /* SF vports indices are after VFs and before ECPF */
- if (mlx5_sf_supported(esw->dev) &&
- index > mlx5_core_max_vfs(esw->dev))
- return mlx5_esw_sf_vport_index_to_num(esw, index);
-
- /* PF and VF vports start from 0 to max_vfs */
- return index;
-}
-
static inline unsigned int
mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
u16 vport_num)
@@ -649,82 +567,42 @@ mlx5_esw_devlink_port_index_to_vport_num(unsigned int dl_port_index)
/* TODO: This mlx5e_tc function shouldn't be called by eswitch */
void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
-/* The vport getter/iterator are only valid after esw->total_vports
- * and vport->vport are initialized in mlx5_eswitch_init.
+/* Each mark identifies an eswitch vport type.
+ * MLX5_ESW_VPT_HOST_FN is used to identify both PF and VF ports using
+ * a single mark.
+ * MLX5_ESW_VPT_VF identifies an SR-IOV VF vport.
+ * MLX5_ESW_VPT_SF identifies an SF vport.
*/
-#define mlx5_esw_for_all_vports(esw, i, vport) \
- for ((i) = MLX5_VPORT_PF; \
- (vport) = &(esw)->vports[i], \
- (i) < (esw)->total_vports; (i)++)
-
-#define mlx5_esw_for_all_vports_reverse(esw, i, vport) \
- for ((i) = (esw)->total_vports - 1; \
- (vport) = &(esw)->vports[i], \
- (i) >= MLX5_VPORT_PF; (i)--)
-
-#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \
- for ((i) = MLX5_VPORT_FIRST_VF; \
- (vport) = &(esw)->vports[(i)], \
- (i) <= (nvfs); (i)++)
-
-#define mlx5_esw_for_each_vf_vport_reverse(esw, i, vport, nvfs) \
- for ((i) = (nvfs); \
- (vport) = &(esw)->vports[(i)], \
- (i) >= MLX5_VPORT_FIRST_VF; (i)--)
-
-/* The rep getter/iterator are only valid after esw->total_vports
- * and vport->vport are initialized in mlx5_eswitch_init.
+#define MLX5_ESW_VPT_HOST_FN XA_MARK_0
+#define MLX5_ESW_VPT_VF XA_MARK_1
+#define MLX5_ESW_VPT_SF XA_MARK_2
+
+/* The vport iterator is valid only after vports are initialized in mlx5_eswitch_init.
+ * The idea is borrowed from xa_for_each_marked(), with support for a desired last element.
*/
-#define mlx5_esw_for_all_reps(esw, i, rep) \
- for ((i) = MLX5_VPORT_PF; \
- (rep) = &(esw)->offloads.vport_reps[i], \
- (i) < (esw)->total_vports; (i)++)
-
-#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs) \
- for ((i) = MLX5_VPORT_FIRST_VF; \
- (rep) = &(esw)->offloads.vport_reps[i], \
- (i) <= (nvfs); (i)++)
-
-#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs) \
- for ((i) = (nvfs); \
- (rep) = &(esw)->offloads.vport_reps[i], \
- (i) >= MLX5_VPORT_FIRST_VF; (i)--)
-
-#define mlx5_esw_for_each_vf_vport_num(esw, vport, nvfs) \
- for ((vport) = MLX5_VPORT_FIRST_VF; (vport) <= (nvfs); (vport)++)
-
-#define mlx5_esw_for_each_vf_vport_num_reverse(esw, vport, nvfs) \
- for ((vport) = (nvfs); (vport) >= MLX5_VPORT_FIRST_VF; (vport)--)
-
-/* Includes host PF (vport 0) if it's not esw manager. */
-#define mlx5_esw_for_each_host_func_rep(esw, i, rep, nvfs) \
- for ((i) = (esw)->first_host_vport; \
- (rep) = &(esw)->offloads.vport_reps[i], \
- (i) <= (nvfs); (i)++)
-
-#define mlx5_esw_for_each_host_func_rep_reverse(esw, i, rep, nvfs) \
- for ((i) = (nvfs); \
- (rep) = &(esw)->offloads.vport_reps[i], \
- (i) >= (esw)->first_host_vport; (i)--)
-
-#define mlx5_esw_for_each_host_func_vport(esw, vport, nvfs) \
- for ((vport) = (esw)->first_host_vport; \
- (vport) <= (nvfs); (vport)++)
-
-#define mlx5_esw_for_each_host_func_vport_reverse(esw, vport, nvfs) \
- for ((vport) = (nvfs); \
- (vport) >= (esw)->first_host_vport; (vport)--)
-
-#define mlx5_esw_for_each_sf_rep(esw, i, rep) \
- for ((i) = mlx5_esw_sf_start_idx(esw); \
- (rep) = &(esw)->offloads.vport_reps[(i)], \
- (i) < mlx5_esw_sf_end_idx(esw); (i++))
+
+#define mlx5_esw_for_each_vport(esw, index, vport) \
+ xa_for_each(&((esw)->vports), index, vport)
+
+#define mlx5_esw_for_each_entry_marked(xa, index, entry, last, filter) \
+ for (index = 0, entry = xa_find(xa, &index, last, filter); \
+ entry; entry = xa_find_after(xa, &index, last, filter))
+
+#define mlx5_esw_for_each_vport_marked(esw, index, vport, last, filter) \
+ mlx5_esw_for_each_entry_marked(&((esw)->vports), index, vport, last, filter)
+
+#define mlx5_esw_for_each_vf_vport(esw, index, vport, last) \
+ mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_VF)
+
+#define mlx5_esw_for_each_host_func_vport(esw, index, vport, last) \
+ mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_HOST_FN)
struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink);
struct mlx5_vport *__must_check
mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num);
-bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num);
+bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num);
+bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num);
int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data);
@@ -784,12 +662,13 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo
struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num);
int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
- u16 vport_num, u32 sfnum);
+ u16 vport_num, u32 controller, u32 sfnum);
void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
- u16 vport_num, u32 sfnum);
+ u16 vport_num, u32 controller, u32 sfnum);
void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num);
+int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id);
int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num);
void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num);
@@ -816,6 +695,8 @@ void mlx5_esw_unlock(struct mlx5_eswitch *esw);
void esw_vport_change_handle_locked(struct mlx5_vport *vport);
+bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller);
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index bbb707117296..db1e74280e57 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -49,6 +49,16 @@
#include "en_tc.h"
#include "en/mapping.h"
+#define mlx5_esw_for_each_rep(esw, i, rep) \
+ xa_for_each(&((esw)->offloads.vport_reps), i, rep)
+
+#define mlx5_esw_for_each_sf_rep(esw, i, rep) \
+ xa_for_each_marked(&((esw)->offloads.vport_reps), i, rep, MLX5_ESW_VPT_SF)
+
+#define mlx5_esw_for_each_vf_rep(esw, index, rep) \
+ mlx5_esw_for_each_entry_marked(&((esw)->offloads.vport_reps), index, \
+ rep, (esw)->esw_funcs.num_vfs, MLX5_ESW_VPT_VF)
+
/* There are two match-all miss flows, one for unicast dst mac and
* one for multicast.
*/
@@ -67,10 +77,7 @@ static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
u16 vport_num)
{
- int idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
-
- WARN_ON(idx > esw->total_vports - 1);
- return &esw->offloads.vport_reps[idx];
+ return xa_load(&esw->offloads.vport_reps, vport_num);
}
static void
@@ -720,10 +727,11 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
{
struct mlx5_eswitch_rep *rep;
- int i, err = 0;
+ unsigned long i;
+ int err = 0;
esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
- mlx5_esw_for_each_host_func_rep(esw, i, rep, esw->esw_funcs.num_vfs) {
+ mlx5_esw_for_each_host_func_vport(esw, i, rep, esw->esw_funcs.num_vfs) {
if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
continue;
@@ -972,13 +980,13 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
static void mlx5_eswitch_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
{
struct mlx5_flow_handle **flows = esw->fdb_table.offloads.send_to_vport_meta_rules;
- int i = 0, num_vfs = esw->esw_funcs.num_vfs, vport_num;
+ int i = 0, num_vfs = esw->esw_funcs.num_vfs;
if (!num_vfs || !flows)
return;
- mlx5_esw_for_each_vf_vport_num(esw, vport_num, num_vfs)
- mlx5_del_flow_rules(flows[i++]);
+ for (i = 0; i < num_vfs; i++)
+ mlx5_del_flow_rules(flows[i]);
kvfree(flows);
}
@@ -992,6 +1000,8 @@ mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
struct mlx5_flow_handle *flow_rule;
struct mlx5_flow_handle **flows;
struct mlx5_flow_spec *spec;
+ struct mlx5_vport *vport;
+ unsigned long i;
u16 vport_num;
num_vfs = esw->esw_funcs.num_vfs;
@@ -1016,7 +1026,8 @@ mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- mlx5_esw_for_each_vf_vport_num(esw, vport_num, num_vfs) {
+ mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
+ vport_num = vport->vport;
MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0,
mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
dest.vport.num = vport_num;
@@ -1158,12 +1169,14 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
struct mlx5_flow_destination dest = {};
struct mlx5_flow_act flow_act = {0};
struct mlx5_flow_handle **flows;
- struct mlx5_flow_handle *flow;
- struct mlx5_flow_spec *spec;
/* total vports is the same for both e-switches */
int nvports = esw->total_vports;
+ struct mlx5_flow_handle *flow;
+ struct mlx5_flow_spec *spec;
+ struct mlx5_vport *vport;
+ unsigned long i;
void *misc;
- int err, i;
+ int err;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec)
@@ -1182,6 +1195,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
misc_parameters);
if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
spec, MLX5_VPORT_PF);
@@ -1191,10 +1205,11 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
err = PTR_ERR(flow);
goto add_pf_flow_err;
}
- flows[MLX5_VPORT_PF] = flow;
+ flows[vport->index] = flow;
}
if (mlx5_ecpf_vport_exists(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF);
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
@@ -1202,13 +1217,13 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
err = PTR_ERR(flow);
goto add_ecpf_flow_err;
}
- flows[mlx5_eswitch_ecpf_idx(esw)] = flow;
+ flows[vport->index] = flow;
}
- mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) {
+ mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
esw_set_peer_miss_rule_source_port(esw,
peer_dev->priv.eswitch,
- spec, i);
+ spec, vport->vport);
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
@@ -1216,7 +1231,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
err = PTR_ERR(flow);
goto add_vf_flow_err;
}
- flows[i] = flow;
+ flows[vport->index] = flow;
}
esw->fdb_table.offloads.peer_miss_rules = flows;
@@ -1225,15 +1240,20 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
return 0;
add_vf_flow_err:
- nvports = --i;
- mlx5_esw_for_each_vf_vport_num_reverse(esw, i, nvports)
- mlx5_del_flow_rules(flows[i]);
-
- if (mlx5_ecpf_vport_exists(esw->dev))
- mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
+ mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
+ if (!flows[vport->index])
+ continue;
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
+ if (mlx5_ecpf_vport_exists(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
add_ecpf_flow_err:
- if (mlx5_core_is_ecpf_esw_manager(esw->dev))
- mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
add_pf_flow_err:
esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
kvfree(flows);
@@ -1245,20 +1265,23 @@ alloc_flows_err:
static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
{
struct mlx5_flow_handle **flows;
- int i;
+ struct mlx5_vport *vport;
+ unsigned long i;
flows = esw->fdb_table.offloads.peer_miss_rules;
- mlx5_esw_for_each_vf_vport_num_reverse(esw, i,
- mlx5_core_max_vfs(esw->dev))
- mlx5_del_flow_rules(flows[i]);
-
- if (mlx5_ecpf_vport_exists(esw->dev))
- mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
+ mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev))
+ mlx5_del_flow_rules(flows[vport->index]);
- if (mlx5_core_is_ecpf_esw_manager(esw->dev))
- mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+ if (mlx5_ecpf_vport_exists(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
+ mlx5_del_flow_rules(flows[vport->index]);
+ }
kvfree(flows);
}
@@ -1402,11 +1425,11 @@ static void esw_vport_tbl_put(struct mlx5_eswitch *esw)
{
struct mlx5_vport_tbl_attr attr;
struct mlx5_vport *vport;
- int i;
+ unsigned long i;
attr.chain = 0;
attr.prio = 1;
- mlx5_esw_for_all_vports(esw, i, vport) {
+ mlx5_esw_for_each_vport(esw, i, vport) {
attr.vport = vport->vport;
attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
mlx5_esw_vporttbl_put(esw, &attr);
@@ -1418,11 +1441,11 @@ static int esw_vport_tbl_get(struct mlx5_eswitch *esw)
struct mlx5_vport_tbl_attr attr;
struct mlx5_flow_table *fdb;
struct mlx5_vport *vport;
- int i;
+ unsigned long i;
attr.chain = 0;
attr.prio = 1;
- mlx5_esw_for_all_vports(esw, i, vport) {
+ mlx5_esw_for_each_vport(esw, i, vport) {
attr.vport = vport->vport;
attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
fdb = mlx5_esw_vporttbl_get(esw, &attr);
@@ -1910,12 +1933,12 @@ out:
return flow_rule;
}
-
-static int mlx5_eswitch_inline_mode_get(const struct mlx5_eswitch *esw, u8 *mode)
+static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
{
u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
struct mlx5_core_dev *dev = esw->dev;
- int vport;
+ struct mlx5_vport *vport;
+ unsigned long i;
if (!MLX5_CAP_GEN(dev, vport_group_manager))
return -EOPNOTSUPP;
@@ -1936,8 +1959,8 @@ static int mlx5_eswitch_inline_mode_get(const struct mlx5_eswitch *esw, u8 *mode
query_vports:
mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode);
- mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
- mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
+ mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+ mlx5_query_nic_vport_min_inline(dev, vport->vport, &mlx5_mode);
if (prev_mlx5_mode != mlx5_mode)
return -EINVAL;
prev_mlx5_mode = mlx5_mode;
@@ -2080,34 +2103,82 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
return err;
}
-void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
+static void mlx5_esw_offloads_rep_mark_set(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ xa_mark_t mark)
{
- kfree(esw->offloads.vport_reps);
+ bool mark_set;
+
+ /* Copy the mark from vport to its rep */
+ mark_set = xa_get_mark(&esw->vports, rep->vport, mark);
+ if (mark_set)
+ xa_set_mark(&esw->offloads.vport_reps, rep->vport, mark);
}
-int esw_offloads_init_reps(struct mlx5_eswitch *esw)
+static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport)
{
- int total_vports = esw->total_vports;
struct mlx5_eswitch_rep *rep;
- int vport_index;
- u8 rep_type;
+ int rep_type;
+ int err;
- esw->offloads.vport_reps = kcalloc(total_vports,
- sizeof(struct mlx5_eswitch_rep),
- GFP_KERNEL);
- if (!esw->offloads.vport_reps)
+ rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+ if (!rep)
return -ENOMEM;
- mlx5_esw_for_all_reps(esw, vport_index, rep) {
- rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index);
- rep->vport_index = vport_index;
+ rep->vport = vport->vport;
+ rep->vport_index = vport->index;
+ for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
+ atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
- for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
- atomic_set(&rep->rep_data[rep_type].state,
- REP_UNREGISTERED);
- }
+ err = xa_insert(&esw->offloads.vport_reps, rep->vport, rep, GFP_KERNEL);
+ if (err)
+ goto insert_err;
+ mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_HOST_FN);
+ mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_VF);
+ mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_SF);
return 0;
+
+insert_err:
+ kfree(rep);
+ return err;
+}
+
+static void mlx5_esw_offloads_rep_cleanup(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep)
+{
+ xa_erase(&esw->offloads.vport_reps, rep->vport);
+ kfree(rep);
+}
+
+void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
+{
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+
+ mlx5_esw_for_each_rep(esw, i, rep)
+ mlx5_esw_offloads_rep_cleanup(esw, rep);
+ xa_destroy(&esw->offloads.vport_reps);
+}
+
+int esw_offloads_init_reps(struct mlx5_eswitch *esw)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+ int err;
+
+ xa_init(&esw->offloads.vport_reps);
+
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ err = mlx5_esw_offloads_rep_init(esw, vport);
+ if (err)
+ goto err;
+ }
+ return 0;
+
+err:
+ esw_offloads_cleanup_reps(esw);
+ return err;
}
static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
@@ -2121,7 +2192,7 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type)
{
struct mlx5_eswitch_rep *rep;
- int i;
+ unsigned long i;
mlx5_esw_for_each_sf_rep(esw, i, rep)
__esw_offloads_unload_rep(esw, rep, rep_type);
@@ -2130,11 +2201,11 @@ static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type)
static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
{
struct mlx5_eswitch_rep *rep;
- int i;
+ unsigned long i;
__unload_reps_sf_vport(esw, rep_type);
- mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, esw->esw_funcs.num_vfs)
+ mlx5_esw_for_each_vf_rep(esw, i, rep)
__esw_offloads_unload_rep(esw, rep, rep_type);
if (mlx5_ecpf_vport_exists(esw->dev)) {
@@ -2421,25 +2492,25 @@ static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw,
static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
- int i;
+ unsigned long i;
if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
return;
- mlx5_esw_for_all_vports_reverse(esw, i, vport)
+ mlx5_esw_for_each_vport(esw, i, vport)
esw_offloads_vport_metadata_cleanup(esw, vport);
}
static int esw_offloads_metadata_init(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
+ unsigned long i;
int err;
- int i;
if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
return 0;
- mlx5_esw_for_all_vports(esw, i, vport) {
+ mlx5_esw_for_each_vport(esw, i, vport) {
err = esw_offloads_vport_metadata_setup(esw, vport);
if (err)
goto metadata_err;
@@ -2676,11 +2747,25 @@ static int mlx5_esw_host_number_init(struct mlx5_eswitch *esw)
return 0;
}
+bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller)
+{
+ /* Controller 0 is the local controller and is always valid */
+ if (controller == 0)
+ return true;
+
+ if (!mlx5_core_is_ecpf_esw_manager(esw->dev))
+ return false;
+
+ /* External host numbers start at zero in the device, so the controller is host_number + 1 */
+ return (controller == esw->offloads.host_number + 1);
+}
+
int esw_offloads_enable(struct mlx5_eswitch *esw)
{
struct mapping_ctx *reg_c0_obj_pool;
struct mlx5_vport *vport;
- int err, i;
+ unsigned long i;
+ int err;
if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap))
@@ -2926,13 +3011,44 @@ unlock:
return err;
}
+static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_vport *vport;
+ u16 err_vport_num = 0;
+ unsigned long i;
+ int err = 0;
+
+ mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+ err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode);
+ if (err) {
+ err_vport_num = vport->vport;
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to set min inline on vport");
+ goto revert_inline_mode;
+ }
+ }
+ return 0;
+
+revert_inline_mode:
+ mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+ if (vport->vport == err_vport_num)
+ break;
+ mlx5_modify_nic_vport_min_inline(dev,
+ vport->vport,
+ esw->offloads.inline_mode);
+ }
+ return err;
+}
+
int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
- int err, vport, num_vport;
struct mlx5_eswitch *esw;
u8 mlx5_mode;
+ int err;
esw = mlx5_devlink_eswitch_get(devlink);
if (IS_ERR(esw))
@@ -2967,25 +3083,14 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
if (err)
goto out;
- mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
- err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack,
- "Failed to set min inline on vport");
- goto revert_inline_mode;
- }
- }
+ err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack);
+ if (err)
+ goto out;
esw->offloads.inline_mode = mlx5_mode;
up_write(&esw->mode_lock);
return 0;
-revert_inline_mode:
- num_vport = --vport;
- mlx5_esw_for_each_host_func_vport_reverse(esw, vport, num_vport)
- mlx5_modify_nic_vport_min_inline(dev,
- vport,
- esw->offloads.inline_mode);
out:
up_write(&esw->mode_lock);
return err;
@@ -3116,11 +3221,11 @@ void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
{
struct mlx5_eswitch_rep_data *rep_data;
struct mlx5_eswitch_rep *rep;
- int i;
+ unsigned long i;
esw->offloads.rep_ops[rep_type] = ops;
- mlx5_esw_for_all_reps(esw, i, rep) {
- if (likely(mlx5_eswitch_vport_has_rep(esw, i))) {
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ if (likely(mlx5_eswitch_vport_has_rep(esw, rep->vport))) {
rep->esw = esw;
rep_data = &rep->rep_data[rep_type];
atomic_set(&rep_data->state, REP_REGISTERED);
@@ -3132,12 +3237,12 @@ EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
{
struct mlx5_eswitch_rep *rep;
- int i;
+ unsigned long i;
if (esw->mode == MLX5_ESWITCH_OFFLOADS)
__unload_reps_all_vport(esw, rep_type);
- mlx5_esw_for_all_reps(esw, i, rep)
+ mlx5_esw_for_each_rep(esw, i, rep)
atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
}
EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps);
@@ -3178,12 +3283,6 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
}
EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
-bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num)
-{
- return vport_num >= MLX5_VPORT_FIRST_VF &&
- vport_num <= esw->dev->priv.sriov.max_vfs;
-}
-
bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw)
{
return !!(esw->flags & MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED);
@@ -3209,7 +3308,7 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
- u16 vport_num, u32 sfnum)
+ u16 vport_num, u32 controller, u32 sfnum)
{
int err;
@@ -3217,7 +3316,7 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p
if (err)
return err;
- err = mlx5_esw_devlink_sf_port_register(esw, dl_port, vport_num, sfnum);
+ err = mlx5_esw_devlink_sf_port_register(esw, dl_port, vport_num, controller, sfnum);
if (err)
goto devlink_err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
index 90b524c59f3c..6a0c6f965ad1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
@@ -148,9 +148,19 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_
struct mlx5_sf_dev_table *table = container_of(nb, struct mlx5_sf_dev_table, nb);
const struct mlx5_vhca_state_event *event = data;
struct mlx5_sf_dev *sf_dev;
+ u16 max_functions;
u16 sf_index;
+ u16 base_id;
+
+ max_functions = mlx5_sf_max_functions(table->dev);
+ if (!max_functions)
+ return 0;
+
+ base_id = MLX5_CAP_GEN(table->dev, sf_base_id);
+ if (event->function_id < base_id || event->function_id >= (base_id + max_functions))
+ return 0;
- sf_index = event->function_id - MLX5_CAP_GEN(table->dev, sf_base_id);
+ sf_index = event->function_id - base_id;
sf_dev = xa_load(&table->devices, sf_index);
switch (event->new_vhca_state) {
case MLX5_VHCA_STATE_ALLOCATED:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
index 52226d9b9a6d..a8e73c9ed1ea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -12,6 +12,7 @@
struct mlx5_sf {
struct devlink_port dl_port;
unsigned int port_index;
+ u32 controller;
u16 id;
u16 hw_fn_id;
u16 hw_state;
@@ -58,7 +59,8 @@ static void mlx5_sf_id_erase(struct mlx5_sf_table *table, struct mlx5_sf *sf)
}
static struct mlx5_sf *
-mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *extack)
+mlx5_sf_alloc(struct mlx5_sf_table *table, struct mlx5_eswitch *esw,
+ u32 controller, u32 sfnum, struct netlink_ext_ack *extack)
{
unsigned int dl_port_index;
struct mlx5_sf *sf;
@@ -66,7 +68,12 @@ mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *ex
int id_err;
int err;
- id_err = mlx5_sf_hw_table_sf_alloc(table->dev, sfnum);
+ if (!mlx5_esw_offloads_controller_valid(esw, controller)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid controller number");
+ return ERR_PTR(-EINVAL);
+ }
+
+ id_err = mlx5_sf_hw_table_sf_alloc(table->dev, controller, sfnum);
if (id_err < 0) {
err = id_err;
goto id_err;
@@ -78,11 +85,12 @@ mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *ex
goto alloc_err;
}
sf->id = id_err;
- hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, sf->id);
+ hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, controller, sf->id);
dl_port_index = mlx5_esw_vport_to_devlink_port_index(table->dev, hw_fn_id);
sf->port_index = dl_port_index;
sf->hw_fn_id = hw_fn_id;
sf->hw_state = MLX5_VHCA_STATE_ALLOCATED;
+ sf->controller = controller;
err = mlx5_sf_id_insert(table, sf);
if (err)
@@ -93,7 +101,7 @@ mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *ex
insert_err:
kfree(sf);
alloc_err:
- mlx5_sf_hw_table_sf_free(table->dev, id_err);
+ mlx5_sf_hw_table_sf_free(table->dev, controller, id_err);
id_err:
if (err == -EEXIST)
NL_SET_ERR_MSG_MOD(extack, "SF already exist. Choose different sfnum");
@@ -103,7 +111,7 @@ id_err:
static void mlx5_sf_free(struct mlx5_sf_table *table, struct mlx5_sf *sf)
{
mlx5_sf_id_erase(table, sf);
- mlx5_sf_hw_table_sf_free(table->dev, sf->id);
+ mlx5_sf_hw_table_sf_free(table->dev, sf->controller, sf->id);
kfree(sf);
}
@@ -272,12 +280,12 @@ static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table,
struct mlx5_sf *sf;
int err;
- sf = mlx5_sf_alloc(table, new_attr->sfnum, extack);
+ sf = mlx5_sf_alloc(table, esw, new_attr->controller, new_attr->sfnum, extack);
if (IS_ERR(sf))
return PTR_ERR(sf);
err = mlx5_esw_offloads_sf_vport_enable(esw, &sf->dl_port, sf->hw_fn_id,
- new_attr->sfnum);
+ new_attr->controller, new_attr->sfnum);
if (err)
goto esw_err;
*new_port_index = sf->port_index;
@@ -306,7 +314,8 @@ mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_
"User must provide unique sfnum. Driver does not support auto assignment");
return -EOPNOTSUPP;
}
- if (new_attr->controller_valid && new_attr->controller) {
+ if (new_attr->controller_valid && new_attr->controller &&
+ !mlx5_core_is_ecpf_esw_manager(dev)) {
NL_SET_ERR_MSG_MOD(extack, "External controller is unsupported");
return -EOPNOTSUPP;
}
@@ -352,10 +361,10 @@ static void mlx5_sf_dealloc(struct mlx5_sf_table *table, struct mlx5_sf *sf)
* firmware gives confirmation that it is detached by the driver.
*/
mlx5_cmd_sf_disable_hca(table->dev, sf->hw_fn_id);
- mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->id);
+ mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id);
kfree(sf);
} else {
- mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->id);
+ mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id);
kfree(sf);
}
}
@@ -437,9 +446,6 @@ sf_err:
static void mlx5_sf_table_enable(struct mlx5_sf_table *table)
{
- if (!mlx5_sf_max_functions(table->dev))
- return;
-
init_completion(&table->disable_complete);
refcount_set(&table->refcount, 1);
}
@@ -462,9 +468,6 @@ static void mlx5_sf_deactivate_all(struct mlx5_sf_table *table)
static void mlx5_sf_table_disable(struct mlx5_sf_table *table)
{
- if (!mlx5_sf_max_functions(table->dev))
- return;
-
if (!refcount_read(&table->refcount))
return;
@@ -498,7 +501,8 @@ static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, voi
static bool mlx5_sf_table_supported(const struct mlx5_core_dev *dev)
{
- return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) && mlx5_sf_supported(dev);
+ return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) &&
+ mlx5_sf_hw_table_supported(dev);
}
int mlx5_sf_table_init(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
index ec53c11c8344..ef5f892aafad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
@@ -8,6 +8,7 @@
#include "ecpf.h"
#include "vhca_event.h"
#include "mlx5_core.h"
+#include "eswitch.h"
struct mlx5_sf_hw {
u32 usr_sfnum;
@@ -15,59 +16,113 @@ struct mlx5_sf_hw {
u8 pending_delete: 1;
};
+struct mlx5_sf_hwc_table {
+ struct mlx5_sf_hw *sfs;
+ int max_fn;
+ u16 start_fn_id;
+};
+
+enum mlx5_sf_hwc_index {
+ MLX5_SF_HWC_LOCAL,
+ MLX5_SF_HWC_EXTERNAL,
+ MLX5_SF_HWC_MAX,
+};
+
struct mlx5_sf_hw_table {
struct mlx5_core_dev *dev;
- struct mlx5_sf_hw *sfs;
- int max_local_functions;
struct mutex table_lock; /* Serializes sf deletion and vhca state change handler. */
struct notifier_block vhca_nb;
+ struct mlx5_sf_hwc_table hwc[MLX5_SF_HWC_MAX];
};
-u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id)
+static struct mlx5_sf_hwc_table *
+mlx5_sf_controller_to_hwc(struct mlx5_core_dev *dev, u32 controller)
{
- return sw_id + mlx5_sf_start_function_id(dev);
+ int idx = !!controller;
+
+ return &dev->priv.sf_hw_table->hwc[idx];
}
-static u16 mlx5_sf_hw_to_sw_id(const struct mlx5_core_dev *dev, u16 hw_id)
+u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id)
{
- return hw_id - mlx5_sf_start_function_id(dev);
+ struct mlx5_sf_hwc_table *hwc;
+
+ hwc = mlx5_sf_controller_to_hwc(dev, controller);
+ return hwc->start_fn_id + sw_id;
}
-int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
+static u16 mlx5_sf_hw_to_sw_id(struct mlx5_sf_hwc_table *hwc, u16 hw_id)
+{
+ return hw_id - hwc->start_fn_id;
+}
+
+static struct mlx5_sf_hwc_table *
+mlx5_sf_table_fn_to_hwc(struct mlx5_sf_hw_table *table, u16 fn_id)
{
- struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
- int sw_id = -ENOSPC;
- u16 hw_fn_id;
- int err;
int i;
- if (!table->max_local_functions)
- return -EOPNOTSUPP;
+ for (i = 0; i < ARRAY_SIZE(table->hwc); i++) {
+ if (table->hwc[i].max_fn &&
+ fn_id >= table->hwc[i].start_fn_id &&
+ fn_id < (table->hwc[i].start_fn_id + table->hwc[i].max_fn))
+ return &table->hwc[i];
+ }
+ return NULL;
+}
+
+static int mlx5_sf_hw_table_id_alloc(struct mlx5_sf_hw_table *table, u32 controller,
+ u32 usr_sfnum)
+{
+ struct mlx5_sf_hwc_table *hwc;
+ int i;
+
+ hwc = mlx5_sf_controller_to_hwc(table->dev, controller);
+ if (!hwc->sfs)
+ return -ENOSPC;
- mutex_lock(&table->table_lock);
/* Check if sf with same sfnum already exists or not. */
- for (i = 0; i < table->max_local_functions; i++) {
- if (table->sfs[i].allocated && table->sfs[i].usr_sfnum == usr_sfnum) {
- err = -EEXIST;
- goto exist_err;
- }
+ for (i = 0; i < hwc->max_fn; i++) {
+ if (hwc->sfs[i].allocated && hwc->sfs[i].usr_sfnum == usr_sfnum)
+ return -EEXIST;
}
-
/* Find the free entry and allocate the entry from the array */
- for (i = 0; i < table->max_local_functions; i++) {
- if (!table->sfs[i].allocated) {
- table->sfs[i].usr_sfnum = usr_sfnum;
- table->sfs[i].allocated = true;
- sw_id = i;
- break;
+ for (i = 0; i < hwc->max_fn; i++) {
+ if (!hwc->sfs[i].allocated) {
+ hwc->sfs[i].usr_sfnum = usr_sfnum;
+ hwc->sfs[i].allocated = true;
+ return i;
}
}
- if (sw_id == -ENOSPC) {
- err = -ENOSPC;
+ return -ENOSPC;
+}
+
+static void mlx5_sf_hw_table_id_free(struct mlx5_sf_hw_table *table, u32 controller, int id)
+{
+ struct mlx5_sf_hwc_table *hwc;
+
+ hwc = mlx5_sf_controller_to_hwc(table->dev, controller);
+ hwc->sfs[id].allocated = false;
+ hwc->sfs[id].pending_delete = false;
+}
+
+int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum)
+{
+ struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+ u16 hw_fn_id;
+ int sw_id;
+ int err;
+
+ if (!table)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&table->table_lock);
+ sw_id = mlx5_sf_hw_table_id_alloc(table, controller, usr_sfnum);
+ if (sw_id < 0) {
+ err = sw_id;
goto exist_err;
}
- hw_fn_id = mlx5_sf_sw_to_hw_id(dev, sw_id);
+ hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, sw_id);
err = mlx5_cmd_alloc_sf(dev, hw_fn_id);
if (err)
goto err;
@@ -76,47 +131,58 @@ int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
if (err)
goto vhca_err;
+ if (controller) {
+ /* If this SF is for external controller, SF manager
+ * needs to arm firmware to receive the events.
+ */
+ err = mlx5_vhca_event_arm(dev, hw_fn_id);
+ if (err)
+ goto vhca_err;
+ }
+
mutex_unlock(&table->table_lock);
return sw_id;
vhca_err:
mlx5_cmd_dealloc_sf(dev, hw_fn_id);
err:
- table->sfs[i].allocated = false;
+ mlx5_sf_hw_table_id_free(table, controller, sw_id);
exist_err:
mutex_unlock(&table->table_lock);
return err;
}
-static void _mlx5_sf_hw_id_free(struct mlx5_core_dev *dev, u16 id)
+void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id)
{
struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
u16 hw_fn_id;
- hw_fn_id = mlx5_sf_sw_to_hw_id(dev, id);
+ mutex_lock(&table->table_lock);
+ hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id);
mlx5_cmd_dealloc_sf(dev, hw_fn_id);
- table->sfs[id].allocated = false;
- table->sfs[id].pending_delete = false;
+ mlx5_sf_hw_table_id_free(table, controller, id);
+ mutex_unlock(&table->table_lock);
}
-void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id)
+static void mlx5_sf_hw_table_hwc_sf_free(struct mlx5_core_dev *dev,
+ struct mlx5_sf_hwc_table *hwc, int idx)
{
- struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
-
- mutex_lock(&table->table_lock);
- _mlx5_sf_hw_id_free(dev, id);
- mutex_unlock(&table->table_lock);
+ mlx5_cmd_dealloc_sf(dev, hwc->start_fn_id + idx);
+ hwc->sfs[idx].allocated = false;
+ hwc->sfs[idx].pending_delete = false;
}
-void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id)
+void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id)
{
struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
+ struct mlx5_sf_hwc_table *hwc;
u16 hw_fn_id;
u8 state;
int err;
- hw_fn_id = mlx5_sf_sw_to_hw_id(dev, id);
+ hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id);
+ hwc = mlx5_sf_controller_to_hwc(dev, controller);
mutex_lock(&table->table_lock);
err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, out, sizeof(out));
if (err)
@@ -124,53 +190,102 @@ void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id)
state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state);
if (state == MLX5_VHCA_STATE_ALLOCATED) {
mlx5_cmd_dealloc_sf(dev, hw_fn_id);
- table->sfs[id].allocated = false;
+ hwc->sfs[id].allocated = false;
} else {
- table->sfs[id].pending_delete = true;
+ hwc->sfs[id].pending_delete = true;
}
err:
mutex_unlock(&table->table_lock);
}
-static void mlx5_sf_hw_dealloc_all(struct mlx5_sf_hw_table *table)
+static void mlx5_sf_hw_table_hwc_dealloc_all(struct mlx5_core_dev *dev,
+ struct mlx5_sf_hwc_table *hwc)
{
int i;
- for (i = 0; i < table->max_local_functions; i++) {
- if (table->sfs[i].allocated)
- _mlx5_sf_hw_id_free(table->dev, i);
+ for (i = 0; i < hwc->max_fn; i++) {
+ if (hwc->sfs[i].allocated)
+ mlx5_sf_hw_table_hwc_sf_free(dev, hwc, i);
}
}
+static void mlx5_sf_hw_table_dealloc_all(struct mlx5_sf_hw_table *table)
+{
+ mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_EXTERNAL]);
+ mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_LOCAL]);
+}
+
+static int mlx5_sf_hw_table_hwc_init(struct mlx5_sf_hwc_table *hwc, u16 max_fn, u16 base_id)
+{
+ struct mlx5_sf_hw *sfs;
+
+ if (!max_fn)
+ return 0;
+
+ sfs = kcalloc(max_fn, sizeof(*sfs), GFP_KERNEL);
+ if (!sfs)
+ return -ENOMEM;
+
+ hwc->sfs = sfs;
+ hwc->max_fn = max_fn;
+ hwc->start_fn_id = base_id;
+ return 0;
+}
+
+static void mlx5_sf_hw_table_hwc_cleanup(struct mlx5_sf_hwc_table *hwc)
+{
+ kfree(hwc->sfs);
+}
+
int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev)
{
struct mlx5_sf_hw_table *table;
- struct mlx5_sf_hw *sfs;
- int max_functions;
+ u16 max_ext_fn = 0;
+ u16 ext_base_id;
+ u16 max_fn = 0;
+ u16 base_id;
+ int err;
- if (!mlx5_sf_supported(dev) || !mlx5_vhca_event_supported(dev))
+ if (!mlx5_vhca_event_supported(dev))
+ return 0;
+
+ if (mlx5_sf_supported(dev))
+ max_fn = mlx5_sf_max_functions(dev);
+
+ err = mlx5_esw_sf_max_hpf_functions(dev, &max_ext_fn, &ext_base_id);
+ if (err)
+ return err;
+
+ if (!max_fn && !max_ext_fn)
return 0;
- max_functions = mlx5_sf_max_functions(dev);
table = kzalloc(sizeof(*table), GFP_KERNEL);
if (!table)
return -ENOMEM;
- sfs = kcalloc(max_functions, sizeof(*sfs), GFP_KERNEL);
- if (!sfs)
- goto table_err;
-
mutex_init(&table->table_lock);
table->dev = dev;
- table->sfs = sfs;
- table->max_local_functions = max_functions;
dev->priv.sf_hw_table = table;
- mlx5_core_dbg(dev, "SF HW table: max sfs = %d\n", max_functions);
+
+ base_id = mlx5_sf_start_function_id(dev);
+ err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_LOCAL], max_fn, base_id);
+ if (err)
+ goto table_err;
+
+ err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_EXTERNAL],
+ max_ext_fn, ext_base_id);
+ if (err)
+ goto ext_err;
+
+ mlx5_core_dbg(dev, "SF HW table: max sfs = %d, ext sfs = %d\n", max_fn, max_ext_fn);
return 0;
+ext_err:
+ mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]);
table_err:
+ mutex_destroy(&table->table_lock);
kfree(table);
- return -ENOMEM;
+ return err;
}
void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev)
@@ -181,7 +296,8 @@ void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev)
return;
mutex_destroy(&table->table_lock);
- kfree(table->sfs);
+ mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_EXTERNAL]);
+ mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]);
kfree(table);
}
@@ -189,21 +305,26 @@ static int mlx5_sf_hw_vhca_event(struct notifier_block *nb, unsigned long opcode
{
struct mlx5_sf_hw_table *table = container_of(nb, struct mlx5_sf_hw_table, vhca_nb);
const struct mlx5_vhca_state_event *event = data;
+ struct mlx5_sf_hwc_table *hwc;
struct mlx5_sf_hw *sf_hw;
u16 sw_id;
if (event->new_vhca_state != MLX5_VHCA_STATE_ALLOCATED)
return 0;
- sw_id = mlx5_sf_hw_to_sw_id(table->dev, event->function_id);
- sf_hw = &table->sfs[sw_id];
+ hwc = mlx5_sf_table_fn_to_hwc(table, event->function_id);
+ if (!hwc)
+ return 0;
+
+ sw_id = mlx5_sf_hw_to_sw_id(hwc, event->function_id);
+ sf_hw = &hwc->sfs[sw_id];
mutex_lock(&table->table_lock);
/* SF driver notified through firmware that SF is finally detached.
* Hence recycle the sf hardware id for reuse.
*/
if (sf_hw->allocated && sf_hw->pending_delete)
- _mlx5_sf_hw_id_free(table->dev, sw_id);
+ mlx5_sf_hw_table_hwc_sf_free(table->dev, hwc, sw_id);
mutex_unlock(&table->table_lock);
return 0;
}
@@ -228,5 +349,10 @@ void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev)
mlx5_vhca_event_notifier_unregister(dev, &table->vhca_nb);
/* Dealloc SFs whose firmware event has been missed. */
- mlx5_sf_hw_dealloc_all(table);
+ mlx5_sf_hw_table_dealloc_all(table);
+}
+
+bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev)
+{
+ return !!dev->priv.sf_hw_table;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
index cb02a51d0986..7114f3fc335f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
@@ -12,10 +12,11 @@ int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id);
int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
-u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id);
+u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id);
-int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum);
-void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id);
-void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id);
+int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum);
+void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id);
+void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id);
+bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index e05c5c0f3ae1..457ad42eaa2a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1151,20 +1151,6 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
}
EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
-/**
- * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
- *
- * @dev: Pointer to core device
- *
- * mlx5_eswitch_get_total_vports returns total number of vports for
- * the eswitch.
- */
-u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
-{
- return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev) + mlx5_sf_max_functions(dev);
-}
-EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
-
int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out)
{
u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
index 713ee3041d49..bea978df7713 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
@@ -364,6 +364,7 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port)
attrs.split = eth_port.is_split;
attrs.splittable = !attrs.split;
+ attrs.lanes = eth_port.port_lanes;
attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
attrs.phys.port_number = eth_port.label_port;
attrs.phys.split_subport_number = eth_port.label_subport;
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 9e5dad41cdc9..4afff320dfd0 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -913,31 +913,20 @@ static int ravb_poll(struct napi_struct *napi, int budget)
int q = napi - priv->napi;
int mask = BIT(q);
int quota = budget;
- u32 ris0, tis;
- for (;;) {
- tis = ravb_read(ndev, TIS);
- ris0 = ravb_read(ndev, RIS0);
- if (!((ris0 & mask) || (tis & mask)))
- break;
+ /* Processing RX Descriptor Ring */
+ /* Clear RX interrupt */
+ ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0);
+ if (ravb_rx(ndev, &quota, q))
+ goto out;
- /* Processing RX Descriptor Ring */
- if (ris0 & mask) {
- /* Clear RX interrupt */
- ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0);
- if (ravb_rx(ndev, &quota, q))
- goto out;
- }
- /* Processing TX Descriptor Ring */
- if (tis & mask) {
- spin_lock_irqsave(&priv->lock, flags);
- /* Clear TX interrupt */
- ravb_write(ndev, ~(mask | TIS_RESERVED), TIS);
- ravb_tx_free(ndev, q, true);
- netif_wake_subqueue(ndev, q);
- spin_unlock_irqrestore(&priv->lock, flags);
- }
- }
+ /* Processing TX Descriptor Ring */
+ spin_lock_irqsave(&priv->lock, flags);
+ /* Clear TX interrupt */
+ ravb_write(ndev, ~(mask | TIS_RESERVED), TIS);
+ ravb_tx_free(ndev, q, true);
+ netif_wake_subqueue(ndev, q);
+ spin_unlock_irqrestore(&priv->lock, flags);
napi_complete(napi);
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index c873f961d5a5..c3f35da1b82a 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -2944,8 +2944,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
/* Get the transmit queue */
tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL);
- tx_queue = efx_channel_get_tx_queue(channel,
- tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
+ tx_queue = channel->tx_queue + (tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
if (!tx_queue->timestamping) {
/* Transmit completion */
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index d75cf5ff5686..49df02ecee91 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -835,14 +835,14 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
/* Transmit completion */
tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_DESC_PTR);
tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
- tx_queue = efx_channel_get_tx_queue(
- channel, tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
+ tx_queue = channel->tx_queue +
+ (tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
efx_xmit_done(tx_queue, tx_ev_desc_ptr);
} else if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) {
/* Rewrite the FIFO write pointer */
tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
- tx_queue = efx_channel_get_tx_queue(
- channel, tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
+ tx_queue = channel->tx_queue +
+ (tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
netif_tx_lock(efx->net_dev);
efx_farch_notify_tx_desc(tx_queue);
@@ -1081,16 +1081,16 @@ static void
efx_farch_handle_tx_flush_done(struct efx_nic *efx, efx_qword_t *event)
{
struct efx_tx_queue *tx_queue;
+ struct efx_channel *channel;
int qid;
qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
if (qid < EFX_MAX_TXQ_PER_CHANNEL * (efx->n_tx_channels + efx->n_extra_tx_channels)) {
- tx_queue = efx_get_tx_queue(efx, qid / EFX_MAX_TXQ_PER_CHANNEL,
- qid % EFX_MAX_TXQ_PER_CHANNEL);
- if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0)) {
+ channel = efx_get_tx_channel(efx, qid / EFX_MAX_TXQ_PER_CHANNEL);
+ tx_queue = channel->tx_queue + (qid % EFX_MAX_TXQ_PER_CHANNEL);
+ if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0))
efx_farch_magic_event(tx_queue->channel,
EFX_CHANNEL_MAGIC_TX_DRAIN(tx_queue));
- }
}
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 372090e8ee6f..a9a984c57d78 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3303,8 +3303,15 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
/* Enable TSO */
if (priv->tso) {
- for (chan = 0; chan < tx_cnt; chan++)
+ for (chan = 0; chan < tx_cnt; chan++) {
+ struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
+
+ /* TSO and TBS cannot co-exist */
+ if (tx_q->tbs & STMMAC_TBS_AVAIL)
+ continue;
+
stmmac_enable_tso(priv, priv->ioaddr, 1, chan);
+ }
}
/* Enable Split Header */
@@ -3674,9 +3681,8 @@ int stmmac_open(struct net_device *dev)
struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
int tbs_en = priv->plat->tx_queues_cfg[chan].tbs_en;
+ /* Setup per-TXQ tbs flag before TX descriptor alloc */
tx_q->tbs |= tbs_en ? STMMAC_TBS_AVAIL : 0;
- if (stmmac_enable_tbs(priv, priv->ioaddr, tbs_en, chan))
- tx_q->tbs &= ~STMMAC_TBS_AVAIL;
}
ret = alloc_dma_desc_resources(priv);
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 14e7da7d302f..f9417b44cae8 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -169,11 +169,11 @@ static const char emac_version_string[] = "TI DaVinci EMAC Linux v6.1";
/* EMAC mac_status register */
#define EMAC_MACSTATUS_TXERRCODE_MASK (0xF00000)
#define EMAC_MACSTATUS_TXERRCODE_SHIFT (20)
-#define EMAC_MACSTATUS_TXERRCH_MASK (0x7)
+#define EMAC_MACSTATUS_TXERRCH_MASK (0x70000)
#define EMAC_MACSTATUS_TXERRCH_SHIFT (16)
#define EMAC_MACSTATUS_RXERRCODE_MASK (0xF000)
#define EMAC_MACSTATUS_RXERRCODE_SHIFT (12)
-#define EMAC_MACSTATUS_RXERRCH_MASK (0x7)
+#define EMAC_MACSTATUS_RXERRCH_MASK (0x700)
#define EMAC_MACSTATUS_RXERRCH_SHIFT (8)
/* EMAC RX register masks */
diff --git a/drivers/net/ethernet/xscale/Kconfig b/drivers/net/ethernet/xscale/Kconfig
index 7b83a6e5d894..468ffe3d1707 100644
--- a/drivers/net/ethernet/xscale/Kconfig
+++ b/drivers/net/ethernet/xscale/Kconfig
@@ -22,6 +22,7 @@ config IXP4XX_ETH
tristate "Intel IXP4xx Ethernet support"
depends on ARM && ARCH_IXP4XX && IXP4XX_NPE && IXP4XX_QMGR
select PHYLIB
+ select OF_MDIO if OF
select NET_PTP_CLASSIFY
help
Say Y here if you want to use built-in Ethernet ports
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index 0152f1e70783..cb89323855d8 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -28,6 +28,7 @@
#include <linux/kernel.h>
#include <linux/net_tstamp.h>
#include <linux/of.h>
+#include <linux/of_mdio.h>
#include <linux/phy.h>
#include <linux/platform_data/eth_ixp4xx.h>
#include <linux/platform_device.h>
@@ -165,7 +166,6 @@ struct eth_regs {
};
struct port {
- struct resource *mem_res;
struct eth_regs __iomem *regs;
struct npe *npe;
struct net_device *netdev;
@@ -250,6 +250,7 @@ static inline void memcpy_swab32(u32 *dest, u32 *src, int cnt)
static DEFINE_SPINLOCK(mdio_lock);
static struct eth_regs __iomem *mdio_regs; /* mdio command and status only */
static struct mii_bus *mdio_bus;
+static struct device_node *mdio_bus_np;
static int ports_open;
static struct port *npe_port_tab[MAX_NPES];
static struct dma_pool *dma_pool;
@@ -533,7 +534,8 @@ static int ixp4xx_mdio_register(struct eth_regs __iomem *regs)
mdio_bus->write = &ixp4xx_mdio_write;
snprintf(mdio_bus->id, MII_BUS_ID_SIZE, "ixp4xx-eth-0");
- if ((err = mdiobus_register(mdio_bus)))
+ err = of_mdiobus_register(mdio_bus, mdio_bus_np);
+ if (err)
mdiobus_free(mdio_bus);
return err;
}
@@ -1085,7 +1087,7 @@ static int init_queues(struct port *port)
int i;
if (!ports_open) {
- dma_pool = dma_pool_create(DRV_NAME, port->netdev->dev.parent,
+ dma_pool = dma_pool_create(DRV_NAME, &port->netdev->dev,
POOL_ALLOC_SIZE, 32, 0);
if (!dma_pool)
return -ENOMEM;
@@ -1358,19 +1360,118 @@ static const struct net_device_ops ixp4xx_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
};
+#ifdef CONFIG_OF
+static struct eth_plat_info *ixp4xx_of_get_platdata(struct device *dev)
+{
+ struct device_node *np = dev->of_node;
+ struct of_phandle_args queue_spec;
+ struct of_phandle_args npe_spec;
+ struct device_node *mdio_np;
+ struct eth_plat_info *plat;
+ int ret;
+
+ plat = devm_kzalloc(dev, sizeof(*plat), GFP_KERNEL);
+ if (!plat)
+ return NULL;
+
+ ret = of_parse_phandle_with_fixed_args(np, "intel,npe-handle", 1, 0,
+ &npe_spec);
+ if (ret) {
+ dev_err(dev, "no NPE engine specified\n");
+ return NULL;
+ }
+ /* NPE ID 0x00, 0x10, 0x20... */
+ plat->npe = (npe_spec.args[0] << 4);
+
+ /* Check if this device has an MDIO bus */
+ mdio_np = of_get_child_by_name(np, "mdio");
+ if (mdio_np) {
+ plat->has_mdio = true;
+ mdio_bus_np = mdio_np;
+ /* DO NOT put the mdio_np, it will be used */
+ }
+
+ /* Get the rx queue as a resource from queue manager */
+ ret = of_parse_phandle_with_fixed_args(np, "queue-rx", 1, 0,
+ &queue_spec);
+ if (ret) {
+ dev_err(dev, "no rx queue phandle\n");
+ return NULL;
+ }
+ plat->rxq = queue_spec.args[0];
+
+ /* Get the txready queue as resource from queue manager */
+ ret = of_parse_phandle_with_fixed_args(np, "queue-txready", 1, 0,
+ &queue_spec);
+ if (ret) {
+ dev_err(dev, "no txready queue phandle\n");
+ return NULL;
+ }
+ plat->txreadyq = queue_spec.args[0];
+
+ return plat;
+}
+#else
+static struct eth_plat_info *ixp4xx_of_get_platdata(struct device *dev)
+{
+ return NULL;
+}
+#endif
+
static int ixp4xx_eth_probe(struct platform_device *pdev)
{
- char phy_id[MII_BUS_ID_SIZE + 3];
struct phy_device *phydev = NULL;
struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
struct eth_plat_info *plat;
- resource_size_t regs_phys;
struct net_device *ndev;
struct resource *res;
struct port *port;
int err;
- plat = dev_get_platdata(dev);
+ if (np) {
+ plat = ixp4xx_of_get_platdata(dev);
+ if (!plat)
+ return -ENODEV;
+ } else {
+ plat = dev_get_platdata(dev);
+ if (!plat)
+ return -ENODEV;
+ plat->npe = pdev->id;
+ switch (plat->npe) {
+ case IXP4XX_ETH_NPEA:
+ /* No MDIO bus on NPE-A; probe defers below if the bus is not up yet */
+ break;
+ case IXP4XX_ETH_NPEB:
+ /* On all except IXP43x, NPE-B is used for the MDIO bus.
+ * If there is no NPE-B in the feature set, bail out,
+ * else we have the MDIO bus here.
+ */
+ if (!cpu_is_ixp43x()) {
+ if (!(ixp4xx_read_feature_bits() &
+ IXP4XX_FEATURE_NPEB_ETH0))
+ return -ENODEV;
+ /* Else register the MDIO bus on NPE-B */
+ plat->has_mdio = true;
+ }
+ break;
+ case IXP4XX_ETH_NPEC:
+ /* IXP43x lacks NPE-B and uses NPE-C for the MDIO bus
+ * access, if there is no NPE-C, no bus, nothing works,
+ * so bail out.
+ */
+ if (cpu_is_ixp43x()) {
+ if (!(ixp4xx_read_feature_bits() &
+ IXP4XX_FEATURE_NPEC_ETH))
+ return -ENODEV;
+ /* Else register the MDIO bus on NPE-C */
+ plat->has_mdio = true;
+ }
+ break;
+ default:
+ return -ENODEV;
+ }
+ }
if (!(ndev = devm_alloc_etherdev(dev, sizeof(struct port))))
return -ENOMEM;
@@ -1378,75 +1479,42 @@ static int ixp4xx_eth_probe(struct platform_device *pdev)
SET_NETDEV_DEV(ndev, dev);
port = netdev_priv(ndev);
port->netdev = ndev;
- port->id = pdev->id;
+ port->id = plat->npe;
/* Get the port resource and remap */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res)
return -ENODEV;
- regs_phys = res->start;
port->regs = devm_ioremap_resource(dev, res);
if (IS_ERR(port->regs))
return PTR_ERR(port->regs);
- switch (port->id) {
- case IXP4XX_ETH_NPEA:
- /* If the MDIO bus is not up yet, defer probe */
- if (!mdio_bus)
- return -EPROBE_DEFER;
- break;
- case IXP4XX_ETH_NPEB:
- /*
- * On all except IXP43x, NPE-B is used for the MDIO bus.
- * If there is no NPE-B in the feature set, bail out, else
- * register the MDIO bus.
- */
- if (!cpu_is_ixp43x()) {
- if (!(ixp4xx_read_feature_bits() &
- IXP4XX_FEATURE_NPEB_ETH0))
- return -ENODEV;
- /* Else register the MDIO bus on NPE-B */
- if ((err = ixp4xx_mdio_register(port->regs)))
- return err;
- }
- if (!mdio_bus)
- return -EPROBE_DEFER;
- break;
- case IXP4XX_ETH_NPEC:
- /*
- * IXP43x lacks NPE-B and uses NPE-C for the MDIO bus access,
- * of there is no NPE-C, no bus, nothing works, so bail out.
- */
- if (cpu_is_ixp43x()) {
- if (!(ixp4xx_read_feature_bits() &
- IXP4XX_FEATURE_NPEC_ETH))
- return -ENODEV;
- /* Else register the MDIO bus on NPE-C */
- if ((err = ixp4xx_mdio_register(port->regs)))
- return err;
+ /* Register the MDIO bus if we have it */
+ if (plat->has_mdio) {
+ err = ixp4xx_mdio_register(port->regs);
+ if (err) {
+ dev_err(dev, "failed to register MDIO bus\n");
+ return err;
}
- if (!mdio_bus)
- return -EPROBE_DEFER;
- break;
- default:
- return -ENODEV;
}
+ /* If the instance with the MDIO bus has not yet appeared,
+ * defer probing until it gets probed.
+ */
+ if (!mdio_bus)
+ return -EPROBE_DEFER;
ndev->netdev_ops = &ixp4xx_netdev_ops;
ndev->ethtool_ops = &ixp4xx_ethtool_ops;
ndev->tx_queue_len = 100;
+ /* Inherit the DMA masks from the platform device */
+ ndev->dev.dma_mask = dev->dma_mask;
+ ndev->dev.coherent_dma_mask = dev->coherent_dma_mask;
netif_napi_add(ndev, &port->napi, eth_poll, NAPI_WEIGHT);
if (!(port->npe = npe_request(NPE_ID(port->id))))
return -EIO;
- port->mem_res = request_mem_region(regs_phys, REGS_SIZE, ndev->name);
- if (!port->mem_res) {
- err = -EBUSY;
- goto err_npe_rel;
- }
-
port->plat = plat;
npe_port_tab[NPE_ID(port->id)] = port;
memcpy(ndev->dev_addr, plat->hwaddr, ETH_ALEN);
@@ -1459,12 +1527,24 @@ static int ixp4xx_eth_probe(struct platform_device *pdev)
__raw_writel(DEFAULT_CORE_CNTRL, &port->regs->core_control);
udelay(50);
- snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT,
- mdio_bus->id, plat->phy);
- phydev = phy_connect(ndev, phy_id, &ixp4xx_adjust_link,
- PHY_INTERFACE_MODE_MII);
- if (IS_ERR(phydev)) {
- err = PTR_ERR(phydev);
+ if (np) {
+ phydev = of_phy_get_and_connect(ndev, np, ixp4xx_adjust_link);
+ } else {
+ phydev = mdiobus_get_phy(mdio_bus, plat->phy);
+ if (IS_ERR(phydev)) {
+ err = PTR_ERR(phydev);
+ dev_err(dev, "could not connect phydev (%d)\n", err);
+ goto err_free_mem;
+ }
+ err = phy_connect_direct(ndev, phydev, ixp4xx_adjust_link,
+ PHY_INTERFACE_MODE_MII);
+ if (err)
+ goto err_free_mem;
+
+ }
+ if (!phydev) {
+ err = -ENODEV;
+ dev_err(dev, "no phydev\n");
goto err_free_mem;
}
@@ -1482,8 +1562,6 @@ err_phy_dis:
phy_disconnect(phydev);
err_free_mem:
npe_port_tab[NPE_ID(port->id)] = NULL;
- release_resource(port->mem_res);
-err_npe_rel:
npe_release(port->npe);
return err;
}
@@ -1499,12 +1577,21 @@ static int ixp4xx_eth_remove(struct platform_device *pdev)
ixp4xx_mdio_remove();
npe_port_tab[NPE_ID(port->id)] = NULL;
npe_release(port->npe);
- release_resource(port->mem_res);
return 0;
}
+static const struct of_device_id ixp4xx_eth_of_match[] = {
+ {
+ .compatible = "intel,ixp4xx-ethernet",
+ },
+ { },
+};
+
static struct platform_driver ixp4xx_eth_driver = {
- .driver.name = DRV_NAME,
+ .driver = {
+ .name = DRV_NAME,
+ .of_match_table = of_match_ptr(ixp4xx_eth_of_match),
+ },
.probe = ixp4xx_eth_probe,
.remove = ixp4xx_eth_remove,
};
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 072de880b99f..1ab94b5f9bbf 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -892,7 +892,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
__be16 sport;
int err;
- if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+ if (!pskb_inet_may_pull(skb))
return -EINVAL;
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
@@ -989,7 +989,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
__be16 sport;
int err;
- if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+ if (!pskb_inet_may_pull(skb))
return -EINVAL;
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 7349a70af083..f682a5572d84 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2297,6 +2297,7 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
{
struct device *parent = vf_netdev->dev.parent;
struct net_device_context *ndev_ctx;
+ struct net_device *ndev;
struct pci_dev *pdev;
u32 serial;
@@ -2319,8 +2320,17 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
if (!ndev_ctx->vf_alloc)
continue;
- if (ndev_ctx->vf_serial == serial)
- return hv_get_drvdata(ndev_ctx->device_ctx);
+ if (ndev_ctx->vf_serial != serial)
+ continue;
+
+ ndev = hv_get_drvdata(ndev_ctx->device_ctx);
+ if (ndev->addr_len != vf_netdev->addr_len ||
+ memcmp(ndev->perm_addr, vf_netdev->perm_addr,
+ ndev->addr_len) != 0)
+ continue;
+
+ return ndev;
+
}
netdev_notice(vf_netdev,
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 9a9a5cf36a4b..7427b989607e 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -423,18 +423,24 @@ static void macvlan_forward_source_one(struct sk_buff *skb,
macvlan_count_rx(vlan, len, ret == NET_RX_SUCCESS, false);
}
-static void macvlan_forward_source(struct sk_buff *skb,
+static bool macvlan_forward_source(struct sk_buff *skb,
struct macvlan_port *port,
const unsigned char *addr)
{
struct macvlan_source_entry *entry;
u32 idx = macvlan_eth_hash(addr);
struct hlist_head *h = &port->vlan_source_hash[idx];
+ bool consume = false;
hlist_for_each_entry_rcu(entry, h, hlist) {
- if (ether_addr_equal_64bits(entry->addr, addr))
+ if (ether_addr_equal_64bits(entry->addr, addr)) {
+ if (entry->vlan->flags & MACVLAN_FLAG_NODST)
+ consume = true;
macvlan_forward_source_one(skb, entry->vlan);
+ }
}
+
+ return consume;
}
/* called under rcu_read_lock() from netif_receive_skb */
@@ -463,7 +469,8 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
return RX_HANDLER_CONSUMED;
*pskb = skb;
eth = eth_hdr(skb);
- macvlan_forward_source(skb, port, eth->h_source);
+ if (macvlan_forward_source(skb, port, eth->h_source))
+ return RX_HANDLER_CONSUMED;
src = macvlan_hash_lookup(port, eth->h_source);
if (src && src->mode != MACVLAN_MODE_VEPA &&
src->mode != MACVLAN_MODE_BRIDGE) {
@@ -482,7 +489,8 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
return RX_HANDLER_PASS;
}
- macvlan_forward_source(skb, port, eth->h_source);
+ if (macvlan_forward_source(skb, port, eth->h_source))
+ return RX_HANDLER_CONSUMED;
if (macvlan_passthru(port))
vlan = list_first_or_null_rcu(&port->vlans,
struct macvlan_dev, list);
@@ -1286,7 +1294,8 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[],
return 0;
if (data[IFLA_MACVLAN_FLAGS] &&
- nla_get_u16(data[IFLA_MACVLAN_FLAGS]) & ~MACVLAN_FLAG_NOPROMISC)
+ nla_get_u16(data[IFLA_MACVLAN_FLAGS]) & ~(MACVLAN_FLAG_NOPROMISC |
+ MACVLAN_FLAG_NODST))
return -EINVAL;
if (data[IFLA_MACVLAN_MODE]) {
diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c
index 6eac50d4b42f..d453ec016168 100644
--- a/drivers/net/phy/intel-xway.c
+++ b/drivers/net/phy/intel-xway.c
@@ -11,6 +11,18 @@
#define XWAY_MDIO_IMASK 0x19 /* interrupt mask */
#define XWAY_MDIO_ISTAT 0x1A /* interrupt status */
+#define XWAY_MDIO_LED 0x1B /* led control */
+
+/* bit 15:12 are reserved */
+#define XWAY_MDIO_LED_LED3_EN BIT(11) /* Enable the integrated function of LED3 */
+#define XWAY_MDIO_LED_LED2_EN BIT(10) /* Enable the integrated function of LED2 */
+#define XWAY_MDIO_LED_LED1_EN BIT(9) /* Enable the integrated function of LED1 */
+#define XWAY_MDIO_LED_LED0_EN BIT(8) /* Enable the integrated function of LED0 */
+/* bit 7:4 are reserved */
+#define XWAY_MDIO_LED_LED3_DA BIT(3) /* Direct Access to LED3 */
+#define XWAY_MDIO_LED_LED2_DA BIT(2) /* Direct Access to LED2 */
+#define XWAY_MDIO_LED_LED1_DA BIT(1) /* Direct Access to LED1 */
+#define XWAY_MDIO_LED_LED0_DA BIT(0) /* Direct Access to LED0 */
#define XWAY_MDIO_INIT_WOL BIT(15) /* Wake-On-LAN */
#define XWAY_MDIO_INIT_MSRE BIT(14)
@@ -159,6 +171,15 @@ static int xway_gphy_config_init(struct phy_device *phydev)
/* Clear all pending interrupts */
phy_read(phydev, XWAY_MDIO_ISTAT);
+ /* Ensure that integrated led function is enabled for all leds */
+ err = phy_write(phydev, XWAY_MDIO_LED,
+ XWAY_MDIO_LED_LED0_EN |
+ XWAY_MDIO_LED_LED1_EN |
+ XWAY_MDIO_LED_LED2_EN |
+ XWAY_MDIO_LED_LED3_EN);
+ if (err)
+ return err;
+
phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LEDCH,
XWAY_MMD_LEDCH_NACS_NONE |
XWAY_MMD_LEDCH_SBF_F02HZ |
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index e2b2b20c0dc5..a61fde7013bd 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -978,22 +978,28 @@ static int m88e1111_get_downshift(struct phy_device *phydev, u8 *data)
static int m88e1111_set_downshift(struct phy_device *phydev, u8 cnt)
{
- int val;
+ int val, err;
if (cnt > MII_M1111_PHY_EXT_CR_DOWNSHIFT_MAX)
return -E2BIG;
- if (!cnt)
- return phy_clear_bits(phydev, MII_M1111_PHY_EXT_CR,
- MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN);
+ if (!cnt) {
+ err = phy_clear_bits(phydev, MII_M1111_PHY_EXT_CR,
+ MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN);
+ } else {
+ val = MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN;
+ val |= FIELD_PREP(MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK, cnt - 1);
- val = MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN;
- val |= FIELD_PREP(MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK, cnt - 1);
+ err = phy_modify(phydev, MII_M1111_PHY_EXT_CR,
+ MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN |
+ MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK,
+ val);
+ }
- return phy_modify(phydev, MII_M1111_PHY_EXT_CR,
- MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN |
- MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK,
- val);
+ if (err < 0)
+ return err;
+
+ return genphy_soft_reset(phydev);
}
static int m88e1111_get_tunable(struct phy_device *phydev,
@@ -1036,22 +1042,28 @@ static int m88e1011_get_downshift(struct phy_device *phydev, u8 *data)
static int m88e1011_set_downshift(struct phy_device *phydev, u8 cnt)
{
- int val;
+ int val, err;
if (cnt > MII_M1011_PHY_SCR_DOWNSHIFT_MAX)
return -E2BIG;
- if (!cnt)
- return phy_clear_bits(phydev, MII_M1011_PHY_SCR,
- MII_M1011_PHY_SCR_DOWNSHIFT_EN);
+ if (!cnt) {
+ err = phy_clear_bits(phydev, MII_M1011_PHY_SCR,
+ MII_M1011_PHY_SCR_DOWNSHIFT_EN);
+ } else {
+ val = MII_M1011_PHY_SCR_DOWNSHIFT_EN;
+ val |= FIELD_PREP(MII_M1011_PHY_SCR_DOWNSHIFT_MASK, cnt - 1);
- val = MII_M1011_PHY_SCR_DOWNSHIFT_EN;
- val |= FIELD_PREP(MII_M1011_PHY_SCR_DOWNSHIFT_MASK, cnt - 1);
+ err = phy_modify(phydev, MII_M1011_PHY_SCR,
+ MII_M1011_PHY_SCR_DOWNSHIFT_EN |
+ MII_M1011_PHY_SCR_DOWNSHIFT_MASK,
+ val);
+ }
- return phy_modify(phydev, MII_M1011_PHY_SCR,
- MII_M1011_PHY_SCR_DOWNSHIFT_EN |
- MII_M1011_PHY_SCR_DOWNSHIFT_MASK,
- val);
+ if (err < 0)
+ return err;
+
+ return genphy_soft_reset(phydev);
}
static int m88e1011_get_tunable(struct phy_device *phydev,
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 9986f8969d02..136ea06540ff 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -767,8 +767,6 @@ enum rtl8152_flags {
PHY_RESET,
SCHEDULE_TASKLET,
GREEN_ETHERNET,
- DELL_TB_RX_AGG_BUG,
- LENOVO_MACPASSTHRU,
};
#define DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2 0x3082
@@ -934,6 +932,8 @@ struct r8152 {
u32 fc_pause_on, fc_pause_off;
u32 support_2500full:1;
+ u32 lenovo_macpassthru:1;
+ u32 dell_tb_rx_agg_bug:1;
u16 ocp_base;
u16 speed;
u16 eee_adv;
@@ -1594,7 +1594,7 @@ static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa)
acpi_object_type mac_obj_type;
int mac_strlen;
- if (test_bit(LENOVO_MACPASSTHRU, &tp->flags)) {
+ if (tp->lenovo_macpassthru) {
mac_obj_name = "\\MACA";
mac_obj_type = ACPI_TYPE_STRING;
mac_strlen = 0x16;
@@ -2283,7 +2283,7 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
remain = agg_buf_sz - (int)(tx_agg_align(tx_data) - agg->head);
- if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags))
+ if (tp->dell_tb_rx_agg_bug)
break;
}
@@ -6941,7 +6941,7 @@ static void r8153_init(struct r8152 *tp)
/* rx aggregation */
ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN);
- if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags))
+ if (tp->dell_tb_rx_agg_bug)
ocp_data |= RX_AGG_DISABLE;
ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
@@ -9447,7 +9447,7 @@ static int rtl8152_probe(struct usb_interface *intf,
switch (le16_to_cpu(udev->descriptor.idProduct)) {
case DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2:
case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2:
- set_bit(LENOVO_MACPASSTHRU, &tp->flags);
+ tp->lenovo_macpassthru = 1;
}
}
@@ -9455,7 +9455,7 @@ static int rtl8152_probe(struct usb_interface *intf,
(!strcmp(udev->serial, "000001000000") ||
!strcmp(udev->serial, "000002000000"))) {
dev_info(&udev->dev, "Dell TB16 Dock, disable RX aggregation");
- set_bit(DELL_TB_RX_AGG_BUG, &tp->flags);
+ tp->dell_tb_rx_agg_bug = 1;
}
netdev->ethtool_ops = &ops;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index 4456abb9a074..34bde8c87324 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -40,6 +40,7 @@ int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
struct iwl_tfh_tfd *tfd;
+ unsigned long flags;
copy_size = sizeof(struct iwl_cmd_header_wide);
cmd_size = sizeof(struct iwl_cmd_header_wide);
@@ -108,14 +109,14 @@ int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
goto free_dup_buf;
}
- spin_lock_bh(&txq->lock);
+ spin_lock_irqsave(&txq->lock, flags);
idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
tfd = iwl_txq_get_tfd(trans, txq, txq->write_ptr);
memset(tfd, 0, sizeof(*tfd));
if (iwl_txq_space(trans, txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
- spin_unlock_bh(&txq->lock);
+ spin_unlock_irqrestore(&txq->lock, flags);
IWL_ERR(trans, "No space in command queue\n");
iwl_op_mode_cmd_queue_full(trans->op_mode);
@@ -250,7 +251,7 @@ int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
spin_unlock(&trans_pcie->reg_lock);
out:
- spin_unlock_bh(&txq->lock);
+ spin_unlock_irqrestore(&txq->lock, flags);
free_dup_buf:
if (idx < 0)
kfree(dup_buf);
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 6a29fe11485d..8b77d08d4b47 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -458,7 +458,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr);
#else
-struct bpf_prog;
struct cgroup_bpf {};
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c9b7a876b0c8..ad4bcf1cadbb 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -310,6 +310,7 @@ enum bpf_arg_type {
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
ARG_PTR_TO_FUNC, /* pointer to a bpf program function */
ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */
+ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
__BPF_ARG_TYPE_MAX,
};
@@ -930,7 +931,6 @@ struct bpf_link_primer {
};
struct bpf_struct_ops_value;
-struct btf_type;
struct btf_member;
#define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64
@@ -1955,6 +1955,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
extern const struct bpf_func_proto bpf_copy_from_user_proto;
extern const struct bpf_func_proto bpf_snprintf_btf_proto;
+extern const struct bpf_func_proto bpf_snprintf_proto;
extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
@@ -2080,4 +2081,24 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
struct btf_id_set;
bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
+enum bpf_printf_mod_type {
+ BPF_PRINTF_INT,
+ BPF_PRINTF_LONG,
+ BPF_PRINTF_LONG_LONG,
+};
+
+/* Workaround for getting va_list handling working with different argument type
+ * combinations generically for 32 and 64 bit archs.
+ */
+#define BPF_CAST_FMT_ARG(arg_nb, args, mod) \
+ (mod[arg_nb] == BPF_PRINTF_LONG_LONG || \
+ (mod[arg_nb] == BPF_PRINTF_LONG && __BITS_PER_LONG == 64) \
+ ? (u64)args[arg_nb] \
+ : (u32)args[arg_nb])
+
+int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
+ u64 *final_args, enum bpf_printf_mod_type *mod,
+ u32 num_args);
+void bpf_printf_cleanup(void);
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 51c2ffa3d901..6023a1367853 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -487,6 +487,15 @@ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
return ((u64)btf_obj_id(btf) << 32) | 0x80000000 | btf_id;
}
+/* unpack the IDs from the key as constructed above */
+static inline void bpf_trampoline_unpack_key(u64 key, u32 *obj_id, u32 *btf_id)
+{
+ if (obj_id)
+ *obj_id = key >> 32;
+ if (btf_id)
+ *btf_id = key & 0x7FFFFFFF;
+}
+
int bpf_check_attach_target(struct bpf_verifier_log *log,
const struct bpf_prog *prog,
const struct bpf_prog *tgt_prog,
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 9cf1da2883c6..17109b65c1ac 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -65,8 +65,6 @@ struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
struct mlx5_eswitch_rep *rep, u32 sqn);
-u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
-
#ifdef CONFIG_MLX5_ESWITCH
enum devlink_eswitch_encap_mode
mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
@@ -126,6 +124,8 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
#define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK
u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
+u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
+
#else /* CONFIG_MLX5_ESWITCH */
static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev)
@@ -162,10 +162,17 @@ mlx5_eswitch_get_vport_metadata_mask(void)
{
return 0;
}
+
+static inline u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
+{
+ return 0;
+}
+
#endif /* CONFIG_MLX5_ESWITCH */
static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev)
{
return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS;
}
+
#endif
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 4db87bcfce7b..aad53cb72f17 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -36,14 +36,6 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/device.h>
-#define MLX5_VPORT_PF_PLACEHOLDER (1u)
-#define MLX5_VPORT_UPLINK_PLACEHOLDER (1u)
-#define MLX5_VPORT_ECPF_PLACEHOLDER(mdev) (mlx5_ecpf_vport_exists(mdev))
-
-#define MLX5_SPECIAL_VPORTS(mdev) (MLX5_VPORT_PF_PLACEHOLDER + \
- MLX5_VPORT_UPLINK_PLACEHOLDER + \
- MLX5_VPORT_ECPF_PLACEHOLDER(mdev))
-
#define MLX5_VPORT_MANAGER(mdev) \
(MLX5_CAP_GEN(mdev, vport_group_manager) && \
(MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index d4c14257db5d..515ce53aa20d 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -7,21 +7,26 @@
#include <net/netlink.h>
#include <uapi/linux/netfilter/nfnetlink.h>
+struct nfnl_info {
+ struct net *net;
+ struct sock *sk;
+ const struct nlmsghdr *nlh;
+ struct netlink_ext_ack *extack;
+};
+
+enum nfnl_callback_type {
+ NFNL_CB_UNSPEC = 0,
+ NFNL_CB_MUTEX,
+ NFNL_CB_RCU,
+ NFNL_CB_BATCH,
+};
+
struct nfnl_callback {
- int (*call)(struct net *net, struct sock *nl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack);
- int (*call_rcu)(struct net *net, struct sock *nl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack);
- int (*call_batch)(struct net *net, struct sock *nl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack);
- const struct nla_policy *policy; /* netlink attribute policy */
- const u_int16_t attr_count; /* number of nlattr's */
+ int (*call)(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const cda[]);
+ const struct nla_policy *policy;
+ enum nfnl_callback_type type;
+ __u16 attr_count;
};
enum nfnl_abort_action {
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 8ec48466410a..07c6ad8f2a02 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -158,7 +158,7 @@ struct xt_match {
/* Called when entry of this type deleted. */
void (*destroy)(const struct xt_mtdtor_param *);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
/* Called when userspace align differs from kernel space one */
void (*compat_from_user)(void *dst, const void *src);
int (*compat_to_user)(void __user *dst, const void *src);
@@ -169,7 +169,7 @@ struct xt_match {
const char *table;
unsigned int matchsize;
unsigned int usersize;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
unsigned int compatsize;
#endif
unsigned int hooks;
@@ -199,7 +199,7 @@ struct xt_target {
/* Called when entry of this type deleted. */
void (*destroy)(const struct xt_tgdtor_param *);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
/* Called when userspace align differs from kernel space one */
void (*compat_from_user)(void *dst, const void *src);
int (*compat_to_user)(void __user *dst, const void *src);
@@ -210,7 +210,7 @@ struct xt_target {
const char *table;
unsigned int targetsize;
unsigned int usersize;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
unsigned int compatsize;
#endif
unsigned int hooks;
@@ -229,6 +229,9 @@ struct xt_table {
/* Man behind the curtain... */
struct xt_table_info *private;
+ /* hook ops that register the table with the netfilter core */
+ struct nf_hook_ops *ops;
+
/* Set this to THIS_MODULE if you are a module, otherwise NULL */
struct module *me;
@@ -322,6 +325,7 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision);
int xt_find_revision(u8 af, const char *name, u8 revision, int target,
int *err);
+struct xt_table *xt_find_table(struct net *net, u8 af, const char *name);
struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
const char *name);
struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af,
@@ -448,7 +452,7 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
#include <net/compat.h>
struct compat_xt_entry_match {
@@ -529,5 +533,5 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems,
unsigned int target_offset,
unsigned int next_offset);
-#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
#endif /* _X_TABLES_H */
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 26a13294318c..2aab9612f6ab 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -51,15 +51,15 @@ struct arpt_error {
extern void *arpt_alloc_initial_table(const struct xt_table *);
int arpt_register_table(struct net *net, const struct xt_table *table,
const struct arpt_replace *repl,
- const struct nf_hook_ops *ops, struct xt_table **res);
-void arpt_unregister_table(struct net *net, struct xt_table *table);
-void arpt_unregister_table_pre_exit(struct net *net, struct xt_table *table,
+ const struct nf_hook_ops *ops);
+void arpt_unregister_table(struct net *net, const char *name);
+void arpt_unregister_table_pre_exit(struct net *net, const char *name,
const struct nf_hook_ops *ops);
extern unsigned int arpt_do_table(struct sk_buff *skb,
const struct nf_hook_state *state,
struct xt_table *table);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
#include <net/compat.h>
struct compat_arpt_entry {
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 3a956145a25c..a8178253ce53 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -100,6 +100,7 @@ struct ebt_table {
unsigned int valid_hooks);
/* the data used by the kernel */
struct ebt_table_info *private;
+ struct nf_hook_ops *ops;
struct module *me;
};
@@ -108,11 +109,9 @@ struct ebt_table {
extern int ebt_register_table(struct net *net,
const struct ebt_table *table,
- const struct nf_hook_ops *ops,
- struct ebt_table **res);
-extern void ebt_unregister_table(struct net *net, struct ebt_table *table);
-void ebt_unregister_table_pre_exit(struct net *net, const char *tablename,
- const struct nf_hook_ops *ops);
+ const struct nf_hook_ops *ops);
+extern void ebt_unregister_table(struct net *net, const char *tablename);
+void ebt_unregister_table_pre_exit(struct net *net, const char *tablename);
extern unsigned int ebt_do_table(struct sk_buff *skb,
const struct nf_hook_state *state,
struct ebt_table *table);
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index c4676d6feeff..8d09bfe850dc 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -24,15 +24,10 @@
int ipt_register_table(struct net *net, const struct xt_table *table,
const struct ipt_replace *repl,
- const struct nf_hook_ops *ops, struct xt_table **res);
-
-void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table,
const struct nf_hook_ops *ops);
-void ipt_unregister_table_exit(struct net *net, struct xt_table *table);
-
-void ipt_unregister_table(struct net *net, struct xt_table *table,
- const struct nf_hook_ops *ops);
+void ipt_unregister_table_pre_exit(struct net *net, const char *name);
+void ipt_unregister_table_exit(struct net *net, const char *name);
/* Standard entry. */
struct ipt_standard {
@@ -72,7 +67,7 @@ extern unsigned int ipt_do_table(struct sk_buff *skb,
const struct nf_hook_state *state,
struct xt_table *table);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
#include <net/compat.h>
struct compat_ipt_entry {
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 1547d5f9ae06..79e73fd7d965 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -26,17 +26,14 @@ extern void *ip6t_alloc_initial_table(const struct xt_table *);
int ip6t_register_table(struct net *net, const struct xt_table *table,
const struct ip6t_replace *repl,
- const struct nf_hook_ops *ops, struct xt_table **res);
-void ip6t_unregister_table(struct net *net, struct xt_table *table,
- const struct nf_hook_ops *ops);
-void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table,
- const struct nf_hook_ops *ops);
-void ip6t_unregister_table_exit(struct net *net, struct xt_table *table);
+ const struct nf_hook_ops *ops);
+void ip6t_unregister_table_pre_exit(struct net *net, const char *name);
+void ip6t_unregister_table_exit(struct net *net, const char *name);
extern unsigned int ip6t_do_table(struct sk_buff *skb,
const struct nf_hook_state *state,
struct xt_table *table);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
#include <net/compat.h>
struct compat_ip6t_entry {
diff --git a/include/linux/platform_data/eth_ixp4xx.h b/include/linux/platform_data/eth_ixp4xx.h
index 6f652ea0c6ae..114b0940729f 100644
--- a/include/linux/platform_data/eth_ixp4xx.h
+++ b/include/linux/platform_data/eth_ixp4xx.h
@@ -14,6 +14,8 @@ struct eth_plat_info {
u8 rxq; /* configurable, currently 0 - 31 only */
u8 txreadyq;
u8 hwaddr[6];
+ u8 npe; /* NPE instance used by this interface */
+ bool has_mdio; /* If this instance has an MDIO bus */
};
#endif
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index e242bf3d2b4a..aba0f0f429be 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -99,7 +99,8 @@ struct sk_psock {
void (*saved_close)(struct sock *sk, long timeout);
void (*saved_write_space)(struct sock *sk);
void (*saved_data_ready)(struct sock *sk);
- int (*psock_update_sk_prot)(struct sock *sk, bool restore);
+ int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock,
+ bool restore);
struct proto *sk_proto;
struct mutex work_mutex;
struct sk_psock_work_state work_state;
@@ -404,7 +405,7 @@ static inline void sk_psock_restore_proto(struct sock *sk,
struct sk_psock *psock)
{
if (psock->psock_update_sk_prot)
- psock->psock_update_sk_prot(sk, true);
+ psock->psock_update_sk_prot(sk, psock, true);
}
static inline void sk_psock_set_state(struct sk_psock *psock,
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 853420db5d32..7c984cadfec4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -98,11 +98,13 @@ struct devlink_port_pci_vf_attrs {
* @controller: Associated controller number
* @sf: Associated PCI SF for of the PCI PF for this port.
* @pf: Associated PCI PF number for this port.
+ * @external: when set, indicates if a port is for an external controller
*/
struct devlink_port_pci_sf_attrs {
u32 controller;
u32 sf;
u16 pf;
+ u8 external:1;
};
/**
@@ -1508,7 +1510,8 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro
void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
u16 pf, u16 vf, bool external);
void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
- u32 controller, u16 pf, u32 sf);
+ u32 controller, u16 pf, u32 sf,
+ bool external);
int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
u32 size, u16 ingress_pools_count,
u16 egress_pools_count, u16 ingress_tc_count,
diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h b/include/net/netfilter/ipv4/nf_defrag_ipv4.h
index bcbd724cc048..7fda9ce9f694 100644
--- a/include/net/netfilter/ipv4/nf_defrag_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h
@@ -3,6 +3,7 @@
#define _NF_DEFRAG_IPV4_H
struct net;
-int nf_defrag_ipv4_enable(struct net *);
+int nf_defrag_ipv4_enable(struct net *net);
+void nf_defrag_ipv4_disable(struct net *net);
#endif /* _NF_DEFRAG_IPV4_H */
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index ece923e2035b..0fd8a4159662 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -5,7 +5,8 @@
#include <linux/skbuff.h>
#include <linux/types.h>
-int nf_defrag_ipv6_enable(struct net *);
+int nf_defrag_ipv6_enable(struct net *net);
+void nf_defrag_ipv6_disable(struct net *net);
int nf_ct_frag6_init(void);
void nf_ct_frag6_cleanup(void);
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 0d412dd63707..987111ae5240 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -104,8 +104,6 @@ unsigned int
nf_nat_inet_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
-int nf_xfrm_me_harder(struct net *n, struct sk_buff *s, unsigned int family);
-
static inline int nf_nat_initialized(struct nf_conn *ct,
enum nf_nat_manip_type manip)
{
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 4a75da2a2e1d..eb708b77c4a5 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -13,6 +13,7 @@
#include <net/netfilter/nf_flow_table.h>
#include <net/netlink.h>
#include <net/flow_offload.h>
+#include <net/netns/generic.h>
#define NFT_MAX_HOOKS (NF_INET_INGRESS + 1)
@@ -1580,4 +1581,11 @@ struct nftables_pernet {
u8 validate_state;
};
+extern unsigned int nf_tables_net_id;
+
+static inline struct nftables_pernet *nft_pernet(const struct net *net)
+{
+ return net_generic(net, nf_tables_net_id);
+}
+
#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 87e1612497ea..f6af8d96d3c6 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -76,16 +76,6 @@ struct netns_ipv4 {
struct inet_peer_base *peers;
struct sock * __percpu *tcp_sk;
struct fqdir *fqdir;
-#ifdef CONFIG_NETFILTER
- struct xt_table *iptable_filter;
- struct xt_table *iptable_mangle;
- struct xt_table *iptable_raw;
- struct xt_table *arptable_filter;
-#ifdef CONFIG_SECURITY
- struct xt_table *iptable_security;
-#endif
- struct xt_table *nat_table;
-#endif
u8 sysctl_icmp_echo_ignore_all;
u8 sysctl_icmp_echo_enable_probe;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 808f0f79ea9c..6153c8067009 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -63,15 +63,6 @@ struct netns_ipv6 {
struct ipv6_devconf *devconf_dflt;
struct inet_peer_base *peers;
struct fqdir *fqdir;
-#ifdef CONFIG_NETFILTER
- struct xt_table *ip6table_filter;
- struct xt_table *ip6table_mangle;
- struct xt_table *ip6table_raw;
-#ifdef CONFIG_SECURITY
- struct xt_table *ip6table_security;
-#endif
- struct xt_table *ip6table_nat;
-#endif
struct fib6_info *fib6_null_entry;
struct rt6_info *ip6_null_entry;
struct rt6_statistics *rt6_stats;
diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
index 83c8ea2e87a6..d02316ec2906 100644
--- a/include/net/netns/x_tables.h
+++ b/include/net/netns/x_tables.h
@@ -5,16 +5,8 @@
#include <linux/list.h>
#include <linux/netfilter_defs.h>
-struct ebt_table;
-
struct netns_xt {
bool notrack_deprecated_warning;
bool clusterip_deprecated_warning;
-#if defined(CONFIG_BRIDGE_NF_EBTABLES) || \
- defined(CONFIG_BRIDGE_NF_EBTABLES_MODULE)
- struct ebt_table *broute_table;
- struct ebt_table *frame_filter;
- struct ebt_table *frame_nat;
-#endif
};
#endif
diff --git a/include/net/sock.h b/include/net/sock.h
index cadcc12cc316..42bc5e1a627f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1118,6 +1118,7 @@ struct inet_hashinfo;
struct raw_hashinfo;
struct smc_hashinfo;
struct module;
+struct sk_psock;
/*
* caches using SLAB_TYPESAFE_BY_RCU should let .next pointer from nulls nodes
@@ -1189,7 +1190,9 @@ struct proto {
void (*rehash)(struct sock *sk);
int (*get_port)(struct sock *sk, unsigned short snum);
#ifdef CONFIG_BPF_SYSCALL
- int (*psock_update_sk_prot)(struct sock *sk, bool restore);
+ int (*psock_update_sk_prot)(struct sock *sk,
+ struct sk_psock *psock,
+ bool restore);
#endif
/* Keeping track of sockets in use */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index eaea43afcc97..d05193cb0d99 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2215,7 +2215,7 @@ struct sk_psock;
#ifdef CONFIG_BPF_SYSCALL
struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
-int tcp_bpf_update_proto(struct sock *sk, bool restore);
+int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#endif /* CONFIG_BPF_SYSCALL */
diff --git a/include/net/udp.h b/include/net/udp.h
index f55aaeef7e91..360df454356c 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -543,7 +543,7 @@ static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
#ifdef CONFIG_BPF_SYSCALL
struct sk_psock;
struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
-int udp_bpf_update_proto(struct sock *sk, bool restore);
+int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
#endif
#endif /* _UDP_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 49371eba98ba..ec6d85a81744 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -312,6 +312,27 @@ union bpf_iter_link_info {
* *ctx_out*, *data_out* (for example, packet data), result of the
* execution *retval*, and *duration* of the test run.
*
+ * The sizes of the buffers provided as input and output
+ * parameters *ctx_in*, *ctx_out*, *data_in*, and *data_out* must
+ * be provided in the corresponding variables *ctx_size_in*,
+ * *ctx_size_out*, *data_size_in*, and/or *data_size_out*. If any
+ * of these parameters are not provided (i.e. set to NULL), the
+ * corresponding size field must be zero.
+ *
+ * Some program types have particular requirements:
+ *
+ * **BPF_PROG_TYPE_SK_LOOKUP**
+ * *data_in* and *data_out* must be NULL.
+ *
+ * **BPF_PROG_TYPE_XDP**
+ * *ctx_in* and *ctx_out* must be NULL.
+ *
+ * **BPF_PROG_TYPE_RAW_TRACEPOINT**,
+ * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
+ *
+ * *ctx_out*, *data_in* and *data_out* must be NULL.
+ * *repeat* must be zero.
+ *
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
@@ -4061,12 +4082,20 @@ union bpf_attr {
* of new data availability is sent.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
+ * If **0** is specified in *flags*, an adaptive notification
+ * of new data availability is sent.
+ *
+ * An adaptive notification is a notification sent whenever the user-space
+ * process has caught up and consumed all available payloads. In case the user-space
+ * process is still processing a previous payload, then no notification is needed
+ * as it will process the newly added payload automatically.
* Return
* 0 on success, or a negative error in case of failure.
*
* void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
* Description
* Reserve *size* bytes of payload in a ring buffer *ringbuf*.
+ * *flags* must be 0.
* Return
* Valid pointer with *size* bytes of memory available; NULL,
* otherwise.
@@ -4078,6 +4107,10 @@ union bpf_attr {
* of new data availability is sent.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
+ * If **0** is specified in *flags*, an adaptive notification
+ * of new data availability is sent.
+ *
+ * See 'bpf_ringbuf_output()' for the definition of adaptive notification.
* Return
* Nothing. Always succeeds.
*
@@ -4088,6 +4121,10 @@ union bpf_attr {
* of new data availability is sent.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
+ * If **0** is specified in *flags*, an adaptive notification
+ * of new data availability is sent.
+ *
+ * See 'bpf_ringbuf_output()' for the definition of adaptive notification.
* Return
* Nothing. Always succeeds.
*
@@ -4671,6 +4708,33 @@ union bpf_attr {
* Return
* The number of traversed map elements for success, **-EINVAL** for
* invalid **flags**.
+ *
+ * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len)
+ * Description
+ * Outputs a string into the **str** buffer of size **str_size**
+ * based on a format string stored in a read-only map pointed by
+ * **fmt**.
+ *
+ * Each format specifier in **fmt** corresponds to one u64 element
+ * in the **data** array. For strings and pointers where pointees
+ * are accessed, only the pointer values are stored in the *data*
+ * array. The *data_len* is the size of *data* in bytes.
+ *
+ * Formats **%s** and **%p{i,I}{4,6}** require reading kernel
+ * memory. Reading kernel memory may fail due to either invalid
+ * address or valid address but requiring a major memory fault. If
+ * reading kernel memory fails, the string for **%s** will be an
+ * empty string, and the ip address for **%p{i,I}{4,6}** will be 0.
+ * Not returning error to bpf program is consistent with what
+ * **bpf_trace_printk**\ () does for now.
+ *
+ * Return
+ * The strictly positive length of the formatted string, including
+ * the trailing zero character. If the return value is greater than
+ * **str_size**, **str** contains a truncated string, guaranteed to
+ * be zero-terminated except when **str_size** is 0.
+ *
+ * Or **-EBUSY** if the per-CPU memory copy buffer is busy.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4838,6 +4902,7 @@ union bpf_attr {
FN(sock_from_file), \
FN(check_mtu), \
FN(for_each_map_elem), \
+ FN(snprintf), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5379,6 +5444,8 @@ struct bpf_link_info {
} raw_tracepoint;
struct {
__u32 attach_type;
+ __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */
+ __u32 target_btf_id; /* BTF type id inside the object */
} tracing;
struct {
__u64 cgroup_id;
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 91c8dda6d95d..cd5b382a4138 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -614,6 +614,7 @@ enum macvlan_macaddr_mode {
};
#define MACVLAN_FLAG_NOPROMISC 1
+#define MACVLAN_FLAG_NODST 2 /* skip dst macvlan if matching src macvlan */
/* VRF section */
enum {
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 79bab7a36b30..467365ed59a7 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1014,11 +1014,13 @@ enum nft_rt_attributes {
*
* @NFTA_SOCKET_KEY: socket key to match
* @NFTA_SOCKET_DREG: destination register
+ * @NFTA_SOCKET_LEVEL: cgroups2 ancestor level (only for cgroupsv2)
*/
enum nft_socket_attributes {
NFTA_SOCKET_UNSPEC,
NFTA_SOCKET_KEY,
NFTA_SOCKET_DREG,
+ NFTA_SOCKET_LEVEL,
__NFTA_SOCKET_MAX
};
#define NFTA_SOCKET_MAX (__NFTA_SOCKET_MAX - 1)
@@ -1029,11 +1031,13 @@ enum nft_socket_attributes {
* @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option
* @NFT_SOCKET_MARK: Value of the socket mark
* @NFT_SOCKET_WILDCARD: Whether the socket is zero-bound (e.g. 0.0.0.0 or ::0)
+ * @NFT_SOCKET_CGROUPV2: Match on cgroups version 2
*/
enum nft_socket_keys {
NFT_SOCKET_TRANSPARENT,
NFT_SOCKET_MARK,
NFT_SOCKET_WILDCARD,
+ NFT_SOCKET_CGROUPV2,
__NFT_SOCKET_MAX
};
#define NFT_SOCKET_MAX (__NFT_SOCKET_MAX - 1)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f5423251c118..5e31ee9f7512 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1363,11 +1363,10 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
* __bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
* @insn: is the array of eBPF instructions
- * @stack: is the eBPF storage stack
*
* Decode and execute eBPF instructions.
*/
-static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
+static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
{
#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
@@ -1701,7 +1700,7 @@ static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn
\
FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
ARG1 = (u64) (unsigned long) ctx; \
- return ___bpf_prog_run(regs, insn, stack); \
+ return ___bpf_prog_run(regs, insn); \
}
#define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size
@@ -1718,7 +1717,7 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
BPF_R3 = r3; \
BPF_R4 = r4; \
BPF_R5 = r5; \
- return ___bpf_prog_run(regs, insn, stack); \
+ return ___bpf_prog_run(regs, insn); \
}
#define EVAL1(FN, X) FN(X)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index f306611c4ddf..85b26ca5aacd 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -669,6 +669,310 @@ const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
.arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
};
+/* Copy a NUL-terminated string from an untrusted pointer into buf without
+ * faulting. fmt_ptype selects the source: 'k' reads kernel memory, 'u' reads
+ * user memory, and 's' reads user memory for addresses below TASK_SIZE when
+ * CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE is set and kernel memory
+ * otherwise. buf is emptied first so it holds a valid string even on failure.
+ *
+ * Returns the result of the strncpy_from_*_nofault() call that was used, or
+ * -EINVAL for an unknown fmt_ptype.
+ */
+static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
+ size_t bufsz)
+{
+ void __user *user_ptr = (__force void __user *)unsafe_ptr;
+
+ buf[0] = 0;
+
+ switch (fmt_ptype) {
+ case 's':
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+ if ((unsigned long)unsafe_ptr < TASK_SIZE)
+ return strncpy_from_user_nofault(buf, user_ptr, bufsz);
+ fallthrough;
+#endif
+ case 'k':
+ return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
+ case 'u':
+ return strncpy_from_user_nofault(buf, user_ptr, bufsz);
+ }
+
+ return -EINVAL;
+}
+
+/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p
+ */
+/* Size in bytes of each CPU's temp buffer */
+#define MAX_PRINTF_BUF_LEN 512
+
+struct bpf_printf_buf {
+ char tmp_buf[MAX_PRINTF_BUF_LEN];
+};
+static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf);
+/* Per-CPU use counter: try_get_fmt_tmp_buf() fails with -EBUSY if it would
+ * exceed 1, and bpf_printf_cleanup() decrements it.
+ */
+static DEFINE_PER_CPU(int, bpf_printf_buf_used);
+
+/* Claim this CPU's temp buffer for a printf-like helper.
+ *
+ * If *tmp_buf is already set, the buffer was claimed earlier and nothing is
+ * done. Otherwise preemption is disabled and the per-CPU use counter is
+ * incremented; if the buffer is already in use on this CPU the claim is
+ * undone and -EBUSY is returned. On success *tmp_buf points at the buffer and
+ * preemption is left disabled; bpf_printf_cleanup() re-enables it.
+ */
+static int try_get_fmt_tmp_buf(char **tmp_buf)
+{
+ struct bpf_printf_buf *bufs;
+ int used;
+
+ if (*tmp_buf)
+ return 0;
+
+ preempt_disable();
+ used = this_cpu_inc_return(bpf_printf_buf_used);
+ if (WARN_ON_ONCE(used > 1)) {
+ this_cpu_dec(bpf_printf_buf_used);
+ preempt_enable();
+ return -EBUSY;
+ }
+ bufs = this_cpu_ptr(&bpf_printf_buf);
+ *tmp_buf = bufs->tmp_buf;
+
+ return 0;
+}
+
+/* Release the per-CPU temp buffer claimed by try_get_fmt_tmp_buf(): decrement
+ * the use counter and re-enable preemption. Does nothing when the counter is
+ * zero, i.e. when no buffer is currently claimed on this CPU.
+ */
+void bpf_printf_cleanup(void)
+{
+ if (this_cpu_read(bpf_printf_buf_used)) {
+ this_cpu_dec(bpf_printf_buf_used);
+ preempt_enable();
+ }
+}
+
+/*
+ * bpf_printf_prepare - Generic pass on format strings for printf-like helpers
+ *
+ * @fmt: format string; must contain a NUL within its first @fmt_size bytes
+ * @fmt_size: upper bound on the number of bytes of @fmt to scan
+ * @raw_args: argument values as supplied by the BPF program (unused in
+ * verification-only mode)
+ * @final_args: output array receiving the sanitized arguments, or NULL
+ * @mod: output array receiving the size class of each argument, or NULL
+ * @num_args: maximum number of format specifiers accepted
+ *
+ * Returns a negative value if fmt is an invalid format string or 0 otherwise.
+ * -EINVAL is also returned if exactly one of final_args and mod is NULL,
+ * -EBUSY if the per-CPU temp buffer is already in use and -ENOSPC if the
+ * copied strings/addresses do not fit in that buffer.
+ *
+ * This can be used in two ways:
+ * - Format string verification only: when final_args and mod are NULL
+ * - Arguments preparation: in addition to the above verification, it writes in
+ * final_args a copy of raw_args where pointers from BPF have been sanitized
+ * into pointers safe to use by snprintf. This also writes in the mod array
+ * the size requirement of each argument, usable by BPF_CAST_FMT_ARG for ex.
+ *
+ * In argument preparation mode, if 0 is returned, safe temporary buffers are
+ * allocated and bpf_printf_cleanup should be called to free them after use.
+ */
+int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
+ u64 *final_args, enum bpf_printf_mod_type *mod,
+ u32 num_args)
+{
+ char *unsafe_ptr = NULL, *tmp_buf = NULL, *fmt_end;
+ size_t tmp_buf_len = MAX_PRINTF_BUF_LEN;
+ int err, i, num_spec = 0, copy_size;
+ enum bpf_printf_mod_type cur_mod;
+ u64 cur_arg;
+ char fmt_ptype;
+
+ if (!!final_args != !!mod)
+ return -EINVAL;
+
+ fmt_end = strnchr(fmt, fmt_size, 0);
+ if (!fmt_end)
+ return -EINVAL;
+ fmt_size = fmt_end - fmt;
+
+ for (i = 0; i < fmt_size; i++) {
+ if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if (fmt[i] != '%')
+ continue;
+
+ if (fmt[i + 1] == '%') {
+ i++;
+ continue;
+ }
+
+ if (num_spec >= num_args) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* The string is zero-terminated so if fmt[i] != 0, we can
+ * always access fmt[i + 1], in the worst case it will be a 0
+ */
+ i++;
+
+ /* skip optional "[0 +-][num]" width formatting field */
+ while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
+ fmt[i] == ' ')
+ i++;
+ if (fmt[i] >= '1' && fmt[i] <= '9') {
+ i++;
+ while (fmt[i] >= '0' && fmt[i] <= '9')
+ i++;
+ }
+
+ if (fmt[i] == 'p') {
+ cur_mod = BPF_PRINTF_LONG;
+
+ if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
+ fmt[i + 2] == 's') {
+ fmt_ptype = fmt[i + 1];
+ i += 2;
+ goto fmt_str;
+ }
+
+ if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
+ ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
+ fmt[i + 1] == 'x' || fmt[i + 1] == 'B' ||
+ fmt[i + 1] == 's' || fmt[i + 1] == 'S') {
+ /* just kernel pointers */
+ if (final_args)
+ cur_arg = raw_args[num_spec];
+ goto fmt_next;
+ }
+
+ /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
+ if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
+ (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ i += 2;
+ if (!final_args)
+ goto fmt_next;
+
+ if (try_get_fmt_tmp_buf(&tmp_buf)) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ /* i was advanced past "pI"/"pi" above, so fmt[i] is the '4'/'6' */
+ copy_size = (fmt[i] == '4') ? 4 : 16;
+ if (tmp_buf_len < copy_size) {
+ err = -ENOSPC;
+ goto cleanup;
+ }
+
+ unsafe_ptr = (char *)(long)raw_args[num_spec];
+ err = copy_from_kernel_nofault(tmp_buf, unsafe_ptr,
+ copy_size);
+ if (err < 0)
+ memset(tmp_buf, 0, copy_size);
+ cur_arg = (u64)(long)tmp_buf;
+ tmp_buf += copy_size;
+ tmp_buf_len -= copy_size;
+
+ goto fmt_next;
+ } else if (fmt[i] == 's') {
+ cur_mod = BPF_PRINTF_LONG;
+ fmt_ptype = fmt[i];
+fmt_str:
+ if (fmt[i + 1] != 0 &&
+ !isspace(fmt[i + 1]) &&
+ !ispunct(fmt[i + 1])) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if (!final_args)
+ goto fmt_next;
+
+ if (try_get_fmt_tmp_buf(&tmp_buf)) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ if (!tmp_buf_len) {
+ err = -ENOSPC;
+ goto cleanup;
+ }
+
+ unsafe_ptr = (char *)(long)raw_args[num_spec];
+ err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
+ fmt_ptype, tmp_buf_len);
+ if (err < 0) {
+ tmp_buf[0] = '\0';
+ err = 1;
+ }
+
+ cur_arg = (u64)(long)tmp_buf;
+ tmp_buf += err;
+ tmp_buf_len -= err;
+
+ goto fmt_next;
+ }
+
+ cur_mod = BPF_PRINTF_INT;
+
+ if (fmt[i] == 'l') {
+ cur_mod = BPF_PRINTF_LONG;
+ i++;
+ }
+ if (fmt[i] == 'l') {
+ cur_mod = BPF_PRINTF_LONG_LONG;
+ i++;
+ }
+
+ if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
+ fmt[i] != 'x' && fmt[i] != 'X') {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if (final_args)
+ cur_arg = raw_args[num_spec];
+fmt_next:
+ if (final_args) {
+ mod[num_spec] = cur_mod;
+ final_args[num_spec] = cur_arg;
+ }
+ num_spec++;
+ }
+
+ err = 0;
+cleanup:
+ if (err)
+ bpf_printf_cleanup();
+out:
+ return err;
+}
+
+#define MAX_SNPRINTF_VARARGS 12
+
+/* bpf_snprintf() helper: format up to MAX_SNPRINTF_VARARGS u64 arguments taken
+ * from data into str (of size str_size) according to fmt.
+ *
+ * data_len is in bytes: it must be a multiple of 8 and at most
+ * MAX_SNPRINTF_VARARGS * 8, and data must be non-NULL when it is non-zero,
+ * otherwise -EINVAL is returned. Errors from bpf_printf_prepare() are passed
+ * through. On success the snprintf() result plus one is returned.
+ */
+BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
+ const void *, data, u32, data_len)
+{
+ enum bpf_printf_mod_type mod[MAX_SNPRINTF_VARARGS];
+ u64 args[MAX_SNPRINTF_VARARGS];
+ int err, num_args;
+
+ if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 ||
+ (data_len && !data))
+ return -EINVAL;
+ num_args = data_len / 8;
+
+ /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
+ * can safely give an unbounded size.
+ */
+ err = bpf_printf_prepare(fmt, UINT_MAX, data, args, mod, num_args);
+ if (err < 0)
+ return err;
+
+ /* There can be at most MAX_SNPRINTF_VARARGS parameters, so just pass
+ * all of them to snprintf().
+ */
+ err = snprintf(str, str_size, fmt, BPF_CAST_FMT_ARG(0, args, mod),
+ BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod),
+ BPF_CAST_FMT_ARG(3, args, mod), BPF_CAST_FMT_ARG(4, args, mod),
+ BPF_CAST_FMT_ARG(5, args, mod), BPF_CAST_FMT_ARG(6, args, mod),
+ BPF_CAST_FMT_ARG(7, args, mod), BPF_CAST_FMT_ARG(8, args, mod),
+ BPF_CAST_FMT_ARG(9, args, mod), BPF_CAST_FMT_ARG(10, args, mod),
+ BPF_CAST_FMT_ARG(11, args, mod));
+
+ /* Release the temp buffer that bpf_printf_prepare() may have claimed */
+ bpf_printf_cleanup();
+
+ /* +1 accounts for the trailing zero character */
+ return err + 1;
+}
+
+/* Function prototype describing bpf_snprintf() (GPL-only): five arguments
+ * (str, str_size, fmt, data, data_len) and an integer return value.
+ */
+const struct bpf_func_proto bpf_snprintf_proto = {
+ .func = bpf_snprintf,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_PTR_TO_CONST_STR,
+ .arg4_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+};
+
const struct bpf_func_proto bpf_get_current_task_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
@@ -757,6 +1061,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_probe_read_kernel_str_proto;
case BPF_FUNC_snprintf_btf:
return &bpf_snprintf_btf_proto;
+ case BPF_FUNC_snprintf:
+ return &bpf_snprintf_proto;
default:
return NULL;
}
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index d2de2abec35b..b4ebd60a6c16 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -816,8 +816,6 @@ static int __init bpf_init(void)
{
int ret;
- mutex_init(&bpf_preload_lock);
-
ret = sysfs_create_mount_point(fs_kobj, "bpf");
if (ret)
return ret;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6428634da57e..fd495190115e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2551,6 +2551,9 @@ static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
container_of(link, struct bpf_tracing_link, link);
info->tracing.attach_type = tr_link->attach_type;
+ bpf_trampoline_unpack_key(tr_link->trampoline->key,
+ &info->tracing.target_obj_id,
+ &info->tracing.target_btf_id);
return 0;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5682a02901d3..637462e9b6ee 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4787,6 +4787,7 @@ static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALU
static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
+static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
@@ -4817,6 +4818,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
[ARG_PTR_TO_FUNC] = &func_ptr_types,
[ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types,
+ [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
};
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -5067,6 +5069,44 @@ skip_type_check:
if (err)
return err;
err = check_ptr_alignment(env, reg, 0, size, true);
+ } else if (arg_type == ARG_PTR_TO_CONST_STR) {
+ struct bpf_map *map = reg->map_ptr;
+ int map_off;
+ u64 map_addr;
+ char *str_ptr;
+
+ if (!bpf_map_is_rdonly(map)) {
+ verbose(env, "R%d does not point to a readonly map'\n", regno);
+ return -EACCES;
+ }
+
+ if (!tnum_is_const(reg->var_off)) {
+ verbose(env, "R%d is not a constant address'\n", regno);
+ return -EACCES;
+ }
+
+ if (!map->ops->map_direct_value_addr) {
+ verbose(env, "no direct value access support for this map type\n");
+ return -EACCES;
+ }
+
+ err = check_map_access(env, regno, reg->off,
+ map->value_size - reg->off, false);
+ if (err)
+ return err;
+
+ map_off = reg->off + reg->var_off.value;
+ err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
+ if (err) {
+ verbose(env, "direct value access on string failed\n");
+ return err;
+ }
+
+ str_ptr = (char *)(long)(map_addr);
+ if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
+ verbose(env, "string is not zero-terminated\n");
+ return -EINVAL;
+ }
}
return err;
@@ -5767,6 +5807,7 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
if (ret_type != RET_INTEGER ||
(func_id != BPF_FUNC_get_stack &&
+ func_id != BPF_FUNC_get_task_stack &&
func_id != BPF_FUNC_probe_read_str &&
func_id != BPF_FUNC_probe_read_kernel_str &&
func_id != BPF_FUNC_probe_read_user_str))
@@ -5877,6 +5918,43 @@ static int check_reference_leak(struct bpf_verifier_env *env)
return state->acquired_refs ? -EINVAL : 0;
}
+/* Verifier-time check of a bpf_snprintf() call: validates the format string
+ * referenced by R3 against the number of u64 arguments implied by the data
+ * length in R5.
+ *
+ * Returns -EINVAL if the data length is not a multiple of 8, -EFAULT if the
+ * map's direct value address cannot be resolved, the negative error from
+ * bpf_printf_prepare() for an invalid format string, and 0 otherwise.
+ */
+static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
+ struct bpf_reg_state *regs)
+{
+ struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
+ struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
+ struct bpf_map *fmt_map = fmt_reg->map_ptr;
+ int err, fmt_map_off, num_args;
+ u64 fmt_addr;
+ char *fmt;
+
+ /* data must be an array of u64 */
+ if (data_len_reg->var_off.value % 8)
+ return -EINVAL;
+ num_args = data_len_reg->var_off.value / 8;
+
+ /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
+ * and map_direct_value_addr is set.
+ */
+ fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
+ err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
+ fmt_map_off);
+ if (err) {
+ verbose(env, "verifier bug\n");
+ return -EFAULT;
+ }
+ fmt = (char *)(long)fmt_addr + fmt_map_off;
+
+ /* We are also guaranteed that fmt (which already includes fmt_map_off)
+ * is NUL terminated, we can focus on validating the format specifiers.
+ */
+ err = bpf_printf_prepare(fmt, UINT_MAX, NULL, NULL, NULL, num_args);
+ if (err < 0)
+ verbose(env, "Invalid format string\n");
+
+ return err;
+}
+
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
@@ -5991,6 +6069,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return -EINVAL;
}
+ if (func_id == BPF_FUNC_snprintf) {
+ err = check_bpf_snprintf_call(env, regs);
+ if (err < 0)
+ return err;
+ }
+
/* reset caller saved regs */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
mark_reg_not_init(env, regs, caller_saved[i]);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0d23755c2747..2a8bcdc927c7 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -372,188 +372,38 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
return &bpf_probe_write_user_proto;
}
-static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
- size_t bufsz)
-{
- void __user *user_ptr = (__force void __user *)unsafe_ptr;
-
- buf[0] = 0;
-
- switch (fmt_ptype) {
- case 's':
-#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
- if ((unsigned long)unsafe_ptr < TASK_SIZE) {
- strncpy_from_user_nofault(buf, user_ptr, bufsz);
- break;
- }
- fallthrough;
-#endif
- case 'k':
- strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
- break;
- case 'u':
- strncpy_from_user_nofault(buf, user_ptr, bufsz);
- break;
- }
-}
-
static DEFINE_RAW_SPINLOCK(trace_printk_lock);
-#define BPF_TRACE_PRINTK_SIZE 1024
+#define MAX_TRACE_PRINTK_VARARGS 3
+#define BPF_TRACE_PRINTK_SIZE 1024
-static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...)
+/* bpf_trace_printk() helper: format up to MAX_TRACE_PRINTK_VARARGS u64
+ * arguments according to fmt and emit the result through the
+ * bpf_trace_printk tracepoint. Returns the snprintf() length, or the negative
+ * error from bpf_printf_prepare() if the format string is rejected.
+ */
+BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
+ u64, arg2, u64, arg3)
{
+ u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
+ enum bpf_printf_mod_type mod[MAX_TRACE_PRINTK_VARARGS];
static char buf[BPF_TRACE_PRINTK_SIZE];
unsigned long flags;
- va_list ap;
int ret;
- raw_spin_lock_irqsave(&trace_printk_lock, flags);
- va_start(ap, fmt);
- ret = vsnprintf(buf, sizeof(buf), fmt, ap);
- va_end(ap);
- /* vsnprintf() will not append null for zero-length strings */
+ ret = bpf_printf_prepare(fmt, fmt_size, args, args, mod,
+ MAX_TRACE_PRINTK_VARARGS);
+ if (ret < 0)
+ return ret;
+
+ /* buf is static and therefore shared by every caller: the lock must be
+ * held while formatting into it, not only around the tracepoint.
+ */
+ raw_spin_lock_irqsave(&trace_printk_lock, flags);
+ ret = snprintf(buf, sizeof(buf), fmt, BPF_CAST_FMT_ARG(0, args, mod),
+ BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod));
+ /* snprintf() will not append null for zero-length strings */
if (ret == 0)
buf[0] = '\0';
+
trace_bpf_trace_printk(buf);
raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
- return ret;
-}
-
-/*
- * Only limited trace_printk() conversion specifiers allowed:
- * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
- */
-BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
- u64, arg2, u64, arg3)
-{
- int i, mod[3] = {}, fmt_cnt = 0;
- char buf[64], fmt_ptype;
- void *unsafe_ptr = NULL;
- bool str_seen = false;
+ bpf_printf_cleanup();
- /*
- * bpf_check()->check_func_arg()->check_stack_boundary()
- * guarantees that fmt points to bpf program stack,
- * fmt_size bytes of it were initialized and fmt_size > 0
- */
- if (fmt[--fmt_size] != 0)
- return -EINVAL;
-
- /* check format string for allowed specifiers */
- for (i = 0; i < fmt_size; i++) {
- if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
- return -EINVAL;
-
- if (fmt[i] != '%')
- continue;
-
- if (fmt_cnt >= 3)
- return -EINVAL;
-
- /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
- i++;
- if (fmt[i] == 'l') {
- mod[fmt_cnt]++;
- i++;
- } else if (fmt[i] == 'p') {
- mod[fmt_cnt]++;
- if ((fmt[i + 1] == 'k' ||
- fmt[i + 1] == 'u') &&
- fmt[i + 2] == 's') {
- fmt_ptype = fmt[i + 1];
- i += 2;
- goto fmt_str;
- }
-
- if (fmt[i + 1] == 'B') {
- i++;
- goto fmt_next;
- }
-
- /* disallow any further format extensions */
- if (fmt[i + 1] != 0 &&
- !isspace(fmt[i + 1]) &&
- !ispunct(fmt[i + 1]))
- return -EINVAL;
-
- goto fmt_next;
- } else if (fmt[i] == 's') {
- mod[fmt_cnt]++;
- fmt_ptype = fmt[i];
-fmt_str:
- if (str_seen)
- /* allow only one '%s' per fmt string */
- return -EINVAL;
- str_seen = true;
-
- if (fmt[i + 1] != 0 &&
- !isspace(fmt[i + 1]) &&
- !ispunct(fmt[i + 1]))
- return -EINVAL;
-
- switch (fmt_cnt) {
- case 0:
- unsafe_ptr = (void *)(long)arg1;
- arg1 = (long)buf;
- break;
- case 1:
- unsafe_ptr = (void *)(long)arg2;
- arg2 = (long)buf;
- break;
- case 2:
- unsafe_ptr = (void *)(long)arg3;
- arg3 = (long)buf;
- break;
- }
-
- bpf_trace_copy_string(buf, unsafe_ptr, fmt_ptype,
- sizeof(buf));
- goto fmt_next;
- }
-
- if (fmt[i] == 'l') {
- mod[fmt_cnt]++;
- i++;
- }
-
- if (fmt[i] != 'i' && fmt[i] != 'd' &&
- fmt[i] != 'u' && fmt[i] != 'x')
- return -EINVAL;
-fmt_next:
- fmt_cnt++;
- }
-
-/* Horrid workaround for getting va_list handling working with different
- * argument type combinations generically for 32 and 64 bit archs.
- */
-#define __BPF_TP_EMIT() __BPF_ARG3_TP()
-#define __BPF_TP(...) \
- bpf_do_trace_printk(fmt, ##__VA_ARGS__)
-
-#define __BPF_ARG1_TP(...) \
- ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \
- ? __BPF_TP(arg1, ##__VA_ARGS__) \
- : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \
- ? __BPF_TP((long)arg1, ##__VA_ARGS__) \
- : __BPF_TP((u32)arg1, ##__VA_ARGS__)))
-
-#define __BPF_ARG2_TP(...) \
- ((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \
- ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__) \
- : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \
- ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__) \
- : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))
-
-#define __BPF_ARG3_TP(...) \
- ((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64)) \
- ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__) \
- : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32)) \
- ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__) \
- : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))
-
- return __BPF_TP_EMIT();
+ return ret;
}
static const struct bpf_func_proto bpf_trace_printk_proto = {
@@ -581,184 +431,37 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
}
#define MAX_SEQ_PRINTF_VARARGS 12
-#define MAX_SEQ_PRINTF_MAX_MEMCPY 6
-#define MAX_SEQ_PRINTF_STR_LEN 128
-
-struct bpf_seq_printf_buf {
- char buf[MAX_SEQ_PRINTF_MAX_MEMCPY][MAX_SEQ_PRINTF_STR_LEN];
-};
-static DEFINE_PER_CPU(struct bpf_seq_printf_buf, bpf_seq_printf_buf);
-static DEFINE_PER_CPU(int, bpf_seq_printf_buf_used);
BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
const void *, data, u32, data_len)
{
- int err = -EINVAL, fmt_cnt = 0, memcpy_cnt = 0;
- int i, buf_used, copy_size, num_args;
- u64 params[MAX_SEQ_PRINTF_VARARGS];
- struct bpf_seq_printf_buf *bufs;
- const u64 *args = data;
-
- buf_used = this_cpu_inc_return(bpf_seq_printf_buf_used);
- if (WARN_ON_ONCE(buf_used > 1)) {
- err = -EBUSY;
- goto out;
- }
-
- bufs = this_cpu_ptr(&bpf_seq_printf_buf);
-
- /*
- * bpf_check()->check_func_arg()->check_stack_boundary()
- * guarantees that fmt points to bpf program stack,
- * fmt_size bytes of it were initialized and fmt_size > 0
- */
- if (fmt[--fmt_size] != 0)
- goto out;
-
- if (data_len & 7)
- goto out;
-
- for (i = 0; i < fmt_size; i++) {
- if (fmt[i] == '%') {
- if (fmt[i + 1] == '%')
- i++;
- else if (!data || !data_len)
- goto out;
- }
- }
+ enum bpf_printf_mod_type mod[MAX_SEQ_PRINTF_VARARGS];
+ u64 args[MAX_SEQ_PRINTF_VARARGS];
+ int err, num_args;
+ if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 ||
+ (data_len && !data))
+ return -EINVAL;
num_args = data_len / 8;
- /* check format string for allowed specifiers */
- for (i = 0; i < fmt_size; i++) {
- /* only printable ascii for now. */
- if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
- err = -EINVAL;
- goto out;
- }
-
- if (fmt[i] != '%')
- continue;
-
- if (fmt[i + 1] == '%') {
- i++;
- continue;
- }
-
- if (fmt_cnt >= MAX_SEQ_PRINTF_VARARGS) {
- err = -E2BIG;
- goto out;
- }
-
- if (fmt_cnt >= num_args) {
- err = -EINVAL;
- goto out;
- }
-
- /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
- i++;
-
- /* skip optional "[0 +-][num]" width formating field */
- while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
- fmt[i] == ' ')
- i++;
- if (fmt[i] >= '1' && fmt[i] <= '9') {
- i++;
- while (fmt[i] >= '0' && fmt[i] <= '9')
- i++;
- }
-
- if (fmt[i] == 's') {
- void *unsafe_ptr;
-
- /* try our best to copy */
- if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
- err = -E2BIG;
- goto out;
- }
-
- unsafe_ptr = (void *)(long)args[fmt_cnt];
- err = strncpy_from_kernel_nofault(bufs->buf[memcpy_cnt],
- unsafe_ptr, MAX_SEQ_PRINTF_STR_LEN);
- if (err < 0)
- bufs->buf[memcpy_cnt][0] = '\0';
- params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
-
- fmt_cnt++;
- memcpy_cnt++;
- continue;
- }
-
- if (fmt[i] == 'p') {
- if (fmt[i + 1] == 0 ||
- fmt[i + 1] == 'K' ||
- fmt[i + 1] == 'x' ||
- fmt[i + 1] == 'B') {
- /* just kernel pointers */
- params[fmt_cnt] = args[fmt_cnt];
- fmt_cnt++;
- continue;
- }
-
- /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
- if (fmt[i + 1] != 'i' && fmt[i + 1] != 'I') {
- err = -EINVAL;
- goto out;
- }
- if (fmt[i + 2] != '4' && fmt[i + 2] != '6') {
- err = -EINVAL;
- goto out;
- }
-
- if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
- err = -E2BIG;
- goto out;
- }
-
-
- copy_size = (fmt[i + 2] == '4') ? 4 : 16;
-
- err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt],
- (void *) (long) args[fmt_cnt],
- copy_size);
- if (err < 0)
- memset(bufs->buf[memcpy_cnt], 0, copy_size);
- params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
-
- i += 2;
- fmt_cnt++;
- memcpy_cnt++;
- continue;
- }
-
- if (fmt[i] == 'l') {
- i++;
- if (fmt[i] == 'l')
- i++;
- }
-
- if (fmt[i] != 'i' && fmt[i] != 'd' &&
- fmt[i] != 'u' && fmt[i] != 'x' &&
- fmt[i] != 'X') {
- err = -EINVAL;
- goto out;
- }
-
- params[fmt_cnt] = args[fmt_cnt];
- fmt_cnt++;
- }
+ err = bpf_printf_prepare(fmt, fmt_size, data, args, mod, num_args);
+ if (err < 0)
+ return err;
/* Maximumly we can have MAX_SEQ_PRINTF_VARARGS parameter, just give
* all of them to seq_printf().
*/
- seq_printf(m, fmt, params[0], params[1], params[2], params[3],
- params[4], params[5], params[6], params[7], params[8],
- params[9], params[10], params[11]);
+ seq_printf(m, fmt, BPF_CAST_FMT_ARG(0, args, mod),
+ BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod),
+ BPF_CAST_FMT_ARG(3, args, mod), BPF_CAST_FMT_ARG(4, args, mod),
+ BPF_CAST_FMT_ARG(5, args, mod), BPF_CAST_FMT_ARG(6, args, mod),
+ BPF_CAST_FMT_ARG(7, args, mod), BPF_CAST_FMT_ARG(8, args, mod),
+ BPF_CAST_FMT_ARG(9, args, mod), BPF_CAST_FMT_ARG(10, args, mod),
+ BPF_CAST_FMT_ARG(11, args, mod));
- err = seq_has_overflowed(m) ? -EOVERFLOW : 0;
-out:
- this_cpu_dec(bpf_seq_printf_buf_used);
- return err;
+ bpf_printf_cleanup();
+
+ return seq_has_overflowed(m) ? -EOVERFLOW : 0;
}
BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)
@@ -1373,6 +1076,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_task_storage_delete_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
+ case BPF_FUNC_snprintf:
+ return &bpf_snprintf_proto;
default:
return NULL;
}
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8b644113715e..fb3d3262dc1a 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -71,6 +71,9 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg,
if (array == NULL)
return -ENOBUFS;
+ /* paired with smp_rmb() in __vlan_group_get_device() */
+ smp_wmb();
+
vg->vlan_devices_arrays[pidx][vidx] = array;
return 0;
}
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 953405362795..fa3ad3d4d58c 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -57,6 +57,10 @@ static inline struct net_device *__vlan_group_get_device(struct vlan_group *vg,
array = vg->vlan_devices_arrays[pidx]
[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+
+ /* paired with smp_wmb() in vlan_group_prealloc_vid() */
+ smp_rmb();
+
return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL;
}
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index fa199556e122..e16183bd1bb8 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -87,7 +87,7 @@ static int ebt_limit_mt_check(const struct xt_mtchk_param *par)
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
/*
* no conversion function needed --
* only avg/burst have meaningful values in userspace.
@@ -107,7 +107,7 @@ static struct xt_match ebt_limit_mt_reg __read_mostly = {
.checkentry = ebt_limit_mt_check,
.matchsize = sizeof(struct ebt_limit_info),
.usersize = offsetof(struct ebt_limit_info, prev),
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
.compatsize = sizeof(struct ebt_compat_limit_info),
#endif
.me = THIS_MODULE,
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 21fd3d3d77f6..8cf653c72fd8 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -53,7 +53,7 @@ static int ebt_mark_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
return 0;
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
struct compat_ebt_mark_t_info {
compat_ulong_t mark;
compat_uint_t target;
@@ -87,7 +87,7 @@ static struct xt_target ebt_mark_tg_reg __read_mostly = {
.target = ebt_mark_tg,
.checkentry = ebt_mark_tg_check,
.targetsize = sizeof(struct ebt_mark_t_info),
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
.compatsize = sizeof(struct compat_ebt_mark_t_info),
.compat_from_user = mark_tg_compat_from_user,
.compat_to_user = mark_tg_compat_to_user,
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index 81fb59dec499..5872e73c741e 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -37,7 +37,7 @@ static int ebt_mark_mt_check(const struct xt_mtchk_param *par)
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
struct compat_ebt_mark_m_info {
compat_ulong_t mark, mask;
uint8_t invert, bitmask;
@@ -75,7 +75,7 @@ static struct xt_match ebt_mark_mt_reg __read_mostly = {
.match = ebt_mark_mt,
.checkentry = ebt_mark_mt_check,
.matchsize = sizeof(struct ebt_mark_m_info),
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
.compatsize = sizeof(struct compat_ebt_mark_m_info),
.compat_from_user = mark_mt_compat_from_user,
.compat_to_user = mark_mt_compat_to_user,
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 32bc2821027f..020b1487ee0c 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -66,8 +66,7 @@ static unsigned int ebt_broute(void *priv, struct sk_buff *skb,
NFPROTO_BRIDGE, s->in, NULL, NULL,
s->net, NULL);
- ret = ebt_do_table(skb, &state, state.net->xt.broute_table);
-
+ ret = ebt_do_table(skb, &state, priv);
if (ret != NF_DROP)
return ret;
@@ -101,18 +100,17 @@ static const struct nf_hook_ops ebt_ops_broute = {
static int __net_init broute_net_init(struct net *net)
{
- return ebt_register_table(net, &broute_table, &ebt_ops_broute,
- &net->xt.broute_table);
+ return ebt_register_table(net, &broute_table, &ebt_ops_broute);
}
static void __net_exit broute_net_pre_exit(struct net *net)
{
- ebt_unregister_table_pre_exit(net, "broute", &ebt_ops_broute);
+ ebt_unregister_table_pre_exit(net, "broute");
}
static void __net_exit broute_net_exit(struct net *net)
{
- ebt_unregister_table(net, net->xt.broute_table);
+ ebt_unregister_table(net, "broute");
}
static struct pernet_operations broute_net_ops = {
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index bcf982e12f16..8ec0b3736803 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -59,34 +59,27 @@ static const struct ebt_table frame_filter = {
};
static unsigned int
-ebt_in_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
+ebt_filter_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
- return ebt_do_table(skb, state, state->net->xt.frame_filter);
-}
-
-static unsigned int
-ebt_out_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ebt_do_table(skb, state, state->net->xt.frame_filter);
+ return ebt_do_table(skb, state, priv);
}
static const struct nf_hook_ops ebt_ops_filter[] = {
{
- .hook = ebt_in_hook,
+ .hook = ebt_filter_hook,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_LOCAL_IN,
.priority = NF_BR_PRI_FILTER_BRIDGED,
},
{
- .hook = ebt_in_hook,
+ .hook = ebt_filter_hook,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_FILTER_BRIDGED,
},
{
- .hook = ebt_out_hook,
+ .hook = ebt_filter_hook,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_LOCAL_OUT,
.priority = NF_BR_PRI_FILTER_OTHER,
@@ -95,18 +88,17 @@ static const struct nf_hook_ops ebt_ops_filter[] = {
static int __net_init frame_filter_net_init(struct net *net)
{
- return ebt_register_table(net, &frame_filter, ebt_ops_filter,
- &net->xt.frame_filter);
+ return ebt_register_table(net, &frame_filter, ebt_ops_filter);
}
static void __net_exit frame_filter_net_pre_exit(struct net *net)
{
- ebt_unregister_table_pre_exit(net, "filter", ebt_ops_filter);
+ ebt_unregister_table_pre_exit(net, "filter");
}
static void __net_exit frame_filter_net_exit(struct net *net)
{
- ebt_unregister_table(net, net->xt.frame_filter);
+ ebt_unregister_table(net, "filter");
}
static struct pernet_operations frame_filter_net_ops = {
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 0d092773f816..7c8a1064a531 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -58,35 +58,27 @@ static const struct ebt_table frame_nat = {
.me = THIS_MODULE,
};
-static unsigned int
-ebt_nat_in(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
+static unsigned int ebt_nat_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
- return ebt_do_table(skb, state, state->net->xt.frame_nat);
-}
-
-static unsigned int
-ebt_nat_out(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ebt_do_table(skb, state, state->net->xt.frame_nat);
+ return ebt_do_table(skb, state, priv);
}
static const struct nf_hook_ops ebt_ops_nat[] = {
{
- .hook = ebt_nat_out,
+ .hook = ebt_nat_hook,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_LOCAL_OUT,
.priority = NF_BR_PRI_NAT_DST_OTHER,
},
{
- .hook = ebt_nat_out,
+ .hook = ebt_nat_hook,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_POST_ROUTING,
.priority = NF_BR_PRI_NAT_SRC,
},
{
- .hook = ebt_nat_in,
+ .hook = ebt_nat_hook,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_NAT_DST_BRIDGED,
@@ -95,18 +87,17 @@ static const struct nf_hook_ops ebt_ops_nat[] = {
static int __net_init frame_nat_net_init(struct net *net)
{
- return ebt_register_table(net, &frame_nat, ebt_ops_nat,
- &net->xt.frame_nat);
+ return ebt_register_table(net, &frame_nat, ebt_ops_nat);
}
static void __net_exit frame_nat_net_pre_exit(struct net *net)
{
- ebt_unregister_table_pre_exit(net, "nat", ebt_ops_nat);
+ ebt_unregister_table_pre_exit(net, "nat");
}
static void __net_exit frame_nat_net_exit(struct net *net)
{
- ebt_unregister_table(net, net->xt.frame_nat);
+ ebt_unregister_table(net, "nat");
}
static struct pernet_operations frame_nat_net_ops = {
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 96d789c8d1c7..f022deb3721e 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -47,7 +47,7 @@ struct ebt_pernet {
static unsigned int ebt_pernet_id __read_mostly;
static DEFINE_MUTEX(ebt_mutex);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
static void ebt_standard_compat_from_user(void *dst, const void *src)
{
int v = *(compat_int_t *)src;
@@ -73,7 +73,7 @@ static struct xt_target ebt_standard_target = {
.revision = 0,
.family = NFPROTO_BRIDGE,
.targetsize = sizeof(int),
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
.compatsize = sizeof(compat_int_t),
.compat_from_user = ebt_standard_compat_from_user,
.compat_to_user = ebt_standard_compat_to_user,
@@ -1136,15 +1136,18 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
vfree(table->private->entries);
ebt_free_table_info(table->private);
vfree(table->private);
+ kfree(table->ops);
kfree(table);
}
int ebt_register_table(struct net *net, const struct ebt_table *input_table,
- const struct nf_hook_ops *ops, struct ebt_table **res)
+ const struct nf_hook_ops *template_ops)
{
struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
struct ebt_table_info *newinfo;
struct ebt_table *t, *table;
+ struct nf_hook_ops *ops;
+ unsigned int num_ops;
struct ebt_replace_kernel *repl;
int ret, i, countersize;
void *p;
@@ -1213,15 +1216,31 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
ret = -ENOENT;
goto free_unlock;
}
+
+ num_ops = hweight32(table->valid_hooks);
+ if (num_ops == 0) {
+ ret = -EINVAL;
+ goto free_unlock;
+ }
+
+ ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ if (!ops) {
+ ret = -ENOMEM;
+ if (newinfo->nentries)
+ module_put(table->me);
+ goto free_unlock;
+ }
+
+ for (i = 0; i < num_ops; i++)
+ ops[i].priv = table;
+
list_add(&table->list, &ebt_net->tables);
mutex_unlock(&ebt_mutex);
- WRITE_ONCE(*res, table);
- ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
- if (ret) {
+ table->ops = ops;
+ ret = nf_register_net_hooks(net, ops, num_ops);
+ if (ret)
__ebt_unregister_table(net, table);
- *res = NULL;
- }
audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries,
AUDIT_XT_OP_REGISTER, GFP_KERNEL);
@@ -1257,18 +1276,21 @@ static struct ebt_table *__ebt_find_table(struct net *net, const char *name)
return NULL;
}
-void ebt_unregister_table_pre_exit(struct net *net, const char *name, const struct nf_hook_ops *ops)
+void ebt_unregister_table_pre_exit(struct net *net, const char *name)
{
struct ebt_table *table = __ebt_find_table(net, name);
if (table)
- nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+ nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
}
EXPORT_SYMBOL(ebt_unregister_table_pre_exit);
-void ebt_unregister_table(struct net *net, struct ebt_table *table)
+void ebt_unregister_table(struct net *net, const char *name)
{
- __ebt_unregister_table(net, table);
+ struct ebt_table *table = __ebt_find_table(net, name);
+
+ if (table)
+ __ebt_unregister_table(net, table);
}
/* userspace just supplied us with counters */
@@ -1480,7 +1502,7 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user,
ebt_entry_to_user, entries, tmp.entries);
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
/* 32 bit-userspace compatibility definitions. */
struct compat_ebt_replace {
char name[EBT_TABLE_MAXNAMELEN];
@@ -2345,7 +2367,7 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
/* try real handler in case userland supplied needed padding */
if (in_compat_syscall() &&
((cmd != EBT_SO_GET_INFO && cmd != EBT_SO_GET_INIT_INFO) ||
@@ -2412,7 +2434,7 @@ static int do_ebt_set_ctl(struct sock *sk, int cmd, sockptr_t arg,
switch (cmd) {
case EBT_SO_SET_ENTRIES:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
if (in_compat_syscall())
ret = compat_do_replace(net, arg, len);
else
@@ -2420,7 +2442,7 @@ static int do_ebt_set_ctl(struct sock *sk, int cmd, sockptr_t arg,
ret = do_replace(net, arg, len);
break;
case EBT_SO_SET_COUNTERS:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
if (in_compat_syscall())
ret = compat_update_counters(net, arg, len);
else
diff --git a/net/core/dev.c b/net/core/dev.c
index d9bf63dbe4fd..222b1d322c96 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4723,10 +4723,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
void *orig_data, *orig_data_end, *hard_start;
struct netdev_rx_queue *rxqueue;
u32 metalen, act = XDP_DROP;
+ bool orig_bcast, orig_host;
u32 mac_len, frame_sz;
__be16 orig_eth_type;
struct ethhdr *eth;
- bool orig_bcast;
int off;
/* Reinjected packets coming from act_mirred or similar should
@@ -4773,6 +4773,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
orig_data_end = xdp->data_end;
orig_data = xdp->data;
eth = (struct ethhdr *)xdp->data;
+ orig_host = ether_addr_equal_64bits(eth->h_dest, skb->dev->dev_addr);
orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
orig_eth_type = eth->h_proto;
@@ -4800,8 +4801,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
/* check if XDP changed eth hdr such SKB needs update */
eth = (struct ethhdr *)xdp->data;
if ((orig_eth_type != eth->h_proto) ||
+ (orig_host != ether_addr_equal_64bits(eth->h_dest,
+ skb->dev->dev_addr)) ||
(orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) {
__skb_push(skb, ETH_HLEN);
+ skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
}
@@ -5962,7 +5966,7 @@ static void gro_list_prepare(const struct list_head *head,
}
}
-static void skb_gro_reset_offset(struct sk_buff *skb)
+static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
{
const struct skb_shared_info *pinfo = skb_shinfo(skb);
const skb_frag_t *frag0 = &pinfo->frags[0];
@@ -5973,7 +5977,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
if (!skb_headlen(skb) && pinfo->nr_frags &&
!PageHighMem(skb_frag_page(frag0)) &&
- (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) {
+ (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
skb_frag_size(frag0),
@@ -6191,7 +6195,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
skb_mark_napi_id(skb, napi);
trace_napi_gro_receive_entry(skb);
- skb_gro_reset_offset(skb);
+ skb_gro_reset_offset(skb, 0);
ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
trace_napi_gro_receive_exit(ret);
@@ -6280,7 +6284,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
napi->skb = NULL;
skb_reset_mac_header(skb);
- skb_gro_reset_offset(skb);
+ skb_gro_reset_offset(skb, hlen);
if (unlikely(skb_gro_header_hard(skb, hlen))) {
eth = skb_gro_header_slow(skb, hlen, 0);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 737b61c2976e..4eb969518ee0 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -8599,9 +8599,10 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set);
* @controller: associated controller number for the devlink port instance
* @pf: associated PF for the devlink port instance
* @sf: associated SF of a PF for the devlink port instance
+ * @external: indicates if the port is for an external controller
*/
void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller,
- u16 pf, u32 sf)
+ u16 pf, u32 sf, bool external)
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
@@ -8615,6 +8616,7 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
attrs->pci_sf.controller = controller;
attrs->pci_sf.pf = pf;
attrs->pci_sf.sf = sf;
+ attrs->pci_sf.external = external;
}
EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
@@ -8667,6 +8669,13 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
attrs->pci_vf.pf, attrs->pci_vf.vf);
break;
case DEVLINK_PORT_FLAVOUR_PCI_SF:
+ if (attrs->pci_sf.external) {
+ n = snprintf(name, len, "c%u", attrs->pci_sf.controller);
+ if (n >= len)
+ return -EINVAL;
+ len -= n;
+ name += n;
+ }
n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
attrs->pci_sf.sf);
break;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8379719d1dce..98f20efbfadf 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -131,6 +131,9 @@ static void neigh_update_gc_list(struct neighbour *n)
write_lock_bh(&n->tbl->lock);
write_lock(&n->lock);
+ if (n->dead)
+ goto out;
+
/* remove from the gc list if new state is permanent or if neighbor
* is externally learned; otherwise entry should be on the gc list
*/
@@ -147,6 +150,7 @@ static void neigh_update_gc_list(struct neighbour *n)
atomic_inc(&n->tbl->gc_entries);
}
+out:
write_unlock(&n->lock);
write_unlock_bh(&n->tbl->lock);
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 3d190d22b0d8..6f1b82b8ad49 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -188,7 +188,7 @@ static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
if (!sk->sk_prot->psock_update_sk_prot)
return -EINVAL;
psock->psock_update_sk_prot = sk->sk_prot->psock_update_sk_prot;
- return sk->sk_prot->psock_update_sk_prot(sk, false);
+ return sk->sk_prot->psock_update_sk_prot(sk, psock, false);
}
static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
@@ -1521,7 +1521,7 @@ void sock_map_close(struct sock *sk, long timeout)
lock_sock(sk);
rcu_read_lock();
- psock = sk_psock(sk);
+ psock = sk_psock_get(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
release_sock(sk);
@@ -1532,6 +1532,7 @@ void sock_map_close(struct sock *sk, long timeout)
sock_map_remove_links(sk, psock);
rcu_read_unlock();
sk_psock_stop(psock, true);
+ sk_psock_put(sk, psock);
release_sock(sk);
saved_close(sk, timeout);
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index d6d45d820d79..cf20316094d0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -713,7 +713,7 @@ static int copy_entries_to_user(unsigned int total_size,
return ret;
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
static void compat_standard_from_user(void *dst, const void *src)
{
int v = *(compat_int_t *)src;
@@ -800,7 +800,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
return -EFAULT;
name[XT_TABLE_MAXNAMELEN-1] = '\0';
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
if (in_compat_syscall())
xt_compat_lock(NFPROTO_ARP);
#endif
@@ -808,7 +808,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
if (!IS_ERR(t)) {
struct arpt_getinfo info;
const struct xt_table_info *private = t->private;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
struct xt_table_info tmp;
if (in_compat_syscall()) {
@@ -835,7 +835,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
module_put(t->me);
} else
ret = PTR_ERR(t);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
if (in_compat_syscall())
xt_compat_unlock(NFPROTO_ARP);
#endif
@@ -1044,7 +1044,7 @@ static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
return ret;
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
struct compat_arpt_replace {
char name[XT_TABLE_MAXNAMELEN];
u32 valid_hooks;
@@ -1412,7 +1412,7 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, sockptr_t arg,
switch (cmd) {
case ARPT_SO_SET_REPLACE:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
if (in_compat_syscall())
ret = compat_do_replace(sock_net(sk), arg, len);
else
@@ -1444,7 +1444,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
break;
case ARPT_SO_GET_ENTRIES:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
if (in_compat_syscall())
ret = compat_get_entries(sock_net(sk), user, len);
else
@@ -1499,10 +1499,11 @@ static void __arpt_unregister_table(struct net *net, struct xt_table *table)
int arpt_register_table(struct net *net,
const struct xt_table *table,
const struct arpt_replace *repl,
- const struct nf_hook_ops *ops,
- struct xt_table **res)
+ const struct nf_hook_ops *template_ops)
{
- int ret;
+ struct nf_hook_ops *ops;
+ unsigned int num_ops;
+ int ret, i;
struct xt_table_info *newinfo;
struct xt_table_info bootstrap = {0};
void *loc_cpu_entry;
@@ -1516,41 +1517,61 @@ int arpt_register_table(struct net *net,
memcpy(loc_cpu_entry, repl->entries, repl->size);
ret = translate_table(net, newinfo, loc_cpu_entry, repl);
- if (ret != 0)
- goto out_free;
+ if (ret != 0) {
+ xt_free_table_info(newinfo);
+ return ret;
+ }
new_table = xt_register_table(net, table, &bootstrap, newinfo);
if (IS_ERR(new_table)) {
- ret = PTR_ERR(new_table);
- goto out_free;
+ xt_free_table_info(newinfo);
+ return PTR_ERR(new_table);
}
- /* set res now, will see skbs right after nf_register_net_hooks */
- WRITE_ONCE(*res, new_table);
+ num_ops = hweight32(table->valid_hooks);
+ if (num_ops == 0) {
+ ret = -EINVAL;
+ goto out_free;
+ }
- ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
- if (ret != 0) {
- __arpt_unregister_table(net, new_table);
- *res = NULL;
+ ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ if (!ops) {
+ ret = -ENOMEM;
+ goto out_free;
}
+ for (i = 0; i < num_ops; i++)
+ ops[i].priv = new_table;
+
+ new_table->ops = ops;
+
+ ret = nf_register_net_hooks(net, ops, num_ops);
+ if (ret != 0)
+ goto out_free;
+
return ret;
out_free:
- xt_free_table_info(newinfo);
+ __arpt_unregister_table(net, new_table);
return ret;
}
-void arpt_unregister_table_pre_exit(struct net *net, struct xt_table *table,
+void arpt_unregister_table_pre_exit(struct net *net, const char *name,
const struct nf_hook_ops *ops)
{
- nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+ struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name);
+
+ if (table)
+ nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
}
EXPORT_SYMBOL(arpt_unregister_table_pre_exit);
-void arpt_unregister_table(struct net *net, struct xt_table *table)
+void arpt_unregister_table(struct net *net, const char *name)
{
- __arpt_unregister_table(net, table);
+ struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name);
+
+ if (table)
+ __arpt_unregister_table(net, table);
}
/* The built-in targets: standard (NULL) and error. */
@@ -1559,7 +1580,7 @@ static struct xt_target arpt_builtin_tg[] __read_mostly = {
.name = XT_STANDARD_TARGET,
.targetsize = sizeof(int),
.family = NFPROTO_ARP,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
.compatsize = sizeof(compat_int_t),
.compat_from_user = compat_standard_from_user,
.compat_to_user = compat_standard_to_user,
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 6c300ba5634e..b8f45e9bbec8 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -34,7 +34,7 @@ static unsigned int
arptable_filter_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return arpt_do_table(skb, state, state->net->ipv4.arptable_filter);
+ return arpt_do_table(skb, state, priv);
}
static struct nf_hook_ops *arpfilter_ops __read_mostly;
@@ -44,31 +44,22 @@ static int __net_init arptable_filter_table_init(struct net *net)
struct arpt_replace *repl;
int err;
- if (net->ipv4.arptable_filter)
- return 0;
-
repl = arpt_alloc_initial_table(&packet_filter);
if (repl == NULL)
return -ENOMEM;
- err = arpt_register_table(net, &packet_filter, repl, arpfilter_ops,
- &net->ipv4.arptable_filter);
+ err = arpt_register_table(net, &packet_filter, repl, arpfilter_ops);
kfree(repl);
return err;
}
static void __net_exit arptable_filter_net_pre_exit(struct net *net)
{
- if (net->ipv4.arptable_filter)
- arpt_unregister_table_pre_exit(net, net->ipv4.arptable_filter,
- arpfilter_ops);
+ arpt_unregister_table_pre_exit(net, "filter", arpfilter_ops);
}
static void __net_exit arptable_filter_net_exit(struct net *net)
{
- if (!net->ipv4.arptable_filter)
- return;
- arpt_unregister_table(net, net->ipv4.arptable_filter);
- net->ipv4.arptable_filter = NULL;
+ arpt_unregister_table(net, "filter");
}
static struct pernet_operations arptable_filter_net_ops = {
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index f77ea0dbe656..13acb687c19a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -868,7 +868,7 @@ copy_entries_to_user(unsigned int total_size,
return ret;
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
static void compat_standard_from_user(void *dst, const void *src)
{
int v = *(compat_int_t *)src;
@@ -957,7 +957,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
return -EFAULT;
name[XT_TABLE_MAXNAMELEN-1] = '\0';
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
if (in_compat_syscall())
xt_compat_lock(AF_INET);
#endif
@@ -965,7 +965,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
if (!IS_ERR(t)) {
struct ipt_getinfo info;
const struct xt_table_info *private = t->private;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
struct xt_table_info tmp;
if (in_compat_syscall()) {
@@ -993,7 +993,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
module_put(t->me);
} else
ret = PTR_ERR(t);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
if (in_compat_syscall())
xt_compat_unlock(AF_INET);
#endif
@@ -1199,7 +1199,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
return ret;
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
struct compat_ipt_replace {
char name[XT_TABLE_MAXNAMELEN];
u32 valid_hooks;
@@ -1621,7 +1621,7 @@ do_ipt_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len)
switch (cmd) {
case IPT_SO_SET_REPLACE:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
if (in_compat_syscall())
ret = compat_do_replace(sock_net(sk), arg, len);
else
@@ -1654,7 +1654,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
break;
case IPT_SO_GET_ENTRIES:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
if (in_compat_syscall())
ret = compat_get_entries(sock_net(sk), user, len);
else
@@ -1716,9 +1716,11 @@ static void __ipt_unregister_table(struct net *net, struct xt_table *table)
int ipt_register_table(struct net *net, const struct xt_table *table,
const struct ipt_replace *repl,
- const struct nf_hook_ops *ops, struct xt_table **res)
+ const struct nf_hook_ops *template_ops)
{
- int ret;
+ struct nf_hook_ops *ops;
+ unsigned int num_ops;
+ int ret, i;
struct xt_table_info *newinfo;
struct xt_table_info bootstrap = {0};
void *loc_cpu_entry;
@@ -1732,50 +1734,65 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
memcpy(loc_cpu_entry, repl->entries, repl->size);
ret = translate_table(net, newinfo, loc_cpu_entry, repl);
- if (ret != 0)
- goto out_free;
+ if (ret != 0) {
+ xt_free_table_info(newinfo);
+ return ret;
+ }
new_table = xt_register_table(net, table, &bootstrap, newinfo);
if (IS_ERR(new_table)) {
- ret = PTR_ERR(new_table);
- goto out_free;
+ xt_free_table_info(newinfo);
+ return PTR_ERR(new_table);
}
- /* set res now, will see skbs right after nf_register_net_hooks */
- WRITE_ONCE(*res, new_table);
- if (!ops)
+ /* No template? No need to do anything. This is used by 'nat' table, it registers
+ * with the nat core instead of the netfilter core.
+ */
+ if (!template_ops)
return 0;
- ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
- if (ret != 0) {
- __ipt_unregister_table(net, new_table);
- *res = NULL;
+ num_ops = hweight32(table->valid_hooks);
+ if (num_ops == 0) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ if (!ops) {
+ ret = -ENOMEM;
+ goto out_free;
}
+ for (i = 0; i < num_ops; i++)
+ ops[i].priv = new_table;
+
+ new_table->ops = ops;
+
+ ret = nf_register_net_hooks(net, ops, num_ops);
+ if (ret != 0)
+ goto out_free;
+
return ret;
out_free:
- xt_free_table_info(newinfo);
+ __ipt_unregister_table(net, new_table);
return ret;
}
-void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table,
- const struct nf_hook_ops *ops)
+void ipt_unregister_table_pre_exit(struct net *net, const char *name)
{
- nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
-}
+ struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name);
-void ipt_unregister_table_exit(struct net *net, struct xt_table *table)
-{
- __ipt_unregister_table(net, table);
+ if (table)
+ nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
}
-void ipt_unregister_table(struct net *net, struct xt_table *table,
- const struct nf_hook_ops *ops)
+void ipt_unregister_table_exit(struct net *net, const char *name)
{
- if (ops)
- ipt_unregister_table_pre_exit(net, table, ops);
- __ipt_unregister_table(net, table);
+ struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name);
+
+ if (table)
+ __ipt_unregister_table(net, table);
}
/* Returns 1 if the type and code is matched by the range, 0 otherwise */
@@ -1829,7 +1846,7 @@ static struct xt_target ipt_builtin_tg[] __read_mostly = {
.name = XT_STANDARD_TARGET,
.targetsize = sizeof(int),
.family = NFPROTO_IPV4,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
.compatsize = sizeof(compat_int_t),
.compat_from_user = compat_standard_from_user,
.compat_to_user = compat_standard_to_user,
@@ -1924,7 +1941,6 @@ static void __exit ip_tables_fini(void)
}
EXPORT_SYMBOL(ipt_register_table);
-EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_unregister_table_pre_exit);
EXPORT_SYMBOL(ipt_unregister_table_exit);
EXPORT_SYMBOL(ipt_do_table);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index a8b980ad11d4..8f7ca67475b7 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -541,7 +541,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
nf_ct_netns_put(par->net, par->family);
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
struct compat_ipt_clusterip_tgt_info
{
u_int32_t flags;
@@ -553,7 +553,7 @@ struct compat_ipt_clusterip_tgt_info
u_int32_t hash_initval;
compat_uptr_t config;
};
-#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
static struct xt_target clusterip_tg_reg __read_mostly = {
.name = "CLUSTERIP",
@@ -563,9 +563,9 @@ static struct xt_target clusterip_tg_reg __read_mostly = {
.destroy = clusterip_tg_destroy,
.targetsize = sizeof(struct ipt_clusterip_tgt_info),
.usersize = offsetof(struct ipt_clusterip_tgt_info, config),
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
.compatsize = sizeof(struct compat_ipt_clusterip_tgt_info),
-#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
.me = THIS_MODULE
};
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 8f7bc1ee7453..8272df7c6ad5 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -34,7 +34,7 @@ static unsigned int
iptable_filter_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ipt_do_table(skb, state, state->net->ipv4.iptable_filter);
+ return ipt_do_table(skb, state, priv);
}
static struct nf_hook_ops *filter_ops __read_mostly;
@@ -48,9 +48,6 @@ static int __net_init iptable_filter_table_init(struct net *net)
struct ipt_replace *repl;
int err;
- if (net->ipv4.iptable_filter)
- return 0;
-
repl = ipt_alloc_initial_table(&packet_filter);
if (repl == NULL)
return -ENOMEM;
@@ -58,8 +55,7 @@ static int __net_init iptable_filter_table_init(struct net *net)
((struct ipt_standard *)repl->entries)[1].target.verdict =
forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
- err = ipt_register_table(net, &packet_filter, repl, filter_ops,
- &net->ipv4.iptable_filter);
+ err = ipt_register_table(net, &packet_filter, repl, filter_ops);
kfree(repl);
return err;
}
@@ -74,17 +70,12 @@ static int __net_init iptable_filter_net_init(struct net *net)
static void __net_exit iptable_filter_net_pre_exit(struct net *net)
{
- if (net->ipv4.iptable_filter)
- ipt_unregister_table_pre_exit(net, net->ipv4.iptable_filter,
- filter_ops);
+ ipt_unregister_table_pre_exit(net, "filter");
}
static void __net_exit iptable_filter_net_exit(struct net *net)
{
- if (!net->ipv4.iptable_filter)
- return;
- ipt_unregister_table_exit(net, net->ipv4.iptable_filter);
- net->ipv4.iptable_filter = NULL;
+ ipt_unregister_table_exit(net, "filter");
}
static struct pernet_operations iptable_filter_net_ops = {
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 833079589273..2abc3836f391 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -37,7 +37,7 @@ static const struct xt_table packet_mangler = {
};
static unsigned int
-ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
+ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv)
{
unsigned int ret;
const struct iphdr *iph;
@@ -53,7 +53,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
daddr = iph->daddr;
tos = iph->tos;
- ret = ipt_do_table(skb, state, state->net->ipv4.iptable_mangle);
+ ret = ipt_do_table(skb, state, priv);
/* Reroute for ANY change. */
if (ret != NF_DROP && ret != NF_STOLEN) {
iph = ip_hdr(skb);
@@ -78,8 +78,8 @@ iptable_mangle_hook(void *priv,
const struct nf_hook_state *state)
{
if (state->hook == NF_INET_LOCAL_OUT)
- return ipt_mangle_out(skb, state);
- return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle);
+ return ipt_mangle_out(skb, state, priv);
+ return ipt_do_table(skb, state, priv);
}
static struct nf_hook_ops *mangle_ops __read_mostly;
@@ -88,31 +88,22 @@ static int __net_init iptable_mangle_table_init(struct net *net)
struct ipt_replace *repl;
int ret;
- if (net->ipv4.iptable_mangle)
- return 0;
-
repl = ipt_alloc_initial_table(&packet_mangler);
if (repl == NULL)
return -ENOMEM;
- ret = ipt_register_table(net, &packet_mangler, repl, mangle_ops,
- &net->ipv4.iptable_mangle);
+ ret = ipt_register_table(net, &packet_mangler, repl, mangle_ops);
kfree(repl);
return ret;
}
static void __net_exit iptable_mangle_net_pre_exit(struct net *net)
{
- if (net->ipv4.iptable_mangle)
- ipt_unregister_table_pre_exit(net, net->ipv4.iptable_mangle,
- mangle_ops);
+ ipt_unregister_table_pre_exit(net, "mangle");
}
static void __net_exit iptable_mangle_net_exit(struct net *net)
{
- if (!net->ipv4.iptable_mangle)
- return;
- ipt_unregister_table_exit(net, net->ipv4.iptable_mangle);
- net->ipv4.iptable_mangle = NULL;
+ ipt_unregister_table_exit(net, "mangle");
}
static struct pernet_operations iptable_mangle_net_ops = {
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index b0143b109f25..a9913842ef18 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -13,8 +13,14 @@
#include <net/netfilter/nf_nat.h>
+struct iptable_nat_pernet {
+ struct nf_hook_ops *nf_nat_ops;
+};
+
static int __net_init iptable_nat_table_init(struct net *net);
+static unsigned int iptable_nat_net_id __read_mostly;
+
static const struct xt_table nf_nat_ipv4_table = {
.name = "nat",
.valid_hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -30,7 +36,7 @@ static unsigned int iptable_nat_do_chain(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ipt_do_table(skb, state, state->net->ipv4.nat_table);
+ return ipt_do_table(skb, state, priv);
}
static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
@@ -62,27 +68,49 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
static int ipt_nat_register_lookups(struct net *net)
{
+ struct iptable_nat_pernet *xt_nat_net;
+ struct nf_hook_ops *ops;
+ struct xt_table *table;
int i, ret;
+ xt_nat_net = net_generic(net, iptable_nat_net_id);
+ table = xt_find_table(net, NFPROTO_IPV4, "nat");
+ if (WARN_ON_ONCE(!table))
+ return -ENOENT;
+
+ ops = kmemdup(nf_nat_ipv4_ops, sizeof(nf_nat_ipv4_ops), GFP_KERNEL);
+ if (!ops)
+ return -ENOMEM;
+
for (i = 0; i < ARRAY_SIZE(nf_nat_ipv4_ops); i++) {
- ret = nf_nat_ipv4_register_fn(net, &nf_nat_ipv4_ops[i]);
+ ops[i].priv = table;
+ ret = nf_nat_ipv4_register_fn(net, &ops[i]);
if (ret) {
while (i)
- nf_nat_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[--i]);
+ nf_nat_ipv4_unregister_fn(net, &ops[--i]);
+ kfree(ops);
return ret;
}
}
+ xt_nat_net->nf_nat_ops = ops;
return 0;
}
static void ipt_nat_unregister_lookups(struct net *net)
{
+ struct iptable_nat_pernet *xt_nat_net = net_generic(net, iptable_nat_net_id);
+ struct nf_hook_ops *ops = xt_nat_net->nf_nat_ops;
int i;
+ if (!ops)
+ return;
+
for (i = 0; i < ARRAY_SIZE(nf_nat_ipv4_ops); i++)
- nf_nat_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[i]);
+ nf_nat_ipv4_unregister_fn(net, &ops[i]);
+
+ kfree(ops);
}
static int __net_init iptable_nat_table_init(struct net *net)
@@ -90,24 +118,19 @@ static int __net_init iptable_nat_table_init(struct net *net)
struct ipt_replace *repl;
int ret;
- if (net->ipv4.nat_table)
- return 0;
-
repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
if (repl == NULL)
return -ENOMEM;
- ret = ipt_register_table(net, &nf_nat_ipv4_table, repl,
- NULL, &net->ipv4.nat_table);
+
+ ret = ipt_register_table(net, &nf_nat_ipv4_table, repl, NULL);
if (ret < 0) {
kfree(repl);
return ret;
}
ret = ipt_nat_register_lookups(net);
- if (ret < 0) {
- ipt_unregister_table(net, net->ipv4.nat_table, NULL);
- net->ipv4.nat_table = NULL;
- }
+ if (ret < 0)
+ ipt_unregister_table_exit(net, "nat");
kfree(repl);
return ret;
@@ -115,21 +138,19 @@ static int __net_init iptable_nat_table_init(struct net *net)
static void __net_exit iptable_nat_net_pre_exit(struct net *net)
{
- if (net->ipv4.nat_table)
- ipt_nat_unregister_lookups(net);
+ ipt_nat_unregister_lookups(net);
}
static void __net_exit iptable_nat_net_exit(struct net *net)
{
- if (!net->ipv4.nat_table)
- return;
- ipt_unregister_table_exit(net, net->ipv4.nat_table);
- net->ipv4.nat_table = NULL;
+ ipt_unregister_table_exit(net, "nat");
}
static struct pernet_operations iptable_nat_net_ops = {
.pre_exit = iptable_nat_net_pre_exit,
.exit = iptable_nat_net_exit,
+ .id = &iptable_nat_net_id,
+ .size = sizeof(struct iptable_nat_pernet),
};
static int __init iptable_nat_init(void)
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 9abfe6bf2cb9..ceef397c1f5f 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -41,7 +41,7 @@ static unsigned int
iptable_raw_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ipt_do_table(skb, state, state->net->ipv4.iptable_raw);
+ return ipt_do_table(skb, state, priv);
}
static struct nf_hook_ops *rawtable_ops __read_mostly;
@@ -55,31 +55,22 @@ static int __net_init iptable_raw_table_init(struct net *net)
if (raw_before_defrag)
table = &packet_raw_before_defrag;
- if (net->ipv4.iptable_raw)
- return 0;
-
repl = ipt_alloc_initial_table(table);
if (repl == NULL)
return -ENOMEM;
- ret = ipt_register_table(net, table, repl, rawtable_ops,
- &net->ipv4.iptable_raw);
+ ret = ipt_register_table(net, table, repl, rawtable_ops);
kfree(repl);
return ret;
}
static void __net_exit iptable_raw_net_pre_exit(struct net *net)
{
- if (net->ipv4.iptable_raw)
- ipt_unregister_table_pre_exit(net, net->ipv4.iptable_raw,
- rawtable_ops);
+ ipt_unregister_table_pre_exit(net, "raw");
}
static void __net_exit iptable_raw_net_exit(struct net *net)
{
- if (!net->ipv4.iptable_raw)
- return;
- ipt_unregister_table_exit(net, net->ipv4.iptable_raw);
- net->ipv4.iptable_raw = NULL;
+ ipt_unregister_table_exit(net, "raw");
}
static struct pernet_operations iptable_raw_net_ops = {
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index 415c1975d770..77973f5fd8f6 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -40,7 +40,7 @@ static unsigned int
iptable_security_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ipt_do_table(skb, state, state->net->ipv4.iptable_security);
+ return ipt_do_table(skb, state, priv);
}
static struct nf_hook_ops *sectbl_ops __read_mostly;
@@ -50,31 +50,22 @@ static int __net_init iptable_security_table_init(struct net *net)
struct ipt_replace *repl;
int ret;
- if (net->ipv4.iptable_security)
- return 0;
-
repl = ipt_alloc_initial_table(&security_table);
if (repl == NULL)
return -ENOMEM;
- ret = ipt_register_table(net, &security_table, repl, sectbl_ops,
- &net->ipv4.iptable_security);
+ ret = ipt_register_table(net, &security_table, repl, sectbl_ops);
kfree(repl);
return ret;
}
static void __net_exit iptable_security_net_pre_exit(struct net *net)
{
- if (net->ipv4.iptable_security)
- ipt_unregister_table_pre_exit(net, net->ipv4.iptable_security,
- sectbl_ops);
+ ipt_unregister_table_pre_exit(net, "security");
}
static void __net_exit iptable_security_net_exit(struct net *net)
{
- if (!net->ipv4.iptable_security)
- return;
- ipt_unregister_table_exit(net, net->ipv4.iptable_security);
- net->ipv4.iptable_security = NULL;
+ ipt_unregister_table_exit(net, "security");
}
static struct pernet_operations iptable_security_net_ops = {
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index ffdcc2b9360f..613432a36f0a 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -141,14 +141,16 @@ int nf_defrag_ipv4_enable(struct net *net)
struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
int err = 0;
- might_sleep();
-
- if (nf_defrag->users)
- return 0;
-
mutex_lock(&defrag4_mutex);
- if (nf_defrag->users)
+ if (nf_defrag->users == UINT_MAX) {
+ err = -EOVERFLOW;
goto out_unlock;
+ }
+
+ if (nf_defrag->users) {
+ nf_defrag->users++;
+ goto out_unlock;
+ }
err = nf_register_net_hooks(net, ipv4_defrag_ops,
ARRAY_SIZE(ipv4_defrag_ops));
@@ -161,6 +163,22 @@ int nf_defrag_ipv4_enable(struct net *net)
}
EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable);
+void nf_defrag_ipv4_disable(struct net *net)
+{
+ struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
+
+ mutex_lock(&defrag4_mutex); /* user count is only modified under this mutex */
+ if (nf_defrag->users) { /* nothing to do when no user is recorded for @net */
+ nf_defrag->users--;
+ if (nf_defrag->users == 0) /* last user dropped: remove the defrag hooks */
+ nf_unregister_net_hooks(net, ipv4_defrag_ops,
+ ARRAY_SIZE(ipv4_defrag_ops));
+ }
+
+ mutex_unlock(&defrag4_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_defrag_ipv4_disable);
+
module_init(nf_defrag_init);
module_exit(nf_defrag_fini);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 4f49c12dae53..ad9d17923fc5 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -499,9 +499,8 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops)
ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
}
-int tcp_bpf_update_proto(struct sock *sk, bool restore)
+int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
- struct sk_psock *psock = sk_psock(sk);
int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 7d5c4ebf42fe..954c4591a6fd 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -103,14 +103,12 @@ static int __init udp_bpf_v4_build_proto(void)
}
core_initcall(udp_bpf_v4_build_proto);
-int udp_bpf_update_proto(struct sock *sk, bool restore)
+int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6;
- struct sk_psock *psock = sk_psock(sk);
if (restore) {
sk->sk_write_space = psock->saved_write_space;
- /* Pairs with lockless read in sk_clone_lock() */
WRITE_ONCE(sk->sk_prot, psock->sk_proto);
return 0;
}
@@ -118,7 +116,6 @@ int udp_bpf_update_proto(struct sock *sk, bool restore)
if (sk->sk_family == AF_INET6)
udp_bpf_check_v6_needs_rebuild(psock->sk_proto);
- /* Pairs with lockless read in sk_clone_lock() */
WRITE_ONCE(sk->sk_prot, &udp_bpf_prots[family]);
return 0;
}
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index eb2b5404806c..e810a23baf99 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -884,7 +884,7 @@ copy_entries_to_user(unsigned int total_size,
return ret;
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
static void compat_standard_from_user(void *dst, const void *src)
{
int v = *(compat_int_t *)src;
@@ -973,7 +973,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
return -EFAULT;
name[XT_TABLE_MAXNAMELEN-1] = '\0';
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
if (in_compat_syscall())
xt_compat_lock(AF_INET6);
#endif
@@ -981,7 +981,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
if (!IS_ERR(t)) {
struct ip6t_getinfo info;
const struct xt_table_info *private = t->private;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
struct xt_table_info tmp;
if (in_compat_syscall()) {
@@ -1009,7 +1009,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
module_put(t->me);
} else
ret = PTR_ERR(t);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
if (in_compat_syscall())
xt_compat_unlock(AF_INET6);
#endif
@@ -1215,7 +1215,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
return ret;
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
struct compat_ip6t_replace {
char name[XT_TABLE_MAXNAMELEN];
u32 valid_hooks;
@@ -1630,7 +1630,7 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len)
switch (cmd) {
case IP6T_SO_SET_REPLACE:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
if (in_compat_syscall())
ret = compat_do_replace(sock_net(sk), arg, len);
else
@@ -1663,7 +1663,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
break;
case IP6T_SO_GET_ENTRIES:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
if (in_compat_syscall())
ret = compat_get_entries(sock_net(sk), user, len);
else
@@ -1725,10 +1725,11 @@ static void __ip6t_unregister_table(struct net *net, struct xt_table *table)
int ip6t_register_table(struct net *net, const struct xt_table *table,
const struct ip6t_replace *repl,
- const struct nf_hook_ops *ops,
- struct xt_table **res)
+ const struct nf_hook_ops *template_ops)
{
- int ret;
+ struct nf_hook_ops *ops;
+ unsigned int num_ops;
+ int ret, i;
struct xt_table_info *newinfo;
struct xt_table_info bootstrap = {0};
void *loc_cpu_entry;
@@ -1742,50 +1743,62 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
memcpy(loc_cpu_entry, repl->entries, repl->size);
ret = translate_table(net, newinfo, loc_cpu_entry, repl);
- if (ret != 0)
- goto out_free;
+ if (ret != 0) {
+ xt_free_table_info(newinfo);
+ return ret;
+ }
new_table = xt_register_table(net, table, &bootstrap, newinfo);
if (IS_ERR(new_table)) {
- ret = PTR_ERR(new_table);
- goto out_free;
+ xt_free_table_info(newinfo);
+ return PTR_ERR(new_table);
}
- /* set res now, will see skbs right after nf_register_net_hooks */
- WRITE_ONCE(*res, new_table);
- if (!ops)
+ if (!template_ops)
return 0;
- ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
- if (ret != 0) {
- __ip6t_unregister_table(net, new_table);
- *res = NULL;
+ num_ops = hweight32(table->valid_hooks);
+ if (num_ops == 0) {
+ ret = -EINVAL;
+ goto out_free;
}
+ ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ if (!ops) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
+
+ for (i = 0; i < num_ops; i++)
+ ops[i].priv = new_table;
+
+ new_table->ops = ops;
+
+ ret = nf_register_net_hooks(net, ops, num_ops);
+ if (ret != 0)
+ goto out_free;
+
return ret;
out_free:
- xt_free_table_info(newinfo);
+ __ip6t_unregister_table(net, new_table);
return ret;
}
-void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table,
- const struct nf_hook_ops *ops)
+void ip6t_unregister_table_pre_exit(struct net *net, const char *name)
{
- nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
-}
+ struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name);
-void ip6t_unregister_table_exit(struct net *net, struct xt_table *table)
-{
- __ip6t_unregister_table(net, table);
+ if (table)
+ nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
}
-void ip6t_unregister_table(struct net *net, struct xt_table *table,
- const struct nf_hook_ops *ops)
+void ip6t_unregister_table_exit(struct net *net, const char *name)
{
- if (ops)
- ip6t_unregister_table_pre_exit(net, table, ops);
- __ip6t_unregister_table(net, table);
+ struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name);
+
+ if (table)
+ __ip6t_unregister_table(net, table);
}
/* Returns 1 if the type and code is matched by the range, 0 otherwise */
@@ -1840,7 +1853,7 @@ static struct xt_target ip6t_builtin_tg[] __read_mostly = {
.name = XT_STANDARD_TARGET,
.targetsize = sizeof(int),
.family = NFPROTO_IPV6,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
.compatsize = sizeof(compat_int_t),
.compat_from_user = compat_standard_from_user,
.compat_to_user = compat_standard_to_user,
@@ -1935,7 +1948,6 @@ static void __exit ip6_tables_fini(void)
}
EXPORT_SYMBOL(ip6t_register_table);
-EXPORT_SYMBOL(ip6t_unregister_table);
EXPORT_SYMBOL(ip6t_unregister_table_pre_exit);
EXPORT_SYMBOL(ip6t_unregister_table_exit);
EXPORT_SYMBOL(ip6t_do_table);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 88337b51ffbf..bb784ea7bbd3 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -35,7 +35,7 @@ static unsigned int
ip6table_filter_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip6t_do_table(skb, state, state->net->ipv6.ip6table_filter);
+ return ip6t_do_table(skb, state, priv);
}
static struct nf_hook_ops *filter_ops __read_mostly;
@@ -49,9 +49,6 @@ static int __net_init ip6table_filter_table_init(struct net *net)
struct ip6t_replace *repl;
int err;
- if (net->ipv6.ip6table_filter)
- return 0;
-
repl = ip6t_alloc_initial_table(&packet_filter);
if (repl == NULL)
return -ENOMEM;
@@ -59,8 +56,7 @@ static int __net_init ip6table_filter_table_init(struct net *net)
((struct ip6t_standard *)repl->entries)[1].target.verdict =
forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
- err = ip6t_register_table(net, &packet_filter, repl, filter_ops,
- &net->ipv6.ip6table_filter);
+ err = ip6t_register_table(net, &packet_filter, repl, filter_ops);
kfree(repl);
return err;
}
@@ -75,17 +71,12 @@ static int __net_init ip6table_filter_net_init(struct net *net)
static void __net_exit ip6table_filter_net_pre_exit(struct net *net)
{
- if (net->ipv6.ip6table_filter)
- ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_filter,
- filter_ops);
+ ip6t_unregister_table_pre_exit(net, "filter");
}
static void __net_exit ip6table_filter_net_exit(struct net *net)
{
- if (!net->ipv6.ip6table_filter)
- return;
- ip6t_unregister_table_exit(net, net->ipv6.ip6table_filter);
- net->ipv6.ip6table_filter = NULL;
+ ip6t_unregister_table_exit(net, "filter");
}
static struct pernet_operations ip6table_filter_net_ops = {
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index cee74803d7a1..c76cffd63041 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -32,7 +32,7 @@ static const struct xt_table packet_mangler = {
};
static unsigned int
-ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
+ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv)
{
unsigned int ret;
struct in6_addr saddr, daddr;
@@ -49,7 +49,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
/* flowlabel and prio (includes version, which shouldn't change either */
flowlabel = *((u_int32_t *)ipv6_hdr(skb));
- ret = ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle);
+ ret = ip6t_do_table(skb, state, priv);
if (ret != NF_DROP && ret != NF_STOLEN &&
(!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
@@ -71,8 +71,8 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
if (state->hook == NF_INET_LOCAL_OUT)
- return ip6t_mangle_out(skb, state);
- return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle);
+ return ip6t_mangle_out(skb, state, priv);
+ return ip6t_do_table(skb, state, priv);
}
static struct nf_hook_ops *mangle_ops __read_mostly;
@@ -81,32 +81,22 @@ static int __net_init ip6table_mangle_table_init(struct net *net)
struct ip6t_replace *repl;
int ret;
- if (net->ipv6.ip6table_mangle)
- return 0;
-
repl = ip6t_alloc_initial_table(&packet_mangler);
if (repl == NULL)
return -ENOMEM;
- ret = ip6t_register_table(net, &packet_mangler, repl, mangle_ops,
- &net->ipv6.ip6table_mangle);
+ ret = ip6t_register_table(net, &packet_mangler, repl, mangle_ops);
kfree(repl);
return ret;
}
static void __net_exit ip6table_mangle_net_pre_exit(struct net *net)
{
- if (net->ipv6.ip6table_mangle)
- ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_mangle,
- mangle_ops);
+ ip6t_unregister_table_pre_exit(net, "mangle");
}
static void __net_exit ip6table_mangle_net_exit(struct net *net)
{
- if (!net->ipv6.ip6table_mangle)
- return;
-
- ip6t_unregister_table_exit(net, net->ipv6.ip6table_mangle);
- net->ipv6.ip6table_mangle = NULL;
+ ip6t_unregister_table_exit(net, "mangle");
}
static struct pernet_operations ip6table_mangle_net_ops = {
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 0a23265e3caa..b0292251e655 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -15,8 +15,14 @@
#include <net/netfilter/nf_nat.h>
+struct ip6table_nat_pernet {
+ struct nf_hook_ops *nf_nat_ops;
+};
+
static int __net_init ip6table_nat_table_init(struct net *net);
+static unsigned int ip6table_nat_net_id __read_mostly;
+
static const struct xt_table nf_nat_ipv6_table = {
.name = "nat",
.valid_hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -32,7 +38,7 @@ static unsigned int ip6table_nat_do_chain(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip6t_do_table(skb, state, state->net->ipv6.ip6table_nat);
+ return ip6t_do_table(skb, state, priv);
}
static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
@@ -64,27 +70,49 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
static int ip6t_nat_register_lookups(struct net *net)
{
+ struct ip6table_nat_pernet *xt_nat_net;
+ struct nf_hook_ops *ops;
+ struct xt_table *table;
int i, ret;
+ table = xt_find_table(net, NFPROTO_IPV6, "nat");
+ if (WARN_ON_ONCE(!table))
+ return -ENOENT;
+
+ xt_nat_net = net_generic(net, ip6table_nat_net_id);
+ ops = kmemdup(nf_nat_ipv6_ops, sizeof(nf_nat_ipv6_ops), GFP_KERNEL);
+ if (!ops)
+ return -ENOMEM;
+
for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++) {
- ret = nf_nat_ipv6_register_fn(net, &nf_nat_ipv6_ops[i]);
+ ops[i].priv = table;
+ ret = nf_nat_ipv6_register_fn(net, &ops[i]);
if (ret) {
while (i)
- nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[--i]);
+ nf_nat_ipv6_unregister_fn(net, &ops[--i]);
+ kfree(ops);
return ret;
}
}
+ xt_nat_net->nf_nat_ops = ops;
return 0;
}
static void ip6t_nat_unregister_lookups(struct net *net)
{
+ struct ip6table_nat_pernet *xt_nat_net = net_generic(net, ip6table_nat_net_id);
+ struct nf_hook_ops *ops = xt_nat_net->nf_nat_ops;
int i;
+ if (!ops)
+ return;
+
for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++)
- nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[i]);
+ nf_nat_ipv6_unregister_fn(net, &ops[i]);
+
+ kfree(ops);
}
static int __net_init ip6table_nat_table_init(struct net *net)
@@ -92,45 +120,39 @@ static int __net_init ip6table_nat_table_init(struct net *net)
struct ip6t_replace *repl;
int ret;
- if (net->ipv6.ip6table_nat)
- return 0;
-
repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
if (repl == NULL)
return -ENOMEM;
ret = ip6t_register_table(net, &nf_nat_ipv6_table, repl,
- NULL, &net->ipv6.ip6table_nat);
+ NULL);
if (ret < 0) {
kfree(repl);
return ret;
}
ret = ip6t_nat_register_lookups(net);
- if (ret < 0) {
- ip6t_unregister_table(net, net->ipv6.ip6table_nat, NULL);
- net->ipv6.ip6table_nat = NULL;
- }
+ if (ret < 0)
+ ip6t_unregister_table_exit(net, "nat");
+
kfree(repl);
return ret;
}
static void __net_exit ip6table_nat_net_pre_exit(struct net *net)
{
- if (net->ipv6.ip6table_nat)
- ip6t_nat_unregister_lookups(net);
+ ip6t_nat_unregister_lookups(net);
}
static void __net_exit ip6table_nat_net_exit(struct net *net)
{
- if (!net->ipv6.ip6table_nat)
- return;
- ip6t_unregister_table_exit(net, net->ipv6.ip6table_nat);
- net->ipv6.ip6table_nat = NULL;
+ ip6t_unregister_table_exit(net, "nat");
}
static struct pernet_operations ip6table_nat_net_ops = {
.pre_exit = ip6table_nat_net_pre_exit,
.exit = ip6table_nat_net_exit,
+ .id = &ip6table_nat_net_id,
+ .size = sizeof(struct ip6table_nat_pernet),
};
static int __init ip6table_nat_init(void)
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 8f9e742226f7..f63c106c521e 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -40,7 +40,7 @@ static unsigned int
ip6table_raw_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip6t_do_table(skb, state, state->net->ipv6.ip6table_raw);
+ return ip6t_do_table(skb, state, priv);
}
static struct nf_hook_ops *rawtable_ops __read_mostly;
@@ -54,31 +54,22 @@ static int __net_init ip6table_raw_table_init(struct net *net)
if (raw_before_defrag)
table = &packet_raw_before_defrag;
- if (net->ipv6.ip6table_raw)
- return 0;
-
repl = ip6t_alloc_initial_table(table);
if (repl == NULL)
return -ENOMEM;
- ret = ip6t_register_table(net, table, repl, rawtable_ops,
- &net->ipv6.ip6table_raw);
+ ret = ip6t_register_table(net, table, repl, rawtable_ops);
kfree(repl);
return ret;
}
static void __net_exit ip6table_raw_net_pre_exit(struct net *net)
{
- if (net->ipv6.ip6table_raw)
- ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_raw,
- rawtable_ops);
+ ip6t_unregister_table_pre_exit(net, "raw");
}
static void __net_exit ip6table_raw_net_exit(struct net *net)
{
- if (!net->ipv6.ip6table_raw)
- return;
- ip6t_unregister_table_exit(net, net->ipv6.ip6table_raw);
- net->ipv6.ip6table_raw = NULL;
+ ip6t_unregister_table_exit(net, "raw");
}
static struct pernet_operations ip6table_raw_net_ops = {
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 5e8c48fed032..8dc335cf450b 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -39,7 +39,7 @@ static unsigned int
ip6table_security_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip6t_do_table(skb, state, state->net->ipv6.ip6table_security);
+ return ip6t_do_table(skb, state, priv);
}
static struct nf_hook_ops *sectbl_ops __read_mostly;
@@ -49,31 +49,22 @@ static int __net_init ip6table_security_table_init(struct net *net)
struct ip6t_replace *repl;
int ret;
- if (net->ipv6.ip6table_security)
- return 0;
-
repl = ip6t_alloc_initial_table(&security_table);
if (repl == NULL)
return -ENOMEM;
- ret = ip6t_register_table(net, &security_table, repl, sectbl_ops,
- &net->ipv6.ip6table_security);
+ ret = ip6t_register_table(net, &security_table, repl, sectbl_ops);
kfree(repl);
return ret;
}
static void __net_exit ip6table_security_net_pre_exit(struct net *net)
{
- if (net->ipv6.ip6table_security)
- ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_security,
- sectbl_ops);
+ ip6t_unregister_table_pre_exit(net, "security");
}
static void __net_exit ip6table_security_net_exit(struct net *net)
{
- if (!net->ipv6.ip6table_security)
- return;
- ip6t_unregister_table_exit(net, net->ipv6.ip6table_security);
- net->ipv6.ip6table_security = NULL;
+ ip6t_unregister_table_exit(net, "security");
}
static struct pernet_operations ip6table_security_net_ops = {
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 402dc4ca9504..e8a59d8bf2ad 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -137,14 +137,16 @@ int nf_defrag_ipv6_enable(struct net *net)
struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id);
int err = 0;
- might_sleep();
-
- if (nf_frag->users)
- return 0;
-
mutex_lock(&defrag6_mutex);
- if (nf_frag->users)
+ if (nf_frag->users == UINT_MAX) {
+ err = -EOVERFLOW;
+ goto out_unlock;
+ }
+
+ if (nf_frag->users) {
+ nf_frag->users++;
goto out_unlock;
+ }
err = nf_register_net_hooks(net, ipv6_defrag_ops,
ARRAY_SIZE(ipv6_defrag_ops));
@@ -157,6 +159,21 @@ int nf_defrag_ipv6_enable(struct net *net)
}
EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable);
+void nf_defrag_ipv6_disable(struct net *net)
+{
+ struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id);
+
+ mutex_lock(&defrag6_mutex); /* user count is only modified under this mutex */
+ if (nf_frag->users) { /* nothing to do when no user is recorded for @net */
+ nf_frag->users--;
+ if (nf_frag->users == 0) /* last user dropped: remove the defrag hooks */
+ nf_unregister_net_hooks(net, ipv6_defrag_ops,
+ ARRAY_SIZE(ipv6_defrag_ops));
+ }
+ mutex_unlock(&defrag6_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_defrag_ipv6_disable);
+
module_init(nf_defrag_init);
module_exit(nf_defrag_fini);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 8bf21996734d..29a2d690d8d5 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -392,6 +392,14 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
return false;
}
+static void mptcp_set_datafin_timeout(const struct sock *sk)
+{
+ /* Clamp the backoff shift: icsk_retransmits is unbounded here and would overflow the shift. */
+ u32 shift = min_t(u32, inet_csk(sk)->icsk_retransmits, ilog2(TCP_RTO_MAX / TCP_RTO_MIN));
+
+ mptcp_sk(sk)->timer_ival = TCP_RTO_MIN << shift; /* stays <= TCP_RTO_MAX by construction */
+}
+
static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
{
long tout = ssk && inet_csk(ssk)->icsk_pending ?
@@ -1062,7 +1070,7 @@ out:
}
if (snd_una == READ_ONCE(msk->snd_nxt)) {
- if (msk->timer_ival)
+ if (msk->timer_ival && !mptcp_data_fin_enabled(msk))
mptcp_stop_timer(sk);
} else {
mptcp_reset_timer(sk);
@@ -2287,8 +2295,19 @@ static void __mptcp_retrans(struct sock *sk)
__mptcp_clean_una_wakeup(sk);
dfrag = mptcp_rtx_head(sk);
- if (!dfrag)
+ if (!dfrag) {
+ if (mptcp_data_fin_enabled(msk)) {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ icsk->icsk_retransmits++;
+ mptcp_set_datafin_timeout(sk);
+ mptcp_send_ack(msk);
+
+ goto reset_timer;
+ }
+
return;
+ }
ssk = mptcp_subflow_get_retrans(msk);
if (!ssk)
@@ -2474,6 +2493,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
pr_debug("Sending DATA_FIN on subflow %p", ssk);
mptcp_set_timeout(sk, ssk);
tcp_send_ack(ssk);
+ if (!mptcp_timer_pending(sk))
+ mptcp_reset_timer(sk);
}
break;
}
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index fcd8682704c4..56a2531a3402 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -728,6 +728,16 @@ config NETFILTER_XTABLES
if NETFILTER_XTABLES
+config NETFILTER_XTABLES_COMPAT
+ bool "Netfilter Xtables 32bit support"
+ depends on COMPAT
+ default y
+ help
+ This option provides a translation layer to run 32-bit arptables,
+ ip(6)tables and ebtables binaries on 64-bit kernels.
+
+ If unsure, say N.
+
comment "Xtables combined modules"
config NETFILTER_XT_MARK
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 359ff8ec236a..de2d20c37cda 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1031,26 +1031,22 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
return 0;
}
-static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_none(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
{
return -EOPNOTSUPP;
}
-static int ip_set_create(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(net);
+ struct ip_set_net *inst = ip_set_pernet(info->net);
struct ip_set *set, *clash = NULL;
ip_set_id_t index = IPSET_INVALID_ID;
struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {};
const char *name, *typename;
u8 family, revision;
- u32 flags = flag_exist(nlh);
+ u32 flags = flag_exist(info->nlh);
int ret = 0;
if (unlikely(protocol_min_failed(attr) ||
@@ -1101,7 +1097,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
/* Set create flags depending on the type revision */
set->flags |= set->type->create_flags[revision];
- ret = set->type->create(net, set, tb, flags);
+ ret = set->type->create(info->net, set, tb, flags);
if (ret != 0)
goto put_out;
@@ -1183,12 +1179,10 @@ ip_set_destroy_set(struct ip_set *set)
kfree(set);
}
-static int ip_set_destroy(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(net);
+ struct ip_set_net *inst = ip_set_pernet(info->net);
struct ip_set *s;
ip_set_id_t i;
int ret = 0;
@@ -1230,7 +1224,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
/* Modified by ip_set_destroy() only, which is serialized */
inst->is_destroyed = false;
} else {
- u32 flags = flag_exist(nlh);
+ u32 flags = flag_exist(info->nlh);
s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&i);
if (!s) {
@@ -1264,12 +1258,10 @@ ip_set_flush_set(struct ip_set *set)
ip_set_unlock(set);
}
-static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_flush(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(net);
+ struct ip_set_net *inst = ip_set_pernet(info->net);
struct ip_set *s;
ip_set_id_t i;
@@ -1304,12 +1296,10 @@ ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
.len = IPSET_MAXNAMELEN - 1 },
};
-static int ip_set_rename(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_rename(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(net);
+ struct ip_set_net *inst = ip_set_pernet(info->net);
struct ip_set *set, *s;
const char *name2;
ip_set_id_t i;
@@ -1354,12 +1344,10 @@ out:
* so the ip_set_list always contains valid pointers to the sets.
*/
-static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(net);
+ struct ip_set_net *inst = ip_set_pernet(info->net);
struct ip_set *from, *to;
ip_set_id_t from_id, to_id;
char from_name[IPSET_MAXNAMELEN];
@@ -1669,10 +1657,8 @@ out:
return ret < 0 ? ret : skb->len;
}
-static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_dump(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
{
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
@@ -1683,7 +1669,7 @@ static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
.dump = ip_set_dump_do,
.done = ip_set_dump_done,
};
- return netlink_dump_start(ctnl, skb, nlh, &c);
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
}
}
@@ -1817,30 +1803,24 @@ static int ip_set_ad(struct net *net, struct sock *ctnl,
return ret;
}
-static int ip_set_uadd(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_uadd(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
{
- return ip_set_ad(net, ctnl, skb,
- IPSET_ADD, nlh, attr, extack);
+ return ip_set_ad(info->net, info->sk, skb,
+ IPSET_ADD, info->nlh, attr, info->extack);
}
-static int ip_set_udel(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_udel(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
{
- return ip_set_ad(net, ctnl, skb,
- IPSET_DEL, nlh, attr, extack);
+ return ip_set_ad(info->net, info->sk, skb,
+ IPSET_DEL, info->nlh, attr, info->extack);
}
-static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_utest(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(net);
+ struct ip_set_net *inst = ip_set_pernet(info->net);
struct ip_set *set;
struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
int ret = 0;
@@ -1872,12 +1852,10 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
/* Get headed data of a set */
-static int ip_set_header(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_header(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(net);
+ struct ip_set_net *inst = ip_set_pernet(info->net);
const struct ip_set *set;
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
@@ -1895,7 +1873,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
if (!skb2)
return -ENOMEM;
- nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+ nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0,
IPSET_CMD_HEADER);
if (!nlh2)
goto nlmsg_failure;
@@ -1907,7 +1885,8 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
- ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+ ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
if (ret < 0)
return ret;
@@ -1929,10 +1908,8 @@ static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
[IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
};
-static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_type(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
{
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
@@ -1955,7 +1932,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
if (!skb2)
return -ENOMEM;
- nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+ nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0,
IPSET_CMD_TYPE);
if (!nlh2)
goto nlmsg_failure;
@@ -1968,7 +1945,8 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
nlmsg_end(skb2, nlh2);
pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
- ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+ ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
if (ret < 0)
return ret;
@@ -1988,10 +1966,8 @@ ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
[IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
};
-static int ip_set_protocol(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_protocol(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
{
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
@@ -2004,7 +1980,7 @@ static int ip_set_protocol(struct net *net, struct sock *ctnl,
if (!skb2)
return -ENOMEM;
- nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+ nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0,
IPSET_CMD_PROTOCOL);
if (!nlh2)
goto nlmsg_failure;
@@ -2014,7 +1990,8 @@ static int ip_set_protocol(struct net *net, struct sock *ctnl,
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
- ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+ ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
if (ret < 0)
return ret;
@@ -2029,12 +2006,10 @@ nlmsg_failure:
/* Get set by name or index, from userspace */
-static int ip_set_byname(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_byname(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(net);
+ struct ip_set_net *inst = ip_set_pernet(info->net);
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
ip_set_id_t id = IPSET_INVALID_ID;
@@ -2053,7 +2028,7 @@ static int ip_set_byname(struct net *net, struct sock *ctnl,
if (!skb2)
return -ENOMEM;
- nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+ nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0,
IPSET_CMD_GET_BYNAME);
if (!nlh2)
goto nlmsg_failure;
@@ -2063,7 +2038,8 @@ static int ip_set_byname(struct net *net, struct sock *ctnl,
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
- ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+ ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
if (ret < 0)
return ret;
@@ -2081,12 +2057,10 @@ static const struct nla_policy ip_set_index_policy[IPSET_ATTR_CMD_MAX + 1] = {
[IPSET_ATTR_INDEX] = { .type = NLA_U16 },
};
-static int ip_set_byindex(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const attr[],
- struct netlink_ext_ack *extack)
+static int ip_set_byindex(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(net);
+ struct ip_set_net *inst = ip_set_pernet(info->net);
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
ip_set_id_t id = IPSET_INVALID_ID;
@@ -2108,7 +2082,7 @@ static int ip_set_byindex(struct net *net, struct sock *ctnl,
if (!skb2)
return -ENOMEM;
- nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+ nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0,
IPSET_CMD_GET_BYINDEX);
if (!nlh2)
goto nlmsg_failure;
@@ -2117,7 +2091,8 @@ static int ip_set_byindex(struct net *net, struct sock *ctnl,
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
- ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+ ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
if (ret < 0)
return ret;
@@ -2133,80 +2108,96 @@ nlmsg_failure:
static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
[IPSET_CMD_NONE] = {
.call = ip_set_none,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
},
[IPSET_CMD_CREATE] = {
.call = ip_set_create,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_create_policy,
},
[IPSET_CMD_DESTROY] = {
.call = ip_set_destroy,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname_policy,
},
[IPSET_CMD_FLUSH] = {
.call = ip_set_flush,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname_policy,
},
[IPSET_CMD_RENAME] = {
.call = ip_set_rename,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname2_policy,
},
[IPSET_CMD_SWAP] = {
.call = ip_set_swap,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname2_policy,
},
[IPSET_CMD_LIST] = {
.call = ip_set_dump,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_dump_policy,
},
[IPSET_CMD_SAVE] = {
.call = ip_set_dump,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname_policy,
},
[IPSET_CMD_ADD] = {
.call = ip_set_uadd,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_adt_policy,
},
[IPSET_CMD_DEL] = {
.call = ip_set_udel,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_adt_policy,
},
[IPSET_CMD_TEST] = {
.call = ip_set_utest,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_adt_policy,
},
[IPSET_CMD_HEADER] = {
.call = ip_set_header,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname_policy,
},
[IPSET_CMD_TYPE] = {
.call = ip_set_type,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_type_policy,
},
[IPSET_CMD_PROTOCOL] = {
.call = ip_set_protocol,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_protocol_policy,
},
[IPSET_CMD_GET_BYNAME] = {
.call = ip_set_byname,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_setname_policy,
},
[IPSET_CMD_GET_BYINDEX] = {
.call = ip_set_byindex,
+ .type = NFNL_CB_MUTEX,
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_index_policy,
},
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 44e3cb80e2e0..8690fc07030f 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1524,17 +1524,15 @@ static int ctnetlink_flush_conntrack(struct net *net,
return 0;
}
-static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int ctnetlink_del_conntrack(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
+ struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
- struct nf_conn *ct;
- struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nf_conntrack_zone zone;
+ struct nf_conn *ct;
int err;
err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
@@ -1550,15 +1548,15 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
else {
u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
- return ctnetlink_flush_conntrack(net, cda,
+ return ctnetlink_flush_conntrack(info->net, cda,
NETLINK_CB(skb).portid,
- nlmsg_report(nlh), u3);
+ nlmsg_report(info->nlh), u3);
}
if (err < 0)
return err;
- h = nf_conntrack_find_get(net, &zone, &tuple);
+ h = nf_conntrack_find_get(info->net, &zone, &tuple);
if (!h)
return -ENOENT;
@@ -1578,28 +1576,26 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
}
}
- nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
+ nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(info->nlh));
nf_ct_put(ct);
return 0;
}
-static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int ctnetlink_get_conntrack(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
+ struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
- struct nf_conn *ct;
- struct sk_buff *skb2 = NULL;
- struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_zone zone;
+ struct sk_buff *skb2;
+ struct nf_conn *ct;
int err;
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.start = ctnetlink_start,
.dump = ctnetlink_dump_table,
@@ -1607,7 +1603,7 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
.data = (void *)cda,
};
- return netlink_dump_start(ctnl, skb, nlh, &c);
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
}
err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
@@ -1626,7 +1622,7 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
if (err < 0)
return err;
- h = nf_conntrack_find_get(net, &zone, &tuple);
+ h = nf_conntrack_find_get(info->net, &zone, &tuple);
if (!h)
return -ENOENT;
@@ -1639,13 +1635,16 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
return -ENOMEM;
}
- err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
- NFNL_MSG_TYPE(nlh->nlmsg_type), ct, true, 0);
+ err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type), ct,
+ true, 0);
nf_ct_put(ct);
if (err <= 0)
goto free;
- err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+ err = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
if (err < 0)
goto out;
@@ -1743,18 +1742,16 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
return ctnetlink_dump_list(skb, cb, true);
}
-static int ctnetlink_get_ct_dying(struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int ctnetlink_get_ct_dying(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = ctnetlink_dump_dying,
.done = ctnetlink_done_list,
};
- return netlink_dump_start(ctnl, skb, nlh, &c);
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
}
return -EOPNOTSUPP;
@@ -1766,18 +1763,16 @@ ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
return ctnetlink_dump_list(skb, cb, false);
}
-static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int ctnetlink_get_ct_unconfirmed(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = ctnetlink_dump_unconfirmed,
.done = ctnetlink_done_list,
};
- return netlink_dump_start(ctnl, skb, nlh, &c);
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
}
return -EOPNOTSUPP;
@@ -2374,18 +2369,16 @@ err1:
return ERR_PTR(err);
}
-static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int ctnetlink_new_conntrack(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
+ struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct nf_conntrack_tuple otuple, rtuple;
struct nf_conntrack_tuple_hash *h = NULL;
- struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- struct nf_conn *ct;
u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_zone zone;
+ struct nf_conn *ct;
int err;
err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
@@ -2407,13 +2400,13 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
}
if (cda[CTA_TUPLE_ORIG])
- h = nf_conntrack_find_get(net, &zone, &otuple);
+ h = nf_conntrack_find_get(info->net, &zone, &otuple);
else if (cda[CTA_TUPLE_REPLY])
- h = nf_conntrack_find_get(net, &zone, &rtuple);
+ h = nf_conntrack_find_get(info->net, &zone, &rtuple);
if (h == NULL) {
err = -ENOENT;
- if (nlh->nlmsg_flags & NLM_F_CREATE) {
+ if (info->nlh->nlmsg_flags & NLM_F_CREATE) {
enum ip_conntrack_events events;
if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY])
@@ -2421,8 +2414,8 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
if (otuple.dst.protonum != rtuple.dst.protonum)
return -EINVAL;
- ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple,
- &rtuple, u3);
+ ct = ctnetlink_create_conntrack(info->net, &zone, cda,
+ &otuple, &rtuple, u3);
if (IS_ERR(ct))
return PTR_ERR(ct);
@@ -2445,7 +2438,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
(1 << IPCT_SYNPROXY) |
events,
ct, NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
+ nlmsg_report(info->nlh));
nf_ct_put(ct);
}
@@ -2455,7 +2448,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
err = -EEXIST;
ct = nf_ct_tuplehash_to_ctrack(h);
- if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
+ if (!(info->nlh->nlmsg_flags & NLM_F_EXCL)) {
err = ctnetlink_change_conntrack(ct, cda);
if (err == 0) {
nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
@@ -2467,7 +2460,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
(1 << IPCT_MARK) |
(1 << IPCT_SYNPROXY),
ct, NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
+ nlmsg_report(info->nlh));
}
}
@@ -2539,17 +2532,15 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-static int ctnetlink_stat_ct_cpu(struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int ctnetlink_stat_ct_cpu(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = ctnetlink_ct_stat_cpu_dump,
};
- return netlink_dump_start(ctnl, skb, nlh, &c);
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
}
return 0;
@@ -2585,10 +2576,8 @@ nlmsg_failure:
return -1;
}
-static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int ctnetlink_stat_ct(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
struct sk_buff *skb2;
int err;
@@ -2598,13 +2587,14 @@ static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl,
return -ENOMEM;
err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq,
- NFNL_MSG_TYPE(nlh->nlmsg_type),
+ info->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type),
sock_net(skb->sk));
if (err <= 0)
goto free;
- err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+ err = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
if (err < 0)
goto out;
@@ -3284,29 +3274,29 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
return err;
}
-static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int ctnetlink_get_expect(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
+ struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
- struct sk_buff *skb2;
- struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_zone zone;
+ struct sk_buff *skb2;
int err;
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
if (cda[CTA_EXPECT_MASTER])
- return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda,
- extack);
+ return ctnetlink_dump_exp_ct(info->net, info->sk, skb,
+ info->nlh, cda,
+ info->extack);
else {
struct netlink_dump_control c = {
.dump = ctnetlink_exp_dump_table,
.done = ctnetlink_exp_done,
};
- return netlink_dump_start(ctnl, skb, nlh, &c);
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
}
}
@@ -3326,7 +3316,7 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
if (err < 0)
return err;
- exp = nf_ct_expect_find_get(net, &zone, &tuple);
+ exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
if (!exp)
return -ENOENT;
@@ -3348,13 +3338,15 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
rcu_read_lock();
err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp);
+ info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
+ exp);
rcu_read_unlock();
nf_ct_expect_put(exp);
if (err <= 0)
goto free;
- err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+ err = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
if (err < 0)
goto out;
@@ -3382,15 +3374,14 @@ static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data)
return true;
}
-static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int ctnetlink_del_expect(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
+ struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple tuple;
- struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_zone zone;
int err;
@@ -3406,7 +3397,7 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
return err;
/* bump usage count to 2 */
- exp = nf_ct_expect_find_get(net, &zone, &tuple);
+ exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
if (!exp)
return -ENOENT;
@@ -3422,7 +3413,7 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
spin_lock_bh(&nf_conntrack_expect_lock);
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
+ nlmsg_report(info->nlh));
nf_ct_expect_put(exp);
}
spin_unlock_bh(&nf_conntrack_expect_lock);
@@ -3432,14 +3423,14 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
} else if (cda[CTA_EXPECT_HELP_NAME]) {
char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]);
- nf_ct_expect_iterate_net(net, expect_iter_name, name,
+ nf_ct_expect_iterate_net(info->net, expect_iter_name, name,
NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
+ nlmsg_report(info->nlh));
} else {
/* This basically means we have to flush everything*/
- nf_ct_expect_iterate_net(net, expect_iter_all, NULL,
+ nf_ct_expect_iterate_net(info->net, expect_iter_all, NULL,
NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
+ nlmsg_report(info->nlh));
}
return 0;
@@ -3635,15 +3626,14 @@ err_ct:
return err;
}
-static int ctnetlink_new_expect(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int ctnetlink_new_expect(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
+ struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
- struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_zone zone;
int err;
@@ -3662,20 +3652,20 @@ static int ctnetlink_new_expect(struct net *net, struct sock *ctnl,
return err;
spin_lock_bh(&nf_conntrack_expect_lock);
- exp = __nf_ct_expect_find(net, &zone, &tuple);
+ exp = __nf_ct_expect_find(info->net, &zone, &tuple);
if (!exp) {
spin_unlock_bh(&nf_conntrack_expect_lock);
err = -ENOENT;
- if (nlh->nlmsg_flags & NLM_F_CREATE) {
- err = ctnetlink_create_expect(net, &zone, cda, u3,
+ if (info->nlh->nlmsg_flags & NLM_F_CREATE) {
+ err = ctnetlink_create_expect(info->net, &zone, cda, u3,
NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
+ nlmsg_report(info->nlh));
}
return err;
}
err = -EEXIST;
- if (!(nlh->nlmsg_flags & NLM_F_EXCL))
+ if (!(info->nlh->nlmsg_flags & NLM_F_EXCL))
err = ctnetlink_change_expect(exp, cda);
spin_unlock_bh(&nf_conntrack_expect_lock);
@@ -3736,17 +3726,15 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-static int ctnetlink_stat_exp_cpu(struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int ctnetlink_stat_exp_cpu(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = ctnetlink_exp_stat_cpu_dump,
};
- return netlink_dump_start(ctnl, skb, nlh, &c);
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
}
return 0;
@@ -3763,35 +3751,71 @@ static struct nf_exp_event_notifier ctnl_notifier_exp = {
#endif
static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
- [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack,
- .attr_count = CTA_MAX,
- .policy = ct_nla_policy },
- [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack,
- .attr_count = CTA_MAX,
- .policy = ct_nla_policy },
- [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack,
- .attr_count = CTA_MAX,
- .policy = ct_nla_policy },
- [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack,
- .attr_count = CTA_MAX,
- .policy = ct_nla_policy },
- [IPCTNL_MSG_CT_GET_STATS_CPU] = { .call = ctnetlink_stat_ct_cpu },
- [IPCTNL_MSG_CT_GET_STATS] = { .call = ctnetlink_stat_ct },
- [IPCTNL_MSG_CT_GET_DYING] = { .call = ctnetlink_get_ct_dying },
- [IPCTNL_MSG_CT_GET_UNCONFIRMED] = { .call = ctnetlink_get_ct_unconfirmed },
+ [IPCTNL_MSG_CT_NEW] = {
+ .call = ctnetlink_new_conntrack,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_MAX,
+ .policy = ct_nla_policy
+ },
+ [IPCTNL_MSG_CT_GET] = {
+ .call = ctnetlink_get_conntrack,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_MAX,
+ .policy = ct_nla_policy
+ },
+ [IPCTNL_MSG_CT_DELETE] = {
+ .call = ctnetlink_del_conntrack,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_MAX,
+ .policy = ct_nla_policy
+ },
+ [IPCTNL_MSG_CT_GET_CTRZERO] = {
+ .call = ctnetlink_get_conntrack,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_MAX,
+ .policy = ct_nla_policy
+ },
+ [IPCTNL_MSG_CT_GET_STATS_CPU] = {
+ .call = ctnetlink_stat_ct_cpu,
+ .type = NFNL_CB_MUTEX,
+ },
+ [IPCTNL_MSG_CT_GET_STATS] = {
+ .call = ctnetlink_stat_ct,
+ .type = NFNL_CB_MUTEX,
+ },
+ [IPCTNL_MSG_CT_GET_DYING] = {
+ .call = ctnetlink_get_ct_dying,
+ .type = NFNL_CB_MUTEX,
+ },
+ [IPCTNL_MSG_CT_GET_UNCONFIRMED] = {
+ .call = ctnetlink_get_ct_unconfirmed,
+ .type = NFNL_CB_MUTEX,
+ },
};
static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
- [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect,
- .attr_count = CTA_EXPECT_MAX,
- .policy = exp_nla_policy },
- [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect,
- .attr_count = CTA_EXPECT_MAX,
- .policy = exp_nla_policy },
- [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect,
- .attr_count = CTA_EXPECT_MAX,
- .policy = exp_nla_policy },
- [IPCTNL_MSG_EXP_GET_STATS_CPU] = { .call = ctnetlink_stat_exp_cpu },
+ [IPCTNL_MSG_EXP_GET] = {
+ .call = ctnetlink_get_expect,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_EXPECT_MAX,
+ .policy = exp_nla_policy
+ },
+ [IPCTNL_MSG_EXP_NEW] = {
+ .call = ctnetlink_new_expect,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_EXPECT_MAX,
+ .policy = exp_nla_policy
+ },
+ [IPCTNL_MSG_EXP_DELETE] = {
+ .call = ctnetlink_del_expect,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_EXPECT_MAX,
+ .policy = exp_nla_policy
+ },
+ [IPCTNL_MSG_EXP_GET_STATS_CPU] = {
+ .call = ctnetlink_stat_exp_cpu,
+ .type = NFNL_CB_MUTEX,
+ },
};
static const struct nfnetlink_subsystem ctnl_subsys = {
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 47e9319d2cf3..89e5bac384d7 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -536,15 +536,19 @@ static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
mutex_lock(&nf_ct_proto_mutex);
switch (nfproto) {
case NFPROTO_IPV4:
- if (cnet->users4 && (--cnet->users4 == 0))
+ if (cnet->users4 && (--cnet->users4 == 0)) {
nf_unregister_net_hooks(net, ipv4_conntrack_ops,
ARRAY_SIZE(ipv4_conntrack_ops));
+ nf_defrag_ipv4_disable(net);
+ }
break;
#if IS_ENABLED(CONFIG_IPV6)
case NFPROTO_IPV6:
- if (cnet->users6 && (--cnet->users6 == 0))
+ if (cnet->users6 && (--cnet->users6 == 0)) {
nf_unregister_net_hooks(net, ipv6_conntrack_ops,
ARRAY_SIZE(ipv6_conntrack_ops));
+ nf_defrag_ipv6_disable(net);
+ }
break;
#endif
case NFPROTO_BRIDGE:
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index 2518818ed479..13234641cdb3 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -1011,6 +1011,7 @@ static void __net_exit nf_log_syslog_net_exit(struct net *net)
nf_log_unset(net, &nf_arp_logger);
nf_log_unset(net, &nf_ip6_logger);
nf_log_unset(net, &nf_netdev_logger);
+ nf_log_unset(net, &nf_bridge_logger);
}
static struct pernet_operations nf_log_syslog_net_ops = {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index b7c3c902290f..7de595ead06a 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -146,43 +146,6 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
return;
}
}
-
-int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
-{
- struct flowi fl;
- unsigned int hh_len;
- struct dst_entry *dst;
- struct sock *sk = skb->sk;
- int err;
-
- err = xfrm_decode_session(skb, &fl, family);
- if (err < 0)
- return err;
-
- dst = skb_dst(skb);
- if (dst->xfrm)
- dst = ((struct xfrm_dst *)dst)->route;
- if (!dst_hold_safe(dst))
- return -EHOSTUNREACH;
-
- if (sk && !net_eq(net, sock_net(sk)))
- sk = NULL;
-
- dst = xfrm_lookup(net, dst, &fl, sk, 0);
- if (IS_ERR(dst))
- return PTR_ERR(dst);
-
- skb_dst_drop(skb);
- skb_dst_set(skb, dst);
-
- /* Change in oif may mean change in hh_len. */
- hh_len = skb_dst(skb)->dev->hard_header_len;
- if (skb_headroom(skb) < hh_len &&
- pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
- return -ENOMEM;
- return 0;
-}
-EXPORT_SYMBOL(nf_xfrm_me_harder);
#endif /* CONFIG_XFRM */
/* We keep an extra hash for each conntrack, for fast searching. */
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 4731d21fc3ad..48cc60084d28 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -659,6 +659,44 @@ nf_nat_ipv4_pre_routing(void *priv, struct sk_buff *skb,
return ret;
}
+#ifdef CONFIG_XFRM
+static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
+{
+ struct sock *sk = skb->sk;
+ struct dst_entry *dst;
+ unsigned int hh_len;
+ struct flowi fl;
+ int err;
+
+ err = xfrm_decode_session(skb, &fl, family);
+ if (err < 0)
+ return err;
+
+ dst = skb_dst(skb);
+ if (dst->xfrm)
+ dst = ((struct xfrm_dst *)dst)->route;
+ if (!dst_hold_safe(dst))
+ return -EHOSTUNREACH;
+
+ if (sk && !net_eq(net, sock_net(sk)))
+ sk = NULL;
+
+ dst = xfrm_lookup(net, dst, &fl, sk, 0);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
+
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+
+ /* Change in oif may mean change in hh_len. */
+ hh_len = skb_dst(skb)->dev->hard_header_len;
+ if (skb_headroom(skb) < hh_len &&
+ pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+ return -ENOMEM;
+ return 0;
+}
+#endif
+
static unsigned int
nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 357443b3c0e4..1050f23c0d29 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -21,7 +21,6 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_offload.h>
#include <net/net_namespace.h>
-#include <net/netns/generic.h>
#include <net/sock.h>
#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
@@ -106,7 +105,7 @@ static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types
static void nft_validate_state_update(struct net *net, u8 new_validate_state)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
switch (nft_net->validate_state) {
case NFT_VALIDATE_SKIP:
@@ -181,7 +180,7 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
if (!nft_set_is_anonymous(set))
return;
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
list_for_each_entry_reverse(trans, &nft_net->commit_list, list) {
switch (trans->msg_type) {
case NFT_MSG_NEWSET:
@@ -278,9 +277,8 @@ static void nf_tables_unregister_hook(struct net *net,
static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
{
- struct nftables_pernet *nft_net;
+ struct nftables_pernet *nft_net = nft_pernet(net);
- nft_net = net_generic(net, nf_tables_net_id);
list_add_tail(&trans->list, &nft_net->commit_list);
}
@@ -566,7 +564,7 @@ static struct nft_table *nft_table_lookup(const struct net *net,
if (nla == NULL)
return ERR_PTR(-EINVAL);
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
list_for_each_entry_rcu(table, &nft_net->tables, list,
lockdep_is_held(&nft_net->commit_mutex)) {
if (!nla_strcmp(nla, table->name) &&
@@ -590,7 +588,7 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
struct nftables_pernet *nft_net;
struct nft_table *table;
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
list_for_each_entry(table, &nft_net->tables, list) {
if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
nft_active_genmask(table, genmask))
@@ -655,7 +653,7 @@ __printf(2, 3) int nft_request_module(struct net *net, const char *fmt,
if (ret >= MODULE_NAME_LEN)
return 0;
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
list_for_each_entry(req, &nft_net->module_list, list) {
if (!strcmp(req->module, module_name)) {
if (req->done)
@@ -711,7 +709,7 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
static __be16 nft_base_seq(const struct net *net)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
return htons(nft_net->base_seq & 0xffff);
}
@@ -793,7 +791,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
goto err;
}
- nft_net = net_generic(ctx->net, nf_tables_net_id);
+ nft_net = nft_pernet(ctx->net);
nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
return;
err:
@@ -811,7 +809,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
int family = nfmsg->nfgen_family;
rcu_read_lock();
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
cb->seq = nft_net->base_seq;
list_for_each_entry_rcu(table, &nft_net->tables, list) {
@@ -860,25 +858,25 @@ static int nft_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb,
}
/* called with rcu_read_lock held */
-static int nf_tables_gettable(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_gettable(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u8 genmask = nft_genmask_cur(net);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_cur(info->net);
+ int family = nfmsg->nfgen_family;
const struct nft_table *table;
+ struct net *net = info->net;
struct sk_buff *skb2;
- int family = nfmsg->nfgen_family;
int err;
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nf_tables_dump_tables,
.module = THIS_MODULE,
};
- return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask, 0);
@@ -892,8 +890,8 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
return -ENOMEM;
err = nf_tables_fill_table_info(skb2, net, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
- family, table);
+ info->nlh->nlmsg_seq, NFT_MSG_NEWTABLE,
+ 0, family, table);
if (err < 0)
goto err_fill_table_info;
@@ -1057,15 +1055,15 @@ static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg,
return strcmp(obj->key.name, k->name);
}
-static int nf_tables_newtable(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u8 genmask = nft_genmask_next(net);
+ struct nftables_pernet *nft_net = nft_pernet(info->net);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
int family = nfmsg->nfgen_family;
+ struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
struct nft_ctx ctx;
@@ -1080,14 +1078,15 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
if (PTR_ERR(table) != -ENOENT)
return PTR_ERR(table);
} else {
- if (nlh->nlmsg_flags & NLM_F_EXCL) {
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
- if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
return nf_tables_updtable(&ctx);
}
@@ -1128,7 +1127,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
if (table->flags & NFT_TABLE_F_OWNER)
table->nlpid = NETLINK_CB(skb).portid;
- nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
goto err_trans;
@@ -1221,9 +1220,9 @@ out:
static int nft_flush(struct nft_ctx *ctx, int family)
{
- struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
- struct nft_table *table, *nt;
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
const struct nlattr * const *nla = ctx->nla;
+ struct nft_table *table, *nt;
int err = 0;
list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
@@ -1252,19 +1251,19 @@ out:
return err;
}
-static int nf_tables_deltable(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u8 genmask = nft_genmask_next(net);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
int family = nfmsg->nfgen_family;
+ struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
struct nft_ctx ctx;
- nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, info->nlh, 0, NULL, NULL, nla);
if (family == AF_UNSPEC ||
(!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE]))
return nft_flush(&ctx, family);
@@ -1283,7 +1282,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
return PTR_ERR(table);
}
- if (nlh->nlmsg_flags & NLM_F_NONREC &&
+ if (info->nlh->nlmsg_flags & NLM_F_NONREC &&
table->use > 0)
return -EBUSY;
@@ -1345,7 +1344,7 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
static bool lockdep_commit_lock_is_held(const struct net *net)
{
#ifdef CONFIG_PROVE_LOCKING
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
return lockdep_is_held(&nft_net->commit_mutex);
#else
@@ -1570,7 +1569,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
goto err;
}
- nft_net = net_generic(ctx->net, nf_tables_net_id);
+ nft_net = nft_pernet(ctx->net);
nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
return;
err:
@@ -1581,15 +1580,15 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- const struct nft_table *table;
- const struct nft_chain *chain;
unsigned int idx = 0, s_idx = cb->args[0];
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nftables_pernet *nft_net;
+ const struct nft_table *table;
+ const struct nft_chain *chain;
rcu_read_lock();
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
cb->seq = nft_net->base_seq;
list_for_each_entry_rcu(table, &nft_net->tables, list) {
@@ -1625,26 +1624,26 @@ done:
}
/* called with rcu_read_lock held */
-static int nf_tables_getchain(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_getchain(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u8 genmask = nft_genmask_cur(net);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_cur(info->net);
+ int family = nfmsg->nfgen_family;
const struct nft_chain *chain;
+ struct net *net = info->net;
struct nft_table *table;
struct sk_buff *skb2;
- int family = nfmsg->nfgen_family;
int err;
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nf_tables_dump_chains,
.module = THIS_MODULE,
};
- return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask, 0);
@@ -1664,8 +1663,8 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
return -ENOMEM;
err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
- family, table, chain);
+ info->nlh->nlmsg_seq, NFT_MSG_NEWCHAIN,
+ 0, family, table, chain);
if (err < 0)
goto err_fill_chain_info;
@@ -1908,7 +1907,7 @@ static int nft_chain_parse_hook(struct net *net,
struct nft_chain_hook *hook, u8 family,
bool autoload)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
struct nlattr *ha[NFTA_HOOK_MAX + 1];
const struct nft_chain_type *type;
int err;
@@ -2302,7 +2301,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
if (nla[NFTA_CHAIN_HANDLE] &&
nla[NFTA_CHAIN_NAME]) {
- struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
struct nft_trans *tmp;
char *name;
@@ -2338,7 +2337,7 @@ err:
static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
const struct nlattr *nla)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
u32 id = ntohl(nla_get_be32(nla));
struct nft_trans *trans;
@@ -2352,16 +2351,16 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
return ERR_PTR(-ENOENT);
}
-static int nf_tables_newchain(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u8 genmask = nft_genmask_next(net);
+ struct nftables_pernet *nft_net = nft_pernet(info->net);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
int family = nfmsg->nfgen_family;
struct nft_chain *chain = NULL;
+ struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
u8 policy = NF_ACCEPT;
@@ -2433,14 +2432,14 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (flags & ~NFT_CHAIN_FLAGS)
return -EOPNOTSUPP;
- nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
if (chain != NULL) {
- if (nlh->nlmsg_flags & NLM_F_EXCL) {
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
- if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
flags |= chain->flags & NFT_CHAIN_BASE;
@@ -2451,14 +2450,14 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
return nf_tables_addchain(&ctx, family, genmask, policy, flags);
}
-static int nf_tables_delchain(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u8 genmask = nft_genmask_next(net);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
int family = nfmsg->nfgen_family;
+ struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
struct nft_chain *chain;
@@ -2488,11 +2487,11 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
return PTR_ERR(chain);
}
- if (nlh->nlmsg_flags & NLM_F_NONREC &&
+ if (info->nlh->nlmsg_flags & NLM_F_NONREC &&
chain->use > 0)
return -EBUSY;
- nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
use = chain->use;
list_for_each_entry(rule, &chain->rules, list) {
@@ -2715,15 +2714,15 @@ err1:
}
static int nf_tables_newexpr(const struct nft_ctx *ctx,
- const struct nft_expr_info *info,
+ const struct nft_expr_info *expr_info,
struct nft_expr *expr)
{
- const struct nft_expr_ops *ops = info->ops;
+ const struct nft_expr_ops *ops = expr_info->ops;
int err;
expr->ops = ops;
if (ops->init) {
- err = ops->init(ctx, expr, (const struct nlattr **)info->tb);
+ err = ops->init(ctx, expr, (const struct nlattr **)expr_info->tb);
if (err < 0)
goto err1;
}
@@ -2747,21 +2746,21 @@ static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
const struct nlattr *nla)
{
- struct nft_expr_info info;
+ struct nft_expr_info expr_info;
struct nft_expr *expr;
struct module *owner;
int err;
- err = nf_tables_expr_parse(ctx, nla, &info);
+ err = nf_tables_expr_parse(ctx, nla, &expr_info);
if (err < 0)
goto err1;
err = -ENOMEM;
- expr = kzalloc(info.ops->size, GFP_KERNEL);
+ expr = kzalloc(expr_info.ops->size, GFP_KERNEL);
if (expr == NULL)
goto err2;
- err = nf_tables_newexpr(ctx, &info, expr);
+ err = nf_tables_newexpr(ctx, &expr_info, expr);
if (err < 0)
goto err3;
@@ -2769,9 +2768,9 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
err3:
kfree(expr);
err2:
- owner = info.ops->type->owner;
- if (info.ops->type->release_ops)
- info.ops->type->release_ops(info.ops);
+ owner = expr_info.ops->type->owner;
+ if (expr_info.ops->type->release_ops)
+ expr_info.ops->type->release_ops(expr_info.ops);
module_put(owner);
err1:
@@ -2908,7 +2907,7 @@ nla_put_failure:
static void nf_tables_rule_notify(const struct nft_ctx *ctx,
const struct nft_rule *rule, int event)
{
- struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
struct sk_buff *skb;
int err;
@@ -2989,7 +2988,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
struct nftables_pernet *nft_net;
rcu_read_lock();
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
cb->seq = nft_net->base_seq;
list_for_each_entry_rcu(table, &nft_net->tables, list) {
@@ -3078,21 +3077,21 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb)
}
/* called with rcu_read_lock held */
-static int nf_tables_getrule(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u8 genmask = nft_genmask_cur(net);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_cur(info->net);
+ int family = nfmsg->nfgen_family;
const struct nft_chain *chain;
const struct nft_rule *rule;
+ struct net *net = info->net;
struct nft_table *table;
struct sk_buff *skb2;
- int family = nfmsg->nfgen_family;
int err;
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.start= nf_tables_dump_rules_start,
.dump = nf_tables_dump_rules,
@@ -3101,7 +3100,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
.data = (void *)nla,
};
- return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0);
@@ -3127,7 +3126,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
return -ENOMEM;
err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
+ info->nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
family, table, chain, rule, NULL);
if (err < 0)
goto err_fill_rule_info;
@@ -3218,28 +3217,28 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
#define NFT_RULE_MAXEXPRS 128
-static int nf_tables_newrule(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u8 genmask = nft_genmask_next(net);
- struct nft_expr_info *info = NULL;
+ struct nftables_pernet *nft_net = nft_pernet(info->net);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
+ unsigned int size, i, n, ulen = 0, usize = 0;
+ u8 genmask = nft_genmask_next(info->net);
+ struct nft_rule *rule, *old_rule = NULL;
+ struct nft_expr_info *expr_info = NULL;
int family = nfmsg->nfgen_family;
+ struct net *net = info->net;
struct nft_flow_rule *flow;
+ struct nft_userdata *udata;
struct nft_table *table;
struct nft_chain *chain;
- struct nft_rule *rule, *old_rule = NULL;
- struct nft_userdata *udata;
- struct nft_trans *trans = NULL;
+ struct nft_trans *trans;
+ u64 handle, pos_handle;
struct nft_expr *expr;
struct nft_ctx ctx;
struct nlattr *tmp;
- unsigned int size, i, n, ulen = 0, usize = 0;
int err, rem;
- u64 handle, pos_handle;
lockdep_assert_held(&nft_net->commit_mutex);
@@ -3278,17 +3277,17 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
return PTR_ERR(rule);
}
- if (nlh->nlmsg_flags & NLM_F_EXCL) {
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
return -EEXIST;
}
- if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
old_rule = rule;
else
return -EOPNOTSUPP;
} else {
- if (!(nlh->nlmsg_flags & NLM_F_CREATE) ||
- nlh->nlmsg_flags & NLM_F_REPLACE)
+ if (!(info->nlh->nlmsg_flags & NLM_F_CREATE) ||
+ info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EINVAL;
handle = nf_tables_alloc_handle(table);
@@ -3311,15 +3310,15 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
}
}
- nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
n = 0;
size = 0;
if (nla[NFTA_RULE_EXPRESSIONS]) {
- info = kvmalloc_array(NFT_RULE_MAXEXPRS,
- sizeof(struct nft_expr_info),
- GFP_KERNEL);
- if (!info)
+ expr_info = kvmalloc_array(NFT_RULE_MAXEXPRS,
+ sizeof(struct nft_expr_info),
+ GFP_KERNEL);
+ if (!expr_info)
return -ENOMEM;
nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
@@ -3328,10 +3327,10 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
goto err1;
if (n == NFT_RULE_MAXEXPRS)
goto err1;
- err = nf_tables_expr_parse(&ctx, tmp, &info[n]);
+ err = nf_tables_expr_parse(&ctx, tmp, &expr_info[n]);
if (err < 0)
goto err1;
- size += info[n].ops->size;
+ size += expr_info[n].ops->size;
n++;
}
}
@@ -3365,20 +3364,20 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
expr = nft_expr_first(rule);
for (i = 0; i < n; i++) {
- err = nf_tables_newexpr(&ctx, &info[i], expr);
+ err = nf_tables_newexpr(&ctx, &expr_info[i], expr);
if (err < 0) {
- NL_SET_BAD_ATTR(extack, info[i].attr);
+ NL_SET_BAD_ATTR(extack, expr_info[i].attr);
goto err2;
}
- if (info[i].ops->validate)
+ if (expr_info[i].ops->validate)
nft_validate_state_update(net, NFT_VALIDATE_NEED);
- info[i].ops = NULL;
+ expr_info[i].ops = NULL;
expr = nft_expr_next(expr);
}
- if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
if (trans == NULL) {
err = -ENOMEM;
@@ -3398,7 +3397,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
goto err2;
}
- if (nlh->nlmsg_flags & NLM_F_APPEND) {
+ if (info->nlh->nlmsg_flags & NLM_F_APPEND) {
if (old_rule)
list_add_rcu(&rule->list, &old_rule->list);
else
@@ -3410,7 +3409,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
list_add_rcu(&rule->list, &chain->rules);
}
}
- kvfree(info);
+ kvfree(expr_info);
chain->use++;
if (nft_net->validate_state == NFT_VALIDATE_DO)
@@ -3429,20 +3428,21 @@ err2:
nf_tables_rule_release(&ctx, rule);
err1:
for (i = 0; i < n; i++) {
- if (info[i].ops) {
- module_put(info[i].ops->type->owner);
- if (info[i].ops->type->release_ops)
- info[i].ops->type->release_ops(info[i].ops);
+ if (expr_info[i].ops) {
+ module_put(expr_info[i].ops->type->owner);
+ if (expr_info[i].ops->type->release_ops)
+ expr_info[i].ops->type->release_ops(expr_info[i].ops);
}
}
- kvfree(info);
+ kvfree(expr_info);
+
return err;
}
static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
const struct nlattr *nla)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
u32 id = ntohl(nla_get_be32(nla));
struct nft_trans *trans;
@@ -3456,17 +3456,17 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
return ERR_PTR(-ENOENT);
}
-static int nf_tables_delrule(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u8 genmask = nft_genmask_next(net);
- struct nft_table *table;
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
+ int family = nfmsg->nfgen_family, err = 0;
+ u8 genmask = nft_genmask_next(info->net);
struct nft_chain *chain = NULL;
+ struct net *net = info->net;
+ struct nft_table *table;
struct nft_rule *rule;
- int family = nfmsg->nfgen_family, err = 0;
struct nft_ctx ctx;
table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask,
@@ -3487,7 +3487,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
return -EOPNOTSUPP;
}
- nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
if (chain) {
if (nla[NFTA_RULE_HANDLE]) {
@@ -3559,7 +3559,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
const struct nft_set_desc *desc,
enum nft_set_policies policy)
{
- struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
const struct nft_set_ops *ops, *bops;
struct nft_set_estimate est, best;
const struct nft_set_type *type;
@@ -3704,9 +3704,9 @@ static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table,
static struct nft_set *nft_set_lookup_byid(const struct net *net,
const struct nlattr *nla, u8 genmask)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
- struct nft_trans *trans;
+ struct nftables_pernet *nft_net = nft_pernet(net);
u32 id = ntohl(nla_get_be32(nla));
+ struct nft_trans *trans;
list_for_each_entry(trans, &nft_net->commit_list, list) {
if (trans->msg_type == NFT_MSG_NEWSET) {
@@ -3942,7 +3942,7 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx,
const struct nft_set *set, int event,
gfp_t gfp_flags)
{
- struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
struct sk_buff *skb;
u32 portid = ctx->portid;
int err;
@@ -3980,7 +3980,7 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
rcu_read_lock();
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
cb->seq = nft_net->base_seq;
list_for_each_entry_rcu(table, &nft_net->tables, list) {
@@ -4047,25 +4047,25 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb)
}
/* called with rcu_read_lock held */
-static int nf_tables_getset(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- u8 genmask = nft_genmask_cur(net);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_cur(info->net);
+ struct net *net = info->net;
const struct nft_set *set;
- struct nft_ctx ctx;
struct sk_buff *skb2;
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ struct nft_ctx ctx;
int err;
/* Verify existence before starting dump */
- err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack,
+ err = nft_ctx_init_from_setattr(&ctx, net, skb, info->nlh, nla, extack,
genmask, 0);
if (err < 0)
return err;
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.start = nf_tables_dump_sets_start,
.dump = nf_tables_dump_sets,
@@ -4074,7 +4074,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
.module = THIS_MODULE,
};
- return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
/* Only accept unspec with dump */
@@ -4168,28 +4168,27 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
return err;
}
-static int nf_tables_newset(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u8 genmask = nft_genmask_next(net);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ u32 ktype, dtype, flags, policy, gc_int, objtype;
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
int family = nfmsg->nfgen_family;
const struct nft_set_ops *ops;
struct nft_expr *expr = NULL;
+ struct net *net = info->net;
+ struct nft_set_desc desc;
struct nft_table *table;
+ unsigned char *udata;
struct nft_set *set;
struct nft_ctx ctx;
- char *name;
- u64 size;
u64 timeout;
- u32 ktype, dtype, flags, policy, gc_int, objtype;
- struct nft_set_desc desc;
- unsigned char *udata;
+ char *name;
+ int err, i;
u16 udlen;
- int err;
- int i;
+ u64 size;
if (nla[NFTA_SET_TABLE] == NULL ||
nla[NFTA_SET_NAME] == NULL ||
@@ -4297,7 +4296,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
return PTR_ERR(table);
}
- nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set)) {
@@ -4306,17 +4305,17 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
return PTR_ERR(set);
}
} else {
- if (nlh->nlmsg_flags & NLM_F_EXCL) {
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return -EEXIST;
}
- if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
return 0;
}
- if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+ if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
ops = nft_select_set_ops(&ctx, nla, &desc, policy);
@@ -4450,13 +4449,13 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
kvfree(set);
}
-static int nf_tables_delset(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u8 genmask = nft_genmask_next(net);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ struct net *net = info->net;
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
@@ -4467,7 +4466,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
if (nla[NFTA_SET_TABLE] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack,
+ err = nft_ctx_init_from_setattr(&ctx, net, skb, info->nlh, nla, extack,
genmask, NETLINK_CB(skb).portid);
if (err < 0)
return err;
@@ -4485,7 +4484,8 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
return PTR_ERR(set);
}
if (set->use ||
- (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) {
+ (info->nlh->nlmsg_flags & NLM_F_NONREC &&
+ atomic_read(&set->nelems) > 0)) {
NL_SET_BAD_ATTR(extack, attr);
return -EBUSY;
}
@@ -4833,7 +4833,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
int event;
rcu_read_lock();
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
dump_ctx->ctx.family != table->family)
@@ -5065,18 +5065,19 @@ err_fill_setelem:
}
/* called with rcu_read_lock held */
-static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_getsetelem(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- u8 genmask = nft_genmask_cur(net);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_cur(info->net);
+ struct net *net = info->net;
struct nft_set *set;
struct nlattr *attr;
struct nft_ctx ctx;
int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack,
genmask, NETLINK_CB(skb).portid);
if (err < 0)
return err;
@@ -5085,7 +5086,7 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
if (IS_ERR(set))
return PTR_ERR(set);
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.start = nf_tables_dump_set_start,
.dump = nf_tables_dump_set,
@@ -5098,7 +5099,7 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
};
c.data = &dump_ctx;
- return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
@@ -5138,7 +5139,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
goto err;
}
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
return;
err:
@@ -5655,13 +5656,14 @@ err_set_elem_expr_clone:
return err;
}
-static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_newsetelem(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
- u8 genmask = nft_genmask_next(net);
+ struct nftables_pernet *nft_net = nft_pernet(info->net);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ struct net *net = info->net;
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
@@ -5670,7 +5672,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack,
genmask, NETLINK_CB(skb).portid);
if (err < 0)
return err;
@@ -5684,7 +5686,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
return -EBUSY;
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
- err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags);
+ err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags);
if (err < 0)
return err;
}
@@ -5867,18 +5869,19 @@ err1:
return err;
}
-static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_delsetelem(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- u8 genmask = nft_genmask_next(net);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
+ struct net *net = info->net;
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack,
genmask, NETLINK_CB(skb).portid);
if (err < 0)
return err;
@@ -6162,15 +6165,15 @@ err_free_trans:
return err;
}
-static int nf_tables_newobj(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
const struct nft_object_type *type;
- u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
+ struct net *net = info->net;
struct nft_table *table;
struct nft_object *obj;
struct nft_ctx ctx;
@@ -6198,20 +6201,20 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
return err;
}
} else {
- if (nlh->nlmsg_flags & NLM_F_EXCL) {
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
return -EEXIST;
}
- if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
type = __nft_obj_type_get(objtype);
- nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
}
- nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
type = nft_obj_type_get(net, objtype);
if (IS_ERR(type))
@@ -6323,7 +6326,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
reset = true;
rcu_read_lock();
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
cb->seq = nft_net->base_seq;
list_for_each_entry_rcu(table, &nft_net->tables, list) {
@@ -6418,22 +6421,22 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
}
/* called with rcu_read_lock held */
-static int nf_tables_getobj(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u8 genmask = nft_genmask_cur(net);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_cur(info->net);
int family = nfmsg->nfgen_family;
const struct nft_table *table;
+ struct net *net = info->net;
struct nft_object *obj;
struct sk_buff *skb2;
bool reset = false;
u32 objtype;
int err;
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.start = nf_tables_dump_obj_start,
.dump = nf_tables_dump_obj,
@@ -6442,7 +6445,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
.data = (void *)nla,
};
- return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
if (!nla[NFTA_OBJ_NAME] ||
@@ -6466,14 +6469,14 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
if (!skb2)
return -ENOMEM;
- if (NFNL_MSG_TYPE(nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+ if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
reset = true;
if (reset) {
const struct nftables_pernet *nft_net;
char *buf;
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq);
audit_log_nfcfg(buf,
@@ -6485,7 +6488,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
}
err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
+ info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
family, table, obj, reset);
if (err < 0)
goto err_fill_obj_info;
@@ -6508,14 +6511,14 @@ static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
kfree(obj);
}
-static int nf_tables_delobj(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u8 genmask = nft_genmask_next(net);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
int family = nfmsg->nfgen_family;
+ struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
struct nft_object *obj;
@@ -6551,7 +6554,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
return -EBUSY;
}
- nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
return nft_delobj(&ctx, obj);
}
@@ -6560,7 +6563,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq, int event,
int family, int report, gfp_t gfp)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
struct sk_buff *skb;
int err;
char *buf = kasprintf(gfp, "%s:%u",
@@ -6938,19 +6941,19 @@ err_flowtable_update_hook:
}
-static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_newflowtable(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
struct nft_flowtable_hook flowtable_hook;
+ u8 genmask = nft_genmask_next(info->net);
const struct nf_flowtable_type *type;
- u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
struct nft_hook *hook, *next;
+ struct net *net = info->net;
struct nft_table *table;
struct nft_ctx ctx;
int err;
@@ -6976,17 +6979,17 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
return err;
}
} else {
- if (nlh->nlmsg_flags & NLM_F_EXCL) {
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
return -EEXIST;
}
- nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
- return nft_flowtable_update(&ctx, nlh, flowtable);
+ return nft_flowtable_update(&ctx, info->nlh, flowtable);
}
- nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
if (!flowtable)
@@ -7127,16 +7130,16 @@ err_flowtable_del_hook:
return err;
}
-static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_delflowtable(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u8 genmask = nft_genmask_next(net);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ struct netlink_ext_ack *extack = info->extack;
+ u8 genmask = nft_genmask_next(info->net);
int family = nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
+ struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
struct nft_ctx ctx;
@@ -7166,7 +7169,7 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
return PTR_ERR(flowtable);
}
- nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
if (nla[NFTA_FLOWTABLE_HOOK])
return nft_delflowtable_hook(&ctx, flowtable);
@@ -7246,7 +7249,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
const struct nft_table *table;
rcu_read_lock();
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
cb->seq = nft_net->base_seq;
list_for_each_entry_rcu(table, &nft_net->tables, list) {
@@ -7322,21 +7325,20 @@ static int nf_tables_dump_flowtable_done(struct netlink_callback *cb)
}
/* called with rcu_read_lock held */
-static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_getflowtable(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u8 genmask = nft_genmask_cur(net);
+ const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ u8 genmask = nft_genmask_cur(info->net);
int family = nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
const struct nft_table *table;
+ struct net *net = info->net;
struct sk_buff *skb2;
int err;
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.start = nf_tables_dump_flowtable_start,
.dump = nf_tables_dump_flowtable,
@@ -7345,7 +7347,7 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
.data = (void *)nla,
};
- return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
if (!nla[NFTA_FLOWTABLE_NAME])
@@ -7366,7 +7368,7 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
return -ENOMEM;
err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq,
+ info->nlh->nlmsg_seq,
NFT_MSG_NEWFLOWTABLE, 0, family,
flowtable, &flowtable->hook_list);
if (err < 0)
@@ -7384,7 +7386,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
struct list_head *hook_list,
int event)
{
- struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
struct sk_buff *skb;
int err;
@@ -7429,7 +7431,7 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
struct nlmsghdr *nlh;
char buf[TASK_COMM_LEN];
int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
@@ -7482,7 +7484,7 @@ static int nf_tables_flowtable_event(struct notifier_block *this,
return 0;
net = dev_net(dev);
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
mutex_lock(&nft_net->commit_mutex);
list_for_each_entry(table, &nft_net->tables, list) {
list_for_each_entry(flowtable, &table->flowtables, list) {
@@ -7528,10 +7530,8 @@ err:
-ENOBUFS);
}
-static int nf_tables_getgen(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_getgen(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
{
struct sk_buff *skb2;
int err;
@@ -7540,12 +7540,12 @@ static int nf_tables_getgen(struct net *net, struct sock *nlsk,
if (skb2 == NULL)
return -ENOMEM;
- err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq);
+ err = nf_tables_fill_gen_info(skb2, info->net, NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq);
if (err < 0)
goto err_fill_gen_info;
- return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
err_fill_gen_info:
kfree_skb(skb2);
@@ -7554,115 +7554,138 @@ err_fill_gen_info:
static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
[NFT_MSG_NEWTABLE] = {
- .call_batch = nf_tables_newtable,
+ .call = nf_tables_newtable,
+ .type = NFNL_CB_BATCH,
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
[NFT_MSG_GETTABLE] = {
- .call_rcu = nf_tables_gettable,
+ .call = nf_tables_gettable,
+ .type = NFNL_CB_RCU,
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
[NFT_MSG_DELTABLE] = {
- .call_batch = nf_tables_deltable,
+ .call = nf_tables_deltable,
+ .type = NFNL_CB_BATCH,
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
[NFT_MSG_NEWCHAIN] = {
- .call_batch = nf_tables_newchain,
+ .call = nf_tables_newchain,
+ .type = NFNL_CB_BATCH,
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
[NFT_MSG_GETCHAIN] = {
- .call_rcu = nf_tables_getchain,
+ .call = nf_tables_getchain,
+ .type = NFNL_CB_RCU,
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
[NFT_MSG_DELCHAIN] = {
- .call_batch = nf_tables_delchain,
+ .call = nf_tables_delchain,
+ .type = NFNL_CB_BATCH,
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
[NFT_MSG_NEWRULE] = {
- .call_batch = nf_tables_newrule,
+ .call = nf_tables_newrule,
+ .type = NFNL_CB_BATCH,
.attr_count = NFTA_RULE_MAX,
.policy = nft_rule_policy,
},
[NFT_MSG_GETRULE] = {
- .call_rcu = nf_tables_getrule,
+ .call = nf_tables_getrule,
+ .type = NFNL_CB_RCU,
.attr_count = NFTA_RULE_MAX,
.policy = nft_rule_policy,
},
[NFT_MSG_DELRULE] = {
- .call_batch = nf_tables_delrule,
+ .call = nf_tables_delrule,
+ .type = NFNL_CB_BATCH,
.attr_count = NFTA_RULE_MAX,
.policy = nft_rule_policy,
},
[NFT_MSG_NEWSET] = {
- .call_batch = nf_tables_newset,
+ .call = nf_tables_newset,
+ .type = NFNL_CB_BATCH,
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
[NFT_MSG_GETSET] = {
- .call_rcu = nf_tables_getset,
+ .call = nf_tables_getset,
+ .type = NFNL_CB_RCU,
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
[NFT_MSG_DELSET] = {
- .call_batch = nf_tables_delset,
+ .call = nf_tables_delset,
+ .type = NFNL_CB_BATCH,
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
[NFT_MSG_NEWSETELEM] = {
- .call_batch = nf_tables_newsetelem,
+ .call = nf_tables_newsetelem,
+ .type = NFNL_CB_BATCH,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
[NFT_MSG_GETSETELEM] = {
- .call_rcu = nf_tables_getsetelem,
+ .call = nf_tables_getsetelem,
+ .type = NFNL_CB_RCU,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
[NFT_MSG_DELSETELEM] = {
- .call_batch = nf_tables_delsetelem,
+ .call = nf_tables_delsetelem,
+ .type = NFNL_CB_BATCH,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
[NFT_MSG_GETGEN] = {
- .call_rcu = nf_tables_getgen,
+ .call = nf_tables_getgen,
+ .type = NFNL_CB_RCU,
},
[NFT_MSG_NEWOBJ] = {
- .call_batch = nf_tables_newobj,
+ .call = nf_tables_newobj,
+ .type = NFNL_CB_BATCH,
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
},
[NFT_MSG_GETOBJ] = {
- .call_rcu = nf_tables_getobj,
+ .call = nf_tables_getobj,
+ .type = NFNL_CB_RCU,
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
},
[NFT_MSG_DELOBJ] = {
- .call_batch = nf_tables_delobj,
+ .call = nf_tables_delobj,
+ .type = NFNL_CB_BATCH,
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
},
[NFT_MSG_GETOBJ_RESET] = {
- .call_rcu = nf_tables_getobj,
+ .call = nf_tables_getobj,
+ .type = NFNL_CB_RCU,
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
},
[NFT_MSG_NEWFLOWTABLE] = {
- .call_batch = nf_tables_newflowtable,
+ .call = nf_tables_newflowtable,
+ .type = NFNL_CB_BATCH,
.attr_count = NFTA_FLOWTABLE_MAX,
.policy = nft_flowtable_policy,
},
[NFT_MSG_GETFLOWTABLE] = {
- .call_rcu = nf_tables_getflowtable,
+ .call = nf_tables_getflowtable,
+ .type = NFNL_CB_RCU,
.attr_count = NFTA_FLOWTABLE_MAX,
.policy = nft_flowtable_policy,
},
[NFT_MSG_DELFLOWTABLE] = {
- .call_batch = nf_tables_delflowtable,
+ .call = nf_tables_delflowtable,
+ .type = NFNL_CB_BATCH,
.attr_count = NFTA_FLOWTABLE_MAX,
.policy = nft_flowtable_policy,
},
@@ -7670,7 +7693,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
static int nf_tables_validate(struct net *net)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_table *table;
switch (nft_net->validate_state) {
@@ -7855,7 +7878,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
static void nf_tables_commit_chain_prepare_cancel(struct net *net)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
@@ -7967,7 +7990,7 @@ static void nft_flowtable_hooks_del(struct nft_flowtable *flowtable,
static void nf_tables_module_autoload_cleanup(struct net *net)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_module_request *req, *next;
WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
@@ -7980,7 +8003,7 @@ static void nf_tables_module_autoload_cleanup(struct net *net)
static void nf_tables_commit_release(struct net *net)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans;
/* all side effects have to be made visible.
@@ -8014,7 +8037,7 @@ static void nf_tables_commit_release(struct net *net)
static void nft_commit_notify(struct net *net, u32 portid)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
struct sk_buff *batch_skb = NULL, *nskb, *skb;
unsigned char *data;
int len;
@@ -8101,7 +8124,7 @@ static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation)
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
struct nft_chain *chain;
@@ -8322,7 +8345,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
static void nf_tables_module_autoload(struct net *net)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_module_request *req, *next;
LIST_HEAD(module_list);
@@ -8370,7 +8393,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
struct nft_hook *hook;
@@ -8524,7 +8547,7 @@ static void nf_tables_cleanup(struct net *net)
static int nf_tables_abort(struct net *net, struct sk_buff *skb,
enum nfnl_abort_action action)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
int ret = __nf_tables_abort(net, action);
mutex_unlock(&nft_net->commit_mutex);
@@ -8534,7 +8557,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
static bool nf_tables_valid_genid(struct net *net, u32 genid)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
bool genid_ok;
mutex_lock(&nft_net->commit_mutex);
@@ -9096,7 +9119,7 @@ static void __nft_release_hook(struct net *net, struct nft_table *table)
static void __nft_release_hooks(struct net *net)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_table *table;
list_for_each_entry(table, &nft_net->tables, list) {
@@ -9156,7 +9179,7 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
static void __nft_release_tables(struct net *net)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_table *table, *nt;
list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
@@ -9179,7 +9202,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
return NOTIFY_DONE;
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
mutex_lock(&nft_net->commit_mutex);
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
@@ -9207,7 +9230,7 @@ static struct notifier_block nft_nl_notifier = {
static int __net_init nf_tables_init_net(struct net *net)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
INIT_LIST_HEAD(&nft_net->tables);
INIT_LIST_HEAD(&nft_net->commit_list);
@@ -9227,7 +9250,7 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net)
static void __net_exit nf_tables_exit_net(struct net *net)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
mutex_lock(&nft_net->commit_mutex);
if (!list_empty(&nft_net->commit_list))
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 19215e81dd66..a48c5fd53a80 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -7,8 +7,6 @@
#include <net/netfilter/nf_tables_offload.h>
#include <net/pkt_cls.h>
-extern unsigned int nf_tables_net_id;
-
static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions)
{
struct nft_flow_rule *flow;
@@ -389,7 +387,7 @@ static void nft_indr_block_cleanup(struct flow_block_cb *block_cb)
nft_flow_block_offload_init(&bo, dev_net(dev), FLOW_BLOCK_UNBIND,
basechain, &extack);
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
mutex_lock(&nft_net->commit_mutex);
list_del(&block_cb->driver_list);
list_move(&block_cb->list, &bo.cb_list);
@@ -490,7 +488,7 @@ static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy,
static void nft_flow_rule_offload_abort(struct net *net,
struct nft_trans *trans)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
int err = 0;
list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) {
@@ -539,7 +537,7 @@ static void nft_flow_rule_offload_abort(struct net *net,
int nft_flow_rule_offload_commit(struct net *net)
{
- struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans;
int err = 0;
u8 policy;
@@ -663,7 +661,7 @@ static int nft_offload_netdev_event(struct notifier_block *this,
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
- nft_net = net_generic(net, nf_tables_net_id);
+ nft_net = nft_pernet(net);
mutex_lock(&nft_net->commit_mutex);
chain = __nft_offload_get_chain(nft_net, dev);
if (chain)
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 06f5886f652e..d7a9628b6cee 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -252,6 +252,12 @@ replay:
struct nlattr *attr = (void *)nlh + min_len;
int attrlen = nlh->nlmsg_len - min_len;
__u8 subsys_id = NFNL_SUBSYS_ID(type);
+ struct nfnl_info info = {
+ .net = net,
+ .sk = nfnlnet->nfnl,
+ .nlh = nlh,
+ .extack = extack,
+ };
/* Sanity-check NFNL_MAX_ATTR_COUNT */
if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) {
@@ -267,24 +273,30 @@ replay:
return err;
}
- if (nc->call_rcu) {
- err = nc->call_rcu(net, nfnlnet->nfnl, skb, nlh,
- (const struct nlattr **)cda,
- extack);
+ if (!nc->call) {
rcu_read_unlock();
- } else {
+ return -EINVAL;
+ }
+
+ switch (nc->type) {
+ case NFNL_CB_RCU:
+ err = nc->call(skb, &info, (const struct nlattr **)cda);
+ rcu_read_unlock();
+ break;
+ case NFNL_CB_MUTEX:
rcu_read_unlock();
nfnl_lock(subsys_id);
if (nfnl_dereference_protected(subsys_id) != ss ||
- nfnetlink_find_client(type, ss) != nc)
+ nfnetlink_find_client(type, ss) != nc) {
err = -EAGAIN;
- else if (nc->call)
- err = nc->call(net, nfnlnet->nfnl, skb, nlh,
- (const struct nlattr **)cda,
- extack);
- else
- err = -EINVAL;
+ break;
+ }
+ err = nc->call(skb, &info, (const struct nlattr **)cda);
nfnl_unlock(subsys_id);
+ break;
+ default:
+ err = -EINVAL;
+ break;
}
if (err == -EAGAIN)
goto replay;
@@ -462,12 +474,24 @@ replay_abort:
goto ack;
}
+ if (nc->type != NFNL_CB_BATCH) {
+ err = -EINVAL;
+ goto ack;
+ }
+
{
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
- u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
+ struct nfnl_net *nfnlnet = nfnl_pernet(net);
struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1];
struct nlattr *attr = (void *)nlh + min_len;
+ u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
int attrlen = nlh->nlmsg_len - min_len;
+ struct nfnl_info info = {
+ .net = net,
+ .sk = nfnlnet->nfnl,
+ .nlh = nlh,
+ .extack = &extack,
+ };
/* Sanity-check NFTA_MAX_ATTR */
if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) {
@@ -482,13 +506,7 @@ replay_abort:
if (err < 0)
goto ack;
- if (nc->call_batch) {
- struct nfnl_net *nfnlnet = nfnl_pernet(net);
-
- err = nc->call_batch(net, nfnlnet->nfnl, skb, nlh,
- (const struct nlattr **)cda,
- &extack);
- }
+ err = nc->call(skb, &info, (const struct nlattr **)cda);
/* The lock was released to autoload some module, we
* have to abort and start from scratch using the
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 6895f31c5fbb..3c8cf8748cfb 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -56,15 +56,13 @@ static inline struct nfnl_acct_net *nfnl_acct_pernet(struct net *net)
#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
#define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */
-static int nfnl_acct_new(struct net *net, struct sock *nfnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const tb[],
- struct netlink_ext_ack *extack)
+static int nfnl_acct_new(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const tb[])
{
- struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(net);
+ struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(info->net);
struct nf_acct *nfacct, *matching = NULL;
- char *acct_name;
unsigned int size = 0;
+ char *acct_name;
u32 flags = 0;
if (!tb[NFACCT_NAME])
@@ -78,7 +76,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl,
if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
continue;
- if (nlh->nlmsg_flags & NLM_F_EXCL)
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
matching = nfacct;
@@ -86,7 +84,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl,
}
if (matching) {
- if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
/* reset counters if you request a replacement. */
atomic64_set(&matching->pkts, 0);
atomic64_set(&matching->bytes, 0);
@@ -273,17 +271,15 @@ static int nfnl_acct_start(struct netlink_callback *cb)
return 0;
}
-static int nfnl_acct_get(struct net *net, struct sock *nfnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const tb[],
- struct netlink_ext_ack *extack)
+static int nfnl_acct_get(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const tb[])
{
- struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(net);
+ struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(info->net);
int ret = -ENOENT;
struct nf_acct *cur;
char *acct_name;
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nfnl_acct_dump,
.start = nfnl_acct_start,
@@ -291,7 +287,7 @@ static int nfnl_acct_get(struct net *net, struct sock *nfnl,
.data = (void *)tb[NFACCT_FILTER],
};
- return netlink_dump_start(nfnl, skb, nlh, &c);
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
}
if (!tb[NFACCT_NAME])
@@ -311,15 +307,15 @@ static int nfnl_acct_get(struct net *net, struct sock *nfnl,
}
ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq,
- NFNL_MSG_TYPE(nlh->nlmsg_type),
- NFNL_MSG_ACCT_NEW, cur);
+ info->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type),
+ NFNL_MSG_ACCT_NEW, cur);
if (ret <= 0) {
kfree_skb(skb2);
break;
}
- ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
+ ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
if (ret > 0)
ret = 0;
@@ -347,12 +343,10 @@ static int nfnl_acct_try_del(struct nf_acct *cur)
return ret;
}
-static int nfnl_acct_del(struct net *net, struct sock *nfnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const tb[],
- struct netlink_ext_ack *extack)
+static int nfnl_acct_del(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const tb[])
{
- struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(net);
+ struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(info->net);
struct nf_acct *cur, *tmp;
int ret = -ENOENT;
char *acct_name;
@@ -388,18 +382,30 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
};
static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
- [NFNL_MSG_ACCT_NEW] = { .call = nfnl_acct_new,
- .attr_count = NFACCT_MAX,
- .policy = nfnl_acct_policy },
- [NFNL_MSG_ACCT_GET] = { .call = nfnl_acct_get,
- .attr_count = NFACCT_MAX,
- .policy = nfnl_acct_policy },
- [NFNL_MSG_ACCT_GET_CTRZERO] = { .call = nfnl_acct_get,
- .attr_count = NFACCT_MAX,
- .policy = nfnl_acct_policy },
- [NFNL_MSG_ACCT_DEL] = { .call = nfnl_acct_del,
- .attr_count = NFACCT_MAX,
- .policy = nfnl_acct_policy },
+ [NFNL_MSG_ACCT_NEW] = {
+ .call = nfnl_acct_new,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy
+ },
+ [NFNL_MSG_ACCT_GET] = {
+ .call = nfnl_acct_get,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy
+ },
+ [NFNL_MSG_ACCT_GET_CTRZERO] = {
+ .call = nfnl_acct_get,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy
+ },
+ [NFNL_MSG_ACCT_DEL] = {
+ .call = nfnl_acct_del,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFACCT_MAX,
+ .policy = nfnl_acct_policy
+ },
};
static const struct nfnetlink_subsystem nfnl_acct_subsys = {
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 22f6f7fcc724..322ac5dd5402 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -408,10 +408,8 @@ nfnl_cthelper_update(const struct nlattr * const tb[],
return 0;
}
-static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const tb[],
- struct netlink_ext_ack *extack)
+static int nfnl_cthelper_new(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const tb[])
{
const char *helper_name;
struct nf_conntrack_helper *cur, *helper = NULL;
@@ -441,7 +439,7 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
tuple.dst.protonum != cur->tuple.dst.protonum))
continue;
- if (nlh->nlmsg_flags & NLM_F_EXCL)
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
helper = cur;
@@ -607,10 +605,8 @@ out:
return skb->len;
}
-static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const tb[],
- struct netlink_ext_ack *extack)
+static int nfnl_cthelper_get(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const tb[])
{
int ret = -ENOENT;
struct nf_conntrack_helper *cur;
@@ -623,11 +619,11 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nfnl_cthelper_dump_table,
};
- return netlink_dump_start(nfnl, skb, nlh, &c);
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
}
if (tb[NFCTH_NAME])
@@ -659,15 +655,15 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
}
ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq,
- NFNL_MSG_TYPE(nlh->nlmsg_type),
+ info->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type),
NFNL_MSG_CTHELPER_NEW, cur);
if (ret <= 0) {
kfree_skb(skb2);
break;
}
- ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
+ ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
MSG_DONTWAIT);
if (ret > 0)
ret = 0;
@@ -678,10 +674,8 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
return ret;
}
-static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const tb[],
- struct netlink_ext_ack *extack)
+static int nfnl_cthelper_del(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const tb[])
{
char *helper_name = NULL;
struct nf_conntrack_helper *cur;
@@ -743,15 +737,24 @@ static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = {
};
static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = {
- [NFNL_MSG_CTHELPER_NEW] = { .call = nfnl_cthelper_new,
- .attr_count = NFCTH_MAX,
- .policy = nfnl_cthelper_policy },
- [NFNL_MSG_CTHELPER_GET] = { .call = nfnl_cthelper_get,
- .attr_count = NFCTH_MAX,
- .policy = nfnl_cthelper_policy },
- [NFNL_MSG_CTHELPER_DEL] = { .call = nfnl_cthelper_del,
- .attr_count = NFCTH_MAX,
- .policy = nfnl_cthelper_policy },
+ [NFNL_MSG_CTHELPER_NEW] = {
+ .call = nfnl_cthelper_new,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFCTH_MAX,
+ .policy = nfnl_cthelper_policy
+ },
+ [NFNL_MSG_CTHELPER_GET] = {
+ .call = nfnl_cthelper_get,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFCTH_MAX,
+ .policy = nfnl_cthelper_policy
+ },
+ [NFNL_MSG_CTHELPER_DEL] = {
+ .call = nfnl_cthelper_del,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFCTH_MAX,
+ .policy = nfnl_cthelper_policy
+ },
};
static const struct nfnetlink_subsystem nfnl_cthelper_subsys = {
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 46da5548d0b3..38848ad68899 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -83,13 +83,11 @@ err:
return ret;
}
-static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int cttimeout_new_timeout(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
- struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
+ struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(info->net);
__u16 l3num;
__u8 l4num;
const struct nf_conntrack_l4proto *l4proto;
@@ -111,7 +109,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
- if (nlh->nlmsg_flags & NLM_F_EXCL)
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
matching = timeout;
@@ -119,7 +117,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
}
if (matching) {
- if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+ if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
/* You cannot replace one timeout policy by another of
* different kind, sorry.
*/
@@ -129,7 +127,8 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
return ctnl_timeout_parse_policy(&matching->timeout.data,
matching->timeout.l4proto,
- net, cda[CTA_TIMEOUT_DATA]);
+ info->net,
+ cda[CTA_TIMEOUT_DATA]);
}
return -EBUSY;
@@ -150,8 +149,8 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
goto err_proto_put;
}
- ret = ctnl_timeout_parse_policy(&timeout->timeout.data, l4proto, net,
- cda[CTA_TIMEOUT_DATA]);
+ ret = ctnl_timeout_parse_policy(&timeout->timeout.data, l4proto,
+ info->net, cda[CTA_TIMEOUT_DATA]);
if (ret < 0)
goto err;
@@ -248,22 +247,20 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int cttimeout_get_timeout(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
- struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
+ struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(info->net);
int ret = -ENOENT;
char *name;
struct ctnl_timeout *cur;
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = ctnl_timeout_dump,
};
- return netlink_dump_start(ctnl, skb, nlh, &c);
+ return netlink_dump_start(info->sk, skb, info->nlh, &c);
}
if (!cda[CTA_TIMEOUT_NAME])
@@ -283,15 +280,15 @@ static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
}
ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq,
- NFNL_MSG_TYPE(nlh->nlmsg_type),
+ info->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type),
IPCTNL_MSG_TIMEOUT_NEW, cur);
if (ret <= 0) {
kfree_skb(skb2);
break;
}
- ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
+ ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
if (ret > 0)
ret = 0;
@@ -320,13 +317,11 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
return ret;
}
-static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int cttimeout_del_timeout(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
- struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
+ struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(info->net);
struct ctnl_timeout *cur, *tmp;
int ret = -ENOENT;
char *name;
@@ -334,7 +329,7 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
if (!cda[CTA_TIMEOUT_NAME]) {
list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list,
head)
- ctnl_timeout_try_del(net, cur);
+ ctnl_timeout_try_del(info->net, cur);
return 0;
}
@@ -344,7 +339,7 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
- ret = ctnl_timeout_try_del(net, cur);
+ ret = ctnl_timeout_try_del(info->net, cur);
if (ret < 0)
return ret;
@@ -353,11 +348,9 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
return ret;
}
-static int cttimeout_default_set(struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int cttimeout_default_set(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
const struct nf_conntrack_l4proto *l4proto;
__u8 l4num;
@@ -377,7 +370,7 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
goto err;
}
- ret = ctnl_timeout_parse_policy(NULL, l4proto, net,
+ ret = ctnl_timeout_parse_policy(NULL, l4proto, info->net,
cda[CTA_TIMEOUT_DATA]);
if (ret < 0)
goto err;
@@ -427,11 +420,9 @@ nla_put_failure:
return -1;
}
-static int cttimeout_default_get(struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[],
- struct netlink_ext_ack *extack)
+static int cttimeout_default_get(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
const struct nf_conntrack_l4proto *l4proto;
unsigned int *timeouts = NULL;
@@ -453,35 +444,35 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
switch (l4proto->l4proto) {
case IPPROTO_ICMP:
- timeouts = &nf_icmp_pernet(net)->timeout;
+ timeouts = &nf_icmp_pernet(info->net)->timeout;
break;
case IPPROTO_TCP:
- timeouts = nf_tcp_pernet(net)->timeouts;
+ timeouts = nf_tcp_pernet(info->net)->timeouts;
break;
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
- timeouts = nf_udp_pernet(net)->timeouts;
+ timeouts = nf_udp_pernet(info->net)->timeouts;
break;
case IPPROTO_DCCP:
#ifdef CONFIG_NF_CT_PROTO_DCCP
- timeouts = nf_dccp_pernet(net)->dccp_timeout;
+ timeouts = nf_dccp_pernet(info->net)->dccp_timeout;
#endif
break;
case IPPROTO_ICMPV6:
- timeouts = &nf_icmpv6_pernet(net)->timeout;
+ timeouts = &nf_icmpv6_pernet(info->net)->timeout;
break;
case IPPROTO_SCTP:
#ifdef CONFIG_NF_CT_PROTO_SCTP
- timeouts = nf_sctp_pernet(net)->timeouts;
+ timeouts = nf_sctp_pernet(info->net)->timeouts;
#endif
break;
case IPPROTO_GRE:
#ifdef CONFIG_NF_CT_PROTO_GRE
- timeouts = nf_gre_pernet(net)->timeouts;
+ timeouts = nf_gre_pernet(info->net)->timeouts;
#endif
break;
case 255:
- timeouts = &nf_generic_pernet(net)->timeout;
+ timeouts = &nf_generic_pernet(info->net)->timeout;
break;
default:
WARN_ONCE(1, "Missing timeouts for proto %d", l4proto->l4proto);
@@ -497,9 +488,10 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
goto err;
}
- ret = cttimeout_default_fill_info(net, skb2, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq,
- NFNL_MSG_TYPE(nlh->nlmsg_type),
+ ret = cttimeout_default_fill_info(info->net, skb2,
+ NETLINK_CB(skb).portid,
+ info->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type),
IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
l3num, l4proto, timeouts);
if (ret <= 0) {
@@ -507,7 +499,8 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
err = -ENOMEM;
goto err;
}
- ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+ ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
if (ret > 0)
ret = 0;
@@ -553,21 +546,36 @@ static void ctnl_timeout_put(struct nf_ct_timeout *t)
}
static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
- [IPCTNL_MSG_TIMEOUT_NEW] = { .call = cttimeout_new_timeout,
- .attr_count = CTA_TIMEOUT_MAX,
- .policy = cttimeout_nla_policy },
- [IPCTNL_MSG_TIMEOUT_GET] = { .call = cttimeout_get_timeout,
- .attr_count = CTA_TIMEOUT_MAX,
- .policy = cttimeout_nla_policy },
- [IPCTNL_MSG_TIMEOUT_DELETE] = { .call = cttimeout_del_timeout,
- .attr_count = CTA_TIMEOUT_MAX,
- .policy = cttimeout_nla_policy },
- [IPCTNL_MSG_TIMEOUT_DEFAULT_SET]= { .call = cttimeout_default_set,
- .attr_count = CTA_TIMEOUT_MAX,
- .policy = cttimeout_nla_policy },
- [IPCTNL_MSG_TIMEOUT_DEFAULT_GET]= { .call = cttimeout_default_get,
- .attr_count = CTA_TIMEOUT_MAX,
- .policy = cttimeout_nla_policy },
+ [IPCTNL_MSG_TIMEOUT_NEW] = {
+ .call = cttimeout_new_timeout,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_TIMEOUT_MAX,
+ .policy = cttimeout_nla_policy
+ },
+ [IPCTNL_MSG_TIMEOUT_GET] = {
+ .call = cttimeout_get_timeout,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_TIMEOUT_MAX,
+ .policy = cttimeout_nla_policy
+ },
+ [IPCTNL_MSG_TIMEOUT_DELETE] = {
+ .call = cttimeout_del_timeout,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_TIMEOUT_MAX,
+ .policy = cttimeout_nla_policy
+ },
+ [IPCTNL_MSG_TIMEOUT_DEFAULT_SET] = {
+ .call = cttimeout_default_set,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_TIMEOUT_MAX,
+ .policy = cttimeout_nla_policy
+ },
+ [IPCTNL_MSG_TIMEOUT_DEFAULT_GET] = {
+ .call = cttimeout_default_get,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = CTA_TIMEOUT_MAX,
+ .policy = cttimeout_nla_policy
+ },
};
static const struct nfnetlink_subsystem cttimeout_subsys = {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index d5f458d0ff3d..587086b18c36 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -845,10 +845,8 @@ static struct notifier_block nfulnl_rtnl_notifier = {
.notifier_call = nfulnl_rcv_nl_event,
};
-static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[],
- struct netlink_ext_ack *extack)
+static int nfulnl_recv_unsupp(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nfula[])
{
return -ENOTSUPP;
}
@@ -869,18 +867,16 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = {
[NFULA_CFG_FLAGS] = { .type = NLA_U16 },
};
-static int nfulnl_recv_config(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nfula[],
- struct netlink_ext_ack *extack)
+static int nfulnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nfula[])
{
- struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ struct nfnl_log_net *log = nfnl_log_pernet(info->net);
+ struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
u_int16_t group_num = ntohs(nfmsg->res_id);
- struct nfulnl_instance *inst;
struct nfulnl_msg_config_cmd *cmd = NULL;
- struct nfnl_log_net *log = nfnl_log_pernet(net);
- int ret = 0;
+ struct nfulnl_instance *inst;
u16 flags = 0;
+ int ret = 0;
if (nfula[NFULA_CFG_CMD]) {
u_int8_t pf = nfmsg->nfgen_family;
@@ -889,9 +885,9 @@ static int nfulnl_recv_config(struct net *net, struct sock *ctnl,
/* Commands without queue context */
switch (cmd->command) {
case NFULNL_CFG_CMD_PF_BIND:
- return nf_log_bind_pf(net, pf, &nfulnl_logger);
+ return nf_log_bind_pf(info->net, pf, &nfulnl_logger);
case NFULNL_CFG_CMD_PF_UNBIND:
- nf_log_unbind_pf(net, pf);
+ nf_log_unbind_pf(info->net, pf);
return 0;
}
}
@@ -932,7 +928,7 @@ static int nfulnl_recv_config(struct net *net, struct sock *ctnl,
goto out_put;
}
- inst = instance_create(net, group_num,
+ inst = instance_create(info->net, group_num,
NETLINK_CB(skb).portid,
sk_user_ns(NETLINK_CB(skb).sk));
if (IS_ERR(inst)) {
@@ -993,11 +989,17 @@ out:
}
static const struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = {
- [NFULNL_MSG_PACKET] = { .call = nfulnl_recv_unsupp,
- .attr_count = NFULA_MAX, },
- [NFULNL_MSG_CONFIG] = { .call = nfulnl_recv_config,
- .attr_count = NFULA_CFG_MAX,
- .policy = nfula_cfg_policy },
+ [NFULNL_MSG_PACKET] = {
+ .call = nfulnl_recv_unsupp,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFULA_MAX,
+ },
+ [NFULNL_MSG_CONFIG] = {
+ .call = nfulnl_recv_config,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFULA_CFG_MAX,
+ .policy = nfula_cfg_policy
+ },
};
static const struct nfnetlink_subsystem nfulnl_subsys = {
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 916a3c7f9eaf..e8f8875c6884 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -292,10 +292,9 @@ static const struct nla_policy nfnl_osf_policy[OSF_ATTR_MAX + 1] = {
[OSF_ATTR_FINGER] = { .len = sizeof(struct nf_osf_user_finger) },
};
-static int nfnl_osf_add_callback(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const osf_attrs[],
- struct netlink_ext_ack *extack)
+static int nfnl_osf_add_callback(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const osf_attrs[])
{
struct nf_osf_user_finger *f;
struct nf_osf_finger *kf = NULL, *sf;
@@ -307,7 +306,7 @@ static int nfnl_osf_add_callback(struct net *net, struct sock *ctnl,
if (!osf_attrs[OSF_ATTR_FINGER])
return -EINVAL;
- if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+ if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
return -EINVAL;
f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
@@ -325,7 +324,7 @@ static int nfnl_osf_add_callback(struct net *net, struct sock *ctnl,
kfree(kf);
kf = NULL;
- if (nlh->nlmsg_flags & NLM_F_EXCL)
+ if (info->nlh->nlmsg_flags & NLM_F_EXCL)
err = -EEXIST;
break;
}
@@ -339,11 +338,9 @@ static int nfnl_osf_add_callback(struct net *net, struct sock *ctnl,
return err;
}
-static int nfnl_osf_remove_callback(struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const osf_attrs[],
- struct netlink_ext_ack *extack)
+static int nfnl_osf_remove_callback(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const osf_attrs[])
{
struct nf_osf_user_finger *f;
struct nf_osf_finger *sf;
@@ -377,11 +374,13 @@ static int nfnl_osf_remove_callback(struct net *net, struct sock *ctnl,
static const struct nfnl_callback nfnl_osf_callbacks[OSF_MSG_MAX] = {
[OSF_MSG_ADD] = {
.call = nfnl_osf_add_callback,
+ .type = NFNL_CB_MUTEX,
.attr_count = OSF_ATTR_MAX,
.policy = nfnl_osf_policy,
},
[OSF_MSG_REMOVE] = {
.call = nfnl_osf_remove_callback,
+ .type = NFNL_CB_MUTEX,
.attr_count = OSF_ATTR_MAX,
.policy = nfnl_osf_policy,
},
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 37e81d895e61..f37a575ebd7f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1046,20 +1046,18 @@ static int nfq_id_after(unsigned int id, unsigned int max)
return (int)(id - max) > 0;
}
-static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[],
- struct netlink_ext_ack *extack)
+static int nfqnl_recv_verdict_batch(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nfqa[])
{
- struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
+ struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ u16 queue_num = ntohs(nfmsg->res_id);
struct nf_queue_entry *entry, *tmp;
- unsigned int verdict, maxid;
struct nfqnl_msg_verdict_hdr *vhdr;
struct nfqnl_instance *queue;
+ unsigned int verdict, maxid;
LIST_HEAD(batch_list);
- u16 queue_num = ntohs(nfmsg->res_id);
- struct nfnl_queue_net *q = nfnl_queue_pernet(net);
queue = verdict_instance_lookup(q, queue_num,
NETLINK_CB(skb).portid);
@@ -1158,22 +1156,19 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry,
return 0;
}
-static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
- struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[],
- struct netlink_ext_ack *extack)
+static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nfqa[])
{
- struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
+ struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
u_int16_t queue_num = ntohs(nfmsg->res_id);
struct nfqnl_msg_verdict_hdr *vhdr;
+ enum ip_conntrack_info ctinfo;
struct nfqnl_instance *queue;
- unsigned int verdict;
struct nf_queue_entry *entry;
- enum ip_conntrack_info ctinfo;
struct nfnl_ct_hook *nfnl_ct;
struct nf_conn *ct = NULL;
- struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+ unsigned int verdict;
int err;
queue = verdict_instance_lookup(q, queue_num,
@@ -1196,7 +1191,8 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
if (nfqa[NFQA_CT]) {
if (nfnl_ct != NULL)
- ct = nfqnl_ct_parse(nfnl_ct, nlh, nfqa, entry, &ctinfo);
+ ct = nfqnl_ct_parse(nfnl_ct, info->nlh, nfqa, entry,
+ &ctinfo);
}
if (entry->state.pf == PF_BRIDGE) {
@@ -1224,10 +1220,8 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
return 0;
}
-static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[],
- struct netlink_ext_ack *extack)
+static int nfqnl_recv_unsupp(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const cda[])
{
return -ENOTSUPP;
}
@@ -1245,16 +1239,14 @@ static const struct nf_queue_handler nfqh = {
.nf_hook_drop = nfqnl_nf_hook_drop,
};
-static int nfqnl_recv_config(struct net *net, struct sock *ctnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[],
- struct netlink_ext_ack *extack)
+static int nfqnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nfqa[])
{
- struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
+ struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
u_int16_t queue_num = ntohs(nfmsg->res_id);
- struct nfqnl_instance *queue;
struct nfqnl_msg_config_cmd *cmd = NULL;
- struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+ struct nfqnl_instance *queue;
__u32 flags = 0, mask = 0;
int ret = 0;
@@ -1373,17 +1365,29 @@ err_out_unlock:
}
static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
- [NFQNL_MSG_PACKET] = { .call_rcu = nfqnl_recv_unsupp,
- .attr_count = NFQA_MAX, },
- [NFQNL_MSG_VERDICT] = { .call_rcu = nfqnl_recv_verdict,
- .attr_count = NFQA_MAX,
- .policy = nfqa_verdict_policy },
- [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config,
- .attr_count = NFQA_CFG_MAX,
- .policy = nfqa_cfg_policy },
- [NFQNL_MSG_VERDICT_BATCH]={ .call_rcu = nfqnl_recv_verdict_batch,
- .attr_count = NFQA_MAX,
- .policy = nfqa_verdict_batch_policy },
+ [NFQNL_MSG_PACKET] = {
+ .call = nfqnl_recv_unsupp,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFQA_MAX,
+ },
+ [NFQNL_MSG_VERDICT] = {
+ .call = nfqnl_recv_verdict,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFQA_MAX,
+ .policy = nfqa_verdict_policy
+ },
+ [NFQNL_MSG_CONFIG] = {
+ .call = nfqnl_recv_config,
+ .type = NFNL_CB_MUTEX,
+ .attr_count = NFQA_CFG_MAX,
+ .policy = nfqa_cfg_policy
+ },
+ [NFQNL_MSG_VERDICT_BATCH] = {
+ .call = nfqnl_recv_verdict_batch,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFQA_MAX,
+ .policy = nfqa_verdict_batch_policy
+ },
};
static const struct nfnetlink_subsystem nfqnl_subsys = {
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 7a9aa57b195b..363bdd7044ec 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -2,7 +2,6 @@
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
-#include <net/netns/generic.h>
#include <net/netfilter/nf_tables.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
@@ -11,8 +10,6 @@
#include <net/netfilter/nf_tables_ipv4.h>
#include <net/netfilter/nf_tables_ipv6.h>
-extern unsigned int nf_tables_net_id;
-
#ifdef CONFIG_NF_TABLES_IPV4
static unsigned int nft_do_chain_ipv4(void *priv,
struct sk_buff *skb,
@@ -369,7 +366,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
event != NETDEV_CHANGENAME)
return NOTIFY_DONE;
- nft_net = net_generic(ctx.net, nf_tables_net_id);
+ nft_net = nft_pernet(ctx.net);
mutex_lock(&nft_net->commit_mutex);
list_for_each_entry(table, &nft_net->tables, list) {
if (table->family != NFPROTO_NETDEV)
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index b8dbd20a6a4c..5415ab14400d 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -613,17 +613,15 @@ nla_put_failure:
return -1;
}
-static int nfnl_compat_get_rcu(struct net *net, struct sock *nfnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const tb[],
- struct netlink_ext_ack *extack)
+static int nfnl_compat_get_rcu(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const tb[])
{
- int ret = 0, target;
struct nfgenmsg *nfmsg;
- const char *fmt;
- const char *name;
- u32 rev;
+ const char *name, *fmt;
struct sk_buff *skb2;
+ int ret = 0, target;
+ u32 rev;
if (tb[NFTA_COMPAT_NAME] == NULL ||
tb[NFTA_COMPAT_REV] == NULL ||
@@ -634,7 +632,7 @@ static int nfnl_compat_get_rcu(struct net *net, struct sock *nfnl,
rev = ntohl(nla_get_be32(tb[NFTA_COMPAT_REV]));
target = ntohl(nla_get_be32(tb[NFTA_COMPAT_TYPE]));
- nfmsg = nlmsg_data(nlh);
+ nfmsg = nlmsg_data(info->nlh);
switch(nfmsg->nfgen_family) {
case AF_INET:
@@ -673,8 +671,8 @@ static int nfnl_compat_get_rcu(struct net *net, struct sock *nfnl,
/* include the best revision for this extension in the message */
if (nfnl_compat_fill_info(skb2, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq,
- NFNL_MSG_TYPE(nlh->nlmsg_type),
+ info->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type),
NFNL_MSG_COMPAT_GET,
nfmsg->nfgen_family,
name, ret, target) <= 0) {
@@ -682,8 +680,8 @@ static int nfnl_compat_get_rcu(struct net *net, struct sock *nfnl,
goto out_put;
}
- ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
+ ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
if (ret > 0)
ret = 0;
out_put:
@@ -700,9 +698,12 @@ static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
};
static const struct nfnl_callback nfnl_nft_compat_cb[NFNL_MSG_COMPAT_MAX] = {
- [NFNL_MSG_COMPAT_GET] = { .call_rcu = nfnl_compat_get_rcu,
- .attr_count = NFTA_COMPAT_MAX,
- .policy = nfnl_compat_policy_get },
+ [NFNL_MSG_COMPAT_GET] = {
+ .call = nfnl_compat_get_rcu,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFTA_COMPAT_MAX,
+ .policy = nfnl_compat_policy_get
+ },
};
static const struct nfnetlink_subsystem nfnl_compat_subsys = {
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index f9437a0dcfef..6ba3256fa844 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -11,9 +11,6 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
-#include <net/netns/generic.h>
-
-extern unsigned int nf_tables_net_id;
struct nft_dynset {
struct nft_set *set;
@@ -164,7 +161,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
- struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
struct nft_dynset *priv = nft_expr_priv(expr);
u8 genmask = nft_genmask_next(ctx->net);
struct nft_set *set;
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index c9b8a2b03b71..9c169d100651 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -9,6 +9,7 @@
struct nft_socket {
enum nft_socket_keys key:8;
+ u8 level;
union {
u8 dreg;
};
@@ -33,6 +34,33 @@ static void nft_socket_wildcard(const struct nft_pktinfo *pkt,
}
}
+/*
+ * Copy the cgroup v2 ancestor id found at depth @level of the socket attached
+ * to the packet into @dest; sizeof(u64) bytes are written.
+ * Returns false when there is no full socket, when the socket belongs to a
+ * netns other than nft_net(pkt), or when @level is deeper than the cgroup.
+ * Guarded by CONFIG_SOCK_CGROUP_DATA because sock_cgroup_ptr() needs it.
+ */
+#ifdef CONFIG_SOCK_CGROUP_DATA
+static noinline bool
+nft_sock_get_eval_cgroupv2(u32 *dest, const struct nft_pktinfo *pkt, u32 level)
+{
+ struct sock *sk = skb_to_full_sk(pkt->skb);
+ struct cgroup *cgrp;
+
+ if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk)))
+ return false;
+
+ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+ if (level > cgrp->level)
+ return false;
+
+ memcpy(dest, &cgrp->ancestor_ids[level], sizeof(u64));
+
+ return true;
+}
+#endif
+
static void nft_socket_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -85,6 +113,14 @@ static void nft_socket_eval(const struct nft_expr *expr,
}
nft_socket_wildcard(pkt, regs, sk, dest);
break;
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ case NFT_SOCKET_CGROUPV2:
+ if (!nft_sock_get_eval_cgroupv2(dest, pkt, priv->level)) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+ break;
+#endif
default:
WARN_ON(1);
regs->verdict.code = NFT_BREAK;
@@ -97,6 +133,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = {
[NFTA_SOCKET_KEY] = { .type = NLA_U32 },
[NFTA_SOCKET_DREG] = { .type = NLA_U32 },
+ [NFTA_SOCKET_LEVEL] = { .type = NLA_U32 },
};
static int nft_socket_init(const struct nft_ctx *ctx,
@@ -104,7 +141,7 @@ static int nft_socket_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_socket *priv = nft_expr_priv(expr);
- unsigned int len;
+ unsigned int len, level;
if (!tb[NFTA_SOCKET_DREG] || !tb[NFTA_SOCKET_KEY])
return -EINVAL;
@@ -129,6 +166,20 @@ static int nft_socket_init(const struct nft_ctx *ctx,
case NFT_SOCKET_MARK:
len = sizeof(u32);
break;
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ case NFT_SOCKET_CGROUPV2:
+ if (!tb[NFTA_SOCKET_LEVEL])
+ return -EINVAL;
+
+ /* NFTA_SOCKET_LEVEL is big endian on the wire; priv->level is a u8 */
+ level = ntohl(nla_get_be32(tb[NFTA_SOCKET_LEVEL]));
+ if (level > 255)
+ return -EOPNOTSUPP;
+
+ priv->level = level;
+ len = sizeof(u64);
+ break;
+#endif
default:
return -EOPNOTSUPP;
}
@@ -146,6 +197,9 @@ static int nft_socket_dump(struct sk_buff *skb,
return -1;
if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg))
return -1;
+ if (priv->key == NFT_SOCKET_CGROUPV2 &&
+ nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level)))
+ return -1;
return 0;
}
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index 43a5a780a6d3..accef672088c 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -263,6 +263,29 @@ static int nft_tproxy_init(const struct nft_ctx *ctx,
return 0;
}
+/*
+ * Expression destructor: calls the nf_defrag_ipv{4,6}_disable() helper(s)
+ * matching the family stored in the expression's private data, for the
+ * netns in @ctx. NFPROTO_UNSPEC disables both families. The IPv6 calls are
+ * only compiled in when CONFIG_NF_TABLES_IPV6 is enabled.
+ */
+static void nft_tproxy_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_tproxy *priv = nft_expr_priv(expr);
+
+ switch (priv->family) {
+ case NFPROTO_IPV4:
+ nf_defrag_ipv4_disable(ctx->net);
+ break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+ nf_defrag_ipv6_disable(ctx->net);
+ break;
+#endif
+ case NFPROTO_UNSPEC:
+ /* Family-agnostic expression: release both defrag users. */
+ nf_defrag_ipv4_disable(ctx->net);
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ nf_defrag_ipv6_disable(ctx->net);
+#endif
+ break;
+ }
+}
+
static int nft_tproxy_dump(struct sk_buff *skb,
const struct nft_expr *expr)
{
@@ -288,6 +311,7 @@ static const struct nft_expr_ops nft_tproxy_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_tproxy)),
.eval = nft_tproxy_eval,
.init = nft_tproxy_init,
+ .destroy = nft_tproxy_destroy,
.dump = nft_tproxy_dump,
};
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index b7f8d2ed3cc2..84e58ee501a4 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -52,7 +52,7 @@ struct xt_af {
struct mutex mutex;
struct list_head match;
struct list_head target;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
struct mutex compat_mutex;
struct compat_delta *compat_tab;
unsigned int number; /* number of slots in compat_tab[] */
@@ -647,7 +647,7 @@ static bool error_tg_ok(unsigned int usersize, unsigned int kernsize,
return usersize == kernsize && strnlen(msg, msglen) < msglen;
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
{
struct xt_af *xp = &xt[af];
@@ -850,7 +850,7 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems,
__alignof__(struct compat_xt_entry_match));
}
EXPORT_SYMBOL(xt_compat_check_entry_offsets);
-#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
/**
* xt_check_entry_offsets - validate arp/ip/ip6t_entry
@@ -868,7 +868,7 @@ EXPORT_SYMBOL(xt_compat_check_entry_offsets);
* match structures are aligned, and that the last structure ends where
* the target structure begins.
*
- * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version.
+ * Also see xt_compat_check_entry_offsets for CONFIG_NETFILTER_XTABLES_COMPAT version.
*
* The arp/ip/ip6t_entry structure @base must have passed following tests:
* - it must point to a valid memory location
@@ -1059,7 +1059,7 @@ void *xt_copy_counters(sockptr_t arg, unsigned int len,
void *mem;
u64 size;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
if (in_compat_syscall()) {
/* structures only differ in size due to alignment */
struct compat_xt_counters_info compat_tmp;
@@ -1106,7 +1106,7 @@ void *xt_copy_counters(sockptr_t arg, unsigned int len,
}
EXPORT_SYMBOL_GPL(xt_copy_counters);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
int xt_compat_target_offset(const struct xt_target *target)
{
u_int16_t csize = target->compatsize ? : target->targetsize;
@@ -1199,6 +1199,23 @@ void xt_free_table_info(struct xt_table_info *info)
}
EXPORT_SYMBOL(xt_free_table_info);
+/*
+ * Find a table by name in @net's table list for address family @af.
+ *
+ * xt[af].mutex is held only while the list is walked and is released
+ * before returning; no reference is taken on the table here. @af is used
+ * directly as an index into xt[] and the per-netns tables[] array without
+ * a range check.
+ *
+ * Returns the matching table, or NULL if no table has that name.
+ */
+struct xt_table *xt_find_table(struct net *net, u8 af, const char *name)
+{
+ struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
+ struct xt_table *t;
+
+ mutex_lock(&xt[af].mutex);
+ list_for_each_entry(t, &xt_net->tables[af], list) {
+ if (strcmp(t->name, name) == 0) {
+ mutex_unlock(&xt[af].mutex);
+ return t;
+ }
+ }
+ mutex_unlock(&xt[af].mutex);
+ return NULL;
+}
+EXPORT_SYMBOL(xt_find_table);
+
/* Find table by name, grabs mutex & ref. Returns ERR_PTR on error. */
struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
const char *name)
@@ -1276,7 +1293,7 @@ void xt_table_unlock(struct xt_table *table)
}
EXPORT_SYMBOL_GPL(xt_table_unlock);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
void xt_compat_lock(u_int8_t af)
{
mutex_lock(&xt[af].compat_mutex);
@@ -1481,6 +1498,7 @@ void *xt_unregister_table(struct xt_table *table)
mutex_unlock(&xt[table->af].mutex);
audit_log_nfcfg(table->name, table->af, private->number,
AUDIT_XT_OP_UNREGISTER, GFP_KERNEL);
+ kfree(table->ops);
kfree(table);
return private;
@@ -1913,7 +1931,7 @@ static int __init xt_init(void)
for (i = 0; i < NFPROTO_NUMPROTO; i++) {
mutex_init(&xt[i].mutex);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
mutex_init(&xt[i].compat_mutex);
xt[i].compat_tab = NULL;
#endif
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 194dc03341f3..459d0696c91a 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -200,6 +200,11 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par)
pr_info_ratelimited("Can be used only with -p tcp or -p udp\n");
return -EINVAL;
}
+
+/* IPv6 TPROXY target destructor: calls nf_defrag_ipv6_disable() for the rule's netns. */
+static void tproxy_tg6_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_defrag_ipv6_disable(par->net);
+}
#endif
static int tproxy_tg4_check(const struct xt_tgchk_param *par)
@@ -219,6 +224,11 @@ static int tproxy_tg4_check(const struct xt_tgchk_param *par)
return -EINVAL;
}
+/* IPv4 TPROXY target destructor: calls nf_defrag_ipv4_disable() for the rule's netns. */
+static void tproxy_tg4_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_defrag_ipv4_disable(par->net);
+}
+
static struct xt_target tproxy_tg_reg[] __read_mostly = {
{
.name = "TPROXY",
@@ -228,6 +238,7 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
.revision = 0,
.targetsize = sizeof(struct xt_tproxy_target_info),
.checkentry = tproxy_tg4_check,
+ .destroy = tproxy_tg4_destroy,
.hooks = 1 << NF_INET_PRE_ROUTING,
.me = THIS_MODULE,
},
@@ -239,6 +250,7 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
.revision = 1,
.targetsize = sizeof(struct xt_tproxy_target_info_v1),
.checkentry = tproxy_tg4_check,
+ .destroy = tproxy_tg4_destroy,
.hooks = 1 << NF_INET_PRE_ROUTING,
.me = THIS_MODULE,
},
@@ -251,6 +263,7 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
.revision = 1,
.targetsize = sizeof(struct xt_tproxy_target_info_v1),
.checkentry = tproxy_tg6_check,
+ .destroy = tproxy_tg6_destroy,
.hooks = 1 << NF_INET_PRE_ROUTING,
.me = THIS_MODULE,
},
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index bd1dea9c7b88..24d4afb9988d 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -134,7 +134,7 @@ static void limit_mt_destroy(const struct xt_mtdtor_param *par)
kfree(info->master);
}
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
struct compat_xt_rateinfo {
u_int32_t avg;
u_int32_t burst;
@@ -176,7 +176,7 @@ static int limit_mt_compat_to_user(void __user *dst, const void *src)
};
return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
}
-#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
static struct xt_match limit_mt_reg __read_mostly = {
.name = "limit",
@@ -186,7 +186,7 @@ static struct xt_match limit_mt_reg __read_mostly = {
.checkentry = limit_mt_check,
.destroy = limit_mt_destroy,
.matchsize = sizeof(struct xt_rateinfo),
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
.compatsize = sizeof(struct compat_xt_rateinfo),
.compat_from_user = limit_mt_compat_from_user,
.compat_to_user = limit_mt_compat_to_user,
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 5f973987265d..5e6459e11605 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -216,6 +216,14 @@ static int socket_mt_v3_check(const struct xt_mtchk_param *par)
return 0;
}
+/*
+ * Destructor shared by every IPv4 and IPv6 revision of the "socket" match.
+ *
+ * Calls the defrag disable helper for the rule's own address family
+ * (par->family) in the rule's netns (par->net). An IPv6 rule must call
+ * nf_defrag_ipv6_disable(); calling the IPv4 helper there would leave the
+ * IPv6 side untouched and disable IPv4 one extra time.
+ *
+ * The IPv6 branch is only built when IPv6 iptables support is enabled, so
+ * that the IPv6 defrag symbol is not referenced in IPv4-only builds.
+ * NOTE(review): CONFIG_IP6_NF_IPTABLES is assumed to be the symbol guarding
+ * the IPv6 match handlers in this file - confirm.
+ */
+static void socket_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	if (par->family == NFPROTO_IPV4)
+		nf_defrag_ipv4_disable(par->net);
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+	else if (par->family == NFPROTO_IPV6)
+		nf_defrag_ipv6_disable(par->net);
+#endif
+}
+
static struct xt_match socket_mt_reg[] __read_mostly = {
{
.name = "socket",
@@ -231,6 +239,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
.revision = 1,
.family = NFPROTO_IPV4,
.match = socket_mt4_v1_v2_v3,
+ .destroy = socket_mt_destroy,
.checkentry = socket_mt_v1_check,
.matchsize = sizeof(struct xt_socket_mtinfo1),
.hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -245,6 +254,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
.match = socket_mt6_v1_v2_v3,
.checkentry = socket_mt_v1_check,
.matchsize = sizeof(struct xt_socket_mtinfo1),
+ .destroy = socket_mt_destroy,
.hooks = (1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
@@ -256,6 +266,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
.family = NFPROTO_IPV4,
.match = socket_mt4_v1_v2_v3,
.checkentry = socket_mt_v2_check,
+ .destroy = socket_mt_destroy,
.matchsize = sizeof(struct xt_socket_mtinfo1),
.hooks = (1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN),
@@ -268,6 +279,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
.family = NFPROTO_IPV6,
.match = socket_mt6_v1_v2_v3,
.checkentry = socket_mt_v2_check,
+ .destroy = socket_mt_destroy,
.matchsize = sizeof(struct xt_socket_mtinfo1),
.hooks = (1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN),
@@ -280,6 +292,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
.family = NFPROTO_IPV4,
.match = socket_mt4_v1_v2_v3,
.checkentry = socket_mt_v3_check,
+ .destroy = socket_mt_destroy,
.matchsize = sizeof(struct xt_socket_mtinfo1),
.hooks = (1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN),
@@ -292,6 +305,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
.family = NFPROTO_IPV6,
.match = socket_mt6_v1_v2_v3,
.checkentry = socket_mt_v3_check,
+ .destroy = socket_mt_destroy,
.matchsize = sizeof(struct xt_socket_mtinfo1),
.hooks = (1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN),
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 15424d26e85d..96b524ceabca 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -392,7 +392,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
*
* Start with a full bucket.
*/
- band->bucket = (band->burst_size + band->rate) * 1000ULL;
+ band->bucket = band->burst_size * 1000ULL;
band_max_delta_t = div_u64(band->bucket, band->rate);
if (band_max_delta_t > meter->max_delta_t)
meter->max_delta_t = band_max_delta_t;
@@ -641,7 +641,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
long long int max_bucket_size;
band = &meter->bands[i];
- max_bucket_size = (band->burst_size + band->rate) * 1000LL;
+ max_bucket_size = band->burst_size * 1000LL;
band->bucket += delta_ms * band->rate;
if (band->bucket > max_bucket_size)
diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
index 2bf2b1943e61..fa611678af05 100644
--- a/net/qrtr/mhi.c
+++ b/net/qrtr/mhi.c
@@ -50,6 +50,9 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep);
int rc;
+ if (skb->sk)
+ sock_hold(skb->sk);
+
rc = skb_linearize(skb);
if (rc)
goto free_skb;
@@ -59,12 +62,11 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
if (rc)
goto free_skb;
- if (skb->sk)
- sock_hold(skb->sk);
-
return rc;
free_skb:
+ if (skb->sk)
+ sock_put(skb->sk);
kfree_skb(skb);
return rc;
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 922ed6b91abb..5c91df52b8c2 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -945,6 +945,12 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
list_for_each_entry(entry, &new->entries, list)
cycle = ktime_add_ns(cycle, entry->interval);
+
+ if (!cycle) {
+ NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
+ return -EINVAL;
+ }
+
new->cycle_time = cycle;
}
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index e4370b1b7494..902cb6dd710b 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -733,6 +733,23 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
return t->send_pkt(reply);
}
+/*
+ * Free every packet still queued on the socket's rx_queue, then call
+ * vsock_remove_sock() on it.
+ *
+ * This function should be called with sk_lock held and SOCK_DONE set.
+ */
+static void virtio_transport_remove_sock(struct vsock_sock *vsk)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ struct virtio_vsock_pkt *pkt, *tmp;
+
+ /* We don't need to take rx_lock, as the socket is closing and we are
+ * removing it.
+ */
+ list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
+ list_del(&pkt->list);
+ virtio_transport_free_pkt(pkt);
+ }
+
+ vsock_remove_sock(vsk);
+}
+
static void virtio_transport_wait_close(struct sock *sk, long timeout)
{
if (timeout) {
@@ -765,7 +782,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk,
(!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
vsk->close_work_scheduled = false;
- vsock_remove_sock(vsk);
+ virtio_transport_remove_sock(vsk);
/* Release refcnt obtained when we scheduled the timeout */
sock_put(sk);
@@ -828,22 +845,15 @@ static bool virtio_transport_close(struct vsock_sock *vsk)
void virtio_transport_release(struct vsock_sock *vsk)
{
- struct virtio_vsock_sock *vvs = vsk->trans;
- struct virtio_vsock_pkt *pkt, *tmp;
struct sock *sk = &vsk->sk;
bool remove_sock = true;
if (sk->sk_type == SOCK_STREAM)
remove_sock = virtio_transport_close(vsk);
- list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
- }
-
if (remove_sock) {
sock_set_flag(sk, SOCK_DONE);
- vsock_remove_sock(vsk);
+ virtio_transport_remove_sock(vsk);
}
}
EXPORT_SYMBOL_GPL(virtio_transport_release);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 8b65323207db..1c9ecb18b8e6 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -568,8 +568,7 @@ vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
out:
if (err < 0) {
- pr_err("Could not attach to queue pair with %d\n",
- err);
+ pr_err_once("Could not attach to queue pair with %d\n", err);
err = vmci_transport_error_to_vsock_error(err);
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index a71ed664da0a..cd62d4ba87a9 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -30,7 +30,7 @@
#include "xdp_umem.h"
#include "xsk.h"
-#define TX_BATCH_SIZE 16
+#define TX_BATCH_SIZE 32
static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c
index 3f4599c9a202..ef30d2b353b0 100644
--- a/samples/bpf/tracex1_kern.c
+++ b/samples/bpf/tracex1_kern.c
@@ -26,7 +26,7 @@
SEC("kprobe/__netif_receive_skb_core")
int bpf_prog1(struct pt_regs *ctx)
{
- /* attaches to kprobe netif_receive_skb,
+ /* attaches to kprobe __netif_receive_skb_core,
* looks for packets on loobpack device and prints them
*/
char devname[IFNAMSIZ];
@@ -35,7 +35,7 @@ int bpf_prog1(struct pt_regs *ctx)
int len;
/* non-portable! works for the given kernel only */
- skb = (struct sk_buff *) PT_REGS_PARM1(ctx);
+ bpf_probe_read_kernel(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx));
dev = _(skb->dev);
len = _(skb->len);
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 3b261b0f74f0..667aacb9261c 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -213,6 +213,7 @@ vmlinux_link()
gen_btf()
{
local pahole_ver
+ local extra_paholeopt=
if ! [ -x "$(command -v ${PAHOLE})" ]; then
echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available"
@@ -227,8 +228,12 @@ gen_btf()
vmlinux_link ${1}
+ if [ "${pahole_ver}" -ge "121" ]; then
+ extra_paholeopt="${extra_paholeopt} --btf_gen_floats"
+ fi
+
info "BTF" ${2}
- LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1}
+ LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${extra_paholeopt} ${1}
# Create ${2} which contains just .BTF section but no symbols. Add
# SHF_ALLOC because .BTF will be part of the vmlinux image. --strip-all
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 62953bbf68b4..385d5c955cf3 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -71,7 +71,9 @@ static const char *btf_var_linkage_str(__u32 linkage)
case BTF_VAR_STATIC:
return "static";
case BTF_VAR_GLOBAL_ALLOCATED:
- return "global-alloc";
+ return "global";
+ case BTF_VAR_GLOBAL_EXTERN:
+ return "extern";
default:
return "(unknown)";
}
@@ -98,26 +100,28 @@ static const char *btf_str(const struct btf *btf, __u32 off)
return btf__name_by_offset(btf, off) ? : "(invalid)";
}
+/*
+ * Map @kind to a value usable as an index into btf_kind_str[]: kinds above
+ * BTF_KIND_MAX are reported as BTF_KIND_UNKN, all others are returned as is.
+ */
+static int btf_kind_safe(int kind)
+{
+ return kind <= BTF_KIND_MAX ? kind : BTF_KIND_UNKN;
+}
+
static int dump_btf_type(const struct btf *btf, __u32 id,
const struct btf_type *t)
{
json_writer_t *w = json_wtr;
- int kind, safe_kind;
-
- kind = BTF_INFO_KIND(t->info);
- safe_kind = kind <= BTF_KIND_MAX ? kind : BTF_KIND_UNKN;
+ int kind = btf_kind(t);
if (json_output) {
jsonw_start_object(w);
jsonw_uint_field(w, "id", id);
- jsonw_string_field(w, "kind", btf_kind_str[safe_kind]);
+ jsonw_string_field(w, "kind", btf_kind_str[btf_kind_safe(kind)]);
jsonw_string_field(w, "name", btf_str(btf, t->name_off));
} else {
- printf("[%u] %s '%s'", id, btf_kind_str[safe_kind],
+ printf("[%u] %s '%s'", id, btf_kind_str[btf_kind_safe(kind)],
btf_str(btf, t->name_off));
}
- switch (BTF_INFO_KIND(t->info)) {
+ switch (kind) {
case BTF_KIND_INT: {
__u32 v = *(__u32 *)(t + 1);
const char *enc;
@@ -300,7 +304,8 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
break;
}
case BTF_KIND_DATASEC: {
- const struct btf_var_secinfo *v = (const void *)(t+1);
+ const struct btf_var_secinfo *v = (const void *)(t + 1);
+ const struct btf_type *vt;
__u16 vlen = BTF_INFO_VLEN(t->info);
int i;
@@ -322,6 +327,13 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
} else {
printf("\n\ttype_id=%u offset=%u size=%u",
v->type, v->offset, v->size);
+
+ if (v->type <= btf__get_nr_types(btf)) {
+ vt = btf__type_by_id(btf, v->type);
+ printf(" (%s '%s')",
+ btf_kind_str[btf_kind_safe(btf_kind(vt))],
+ btf_str(btf, vt->name_off));
+ }
}
}
if (json_output)
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index ff3aa0cf3997..f836d115d7d6 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -157,7 +157,7 @@ static int netlink_recv(int sock, __u32 nl_pid, __u32 seq,
if (len == 0)
break;
- for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, (unsigned int)len);
nh = NLMSG_NEXT(nh, len)) {
if (nh->nlmsg_pid != nl_pid) {
ret = -LIBBPF_ERRNO__WRNGPID;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 69902603012c..ec6d85a81744 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -312,6 +312,27 @@ union bpf_iter_link_info {
* *ctx_out*, *data_out* (for example, packet data), result of the
* execution *retval*, and *duration* of the test run.
*
+ * The sizes of the buffers provided as input and output
+ * parameters *ctx_in*, *ctx_out*, *data_in*, and *data_out* must
+ * be provided in the corresponding variables *ctx_size_in*,
+ * *ctx_size_out*, *data_size_in*, and/or *data_size_out*. If any
+ * of these parameters are not provided (ie set to NULL), the
+ * corresponding size field must be zero.
+ *
+ * Some program types have particular requirements:
+ *
+ * **BPF_PROG_TYPE_SK_LOOKUP**
+ * *data_in* and *data_out* must be NULL.
+ *
+ * **BPF_PROG_TYPE_XDP**
+ * *ctx_in* and *ctx_out* must be NULL.
+ *
+ * **BPF_PROG_TYPE_RAW_TRACEPOINT**,
+ * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
+ *
+ * *ctx_out*, *data_in* and *data_out* must be NULL.
+ * *repeat* must be zero.
+ *
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
@@ -4061,12 +4082,20 @@ union bpf_attr {
* of new data availability is sent.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
+ * If **0** is specified in *flags*, an adaptive notification
+ * of new data availability is sent.
+ *
+ * An adaptive notification is a notification sent whenever the user-space
+ * process has caught up and consumed all available payloads. In case the user-space
+ * process is still processing a previous payload, then no notification is needed
+ * as it will process the newly added payload automatically.
* Return
* 0 on success, or a negative error in case of failure.
*
* void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
* Description
* Reserve *size* bytes of payload in a ring buffer *ringbuf*.
+ * *flags* must be 0.
* Return
* Valid pointer with *size* bytes of memory available; NULL,
* otherwise.
@@ -4078,6 +4107,10 @@ union bpf_attr {
* of new data availability is sent.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
+ * If **0** is specified in *flags*, an adaptive notification
+ * of new data availability is sent.
+ *
+ * See 'bpf_ringbuf_output()' for the definition of adaptive notification.
* Return
* Nothing. Always succeeds.
*
@@ -4088,6 +4121,10 @@ union bpf_attr {
* of new data availability is sent.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
+ * If **0** is specified in *flags*, an adaptive notification
+ * of new data availability is sent.
+ *
+ * See 'bpf_ringbuf_output()' for the definition of adaptive notification.
* Return
* Nothing. Always succeeds.
*
@@ -4578,7 +4615,7 @@ union bpf_attr {
*
* long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
* Description
- * Check ctx packet size against exceeding MTU of net device (based
+ * Check packet size against exceeding MTU of net device (based
* on *ifindex*). This helper will likely be used in combination
* with helpers that adjust/change the packet size.
*
@@ -4595,6 +4632,14 @@ union bpf_attr {
* against the current net device. This is practical if this isn't
* used prior to redirect.
*
+ * On input *mtu_len* must be a valid pointer, else verifier will
+ * reject BPF program. If the value *mtu_len* is initialized to
+ * zero then the ctx packet size is use. When value *mtu_len* is
+ * provided as input this specify the L3 length that the MTU check
+ * is done against. Remember XDP and TC length operate at L2, but
+ * this value is L3 as this correlate to MTU and IP-header tot_len
+ * values which are L3 (similar behavior as bpf_fib_lookup).
+ *
* The Linux kernel route table can configure MTUs on a more
* specific per route level, which is not provided by this helper.
* For route level MTU checks use the **bpf_fib_lookup**\ ()
@@ -4619,11 +4664,9 @@ union bpf_attr {
*
* On return *mtu_len* pointer contains the MTU value of the net
* device. Remember the net device configured MTU is the L3 size,
- * which is returned here and XDP and TX length operate at L2.
+ * which is returned here and XDP and TC length operate at L2.
* Helper take this into account for you, but remember when using
- * MTU value in your BPF-code. On input *mtu_len* must be a valid
- * pointer and be initialized (to zero), else verifier will reject
- * BPF program.
+ * MTU value in your BPF-code.
*
* Return
* * 0 on success, and populate MTU value in *mtu_len* pointer.
@@ -4665,6 +4708,33 @@ union bpf_attr {
* Return
* The number of traversed map elements for success, **-EINVAL** for
* invalid **flags**.
+ *
+ * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len)
+ * Description
+ * Outputs a string into the **str** buffer of size **str_size**
+ * based on a format string stored in a read-only map pointed by
+ * **fmt**.
+ *
+ * Each format specifier in **fmt** corresponds to one u64 element
+ * in the **data** array. For strings and pointers where pointees
+ * are accessed, only the pointer values are stored in the *data*
+ * array. The *data_len* is the size of *data* in bytes.
+ *
+ * Formats **%s** and **%p{i,I}{4,6}** require to read kernel
+ * memory. Reading kernel memory may fail due to either invalid
+ * address or valid address but requiring a major memory fault. If
+ * reading kernel memory fails, the string for **%s** will be an
+ * empty string, and the ip address for **%p{i,I}{4,6}** will be 0.
+ * Not returning error to bpf program is consistent with what
+ * **bpf_trace_printk**\ () does for now.
+ *
+ * Return
+ * The strictly positive length of the formatted string, including
+ * the trailing zero character. If the return value is greater than
+ * **str_size**, **str** contains a truncated string, guaranteed to
+ * be zero-terminated except when **str_size** is 0.
+ *
+ * Or **-EBUSY** if the per-CPU memory copy buffer is busy.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4832,6 +4902,7 @@ union bpf_attr {
FN(sock_from_file), \
FN(check_mtu), \
FN(for_each_map_elem), \
+ FN(snprintf), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5373,6 +5444,8 @@ struct bpf_link_info {
} raw_tracepoint;
struct {
__u32 attach_type;
+ __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */
+ __u32 target_btf_id; /* BTF type id inside the object */
} tracing;
struct {
__u64 cgroup_id;
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index cc2e51c64a54..9720dc0b4605 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -25,9 +25,16 @@
/*
* Helper macro to place programs, maps, license in
* different sections in elf_bpf file. Section names
- * are interpreted by elf_bpf loader
+ * are interpreted by libbpf depending on the context (BPF programs, BPF maps,
+ * extern variables, etc).
+ * To allow use of SEC() with externs (e.g., for extern .maps declarations),
+ * make sure __attribute__((unused)) doesn't trigger compilation warning.
*/
-#define SEC(NAME) __attribute__((section(NAME), used))
+#define SEC(name) \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wignored-attributes\"") \
+ __attribute__((section(name), used)) \
+ _Pragma("GCC diagnostic pop") \
/* Avoid 'linux/stddef.h' definition of '__always_inline'. */
#undef __always_inline
@@ -40,6 +47,14 @@
#define __weak __attribute__((weak))
#endif
+/*
+ * Use __hidden attribute to mark a non-static BPF subprogram effectively
+ * static for BPF verifier's verification algorithm purposes, allowing more
+ * extensive and permissive BPF verification process, taking into account
+ * subprogram's caller context.
+ */
+#define __hidden __attribute__((visibility("hidden")))
+
/* When utilizing vmlinux.h with BPF CO-RE, user BPF programs can't include
* any system-level headers (such as stddef.h, linux/version.h, etc), and
* commonly-used macros like NULL and KERNEL_VERSION aren't available through
@@ -51,7 +66,7 @@
#endif
#ifndef KERNEL_VERSION
-#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c))
+#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c)))
#endif
/*
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index f9ef37707888..8c954ebc0c7c 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -413,20 +413,56 @@ typeof(name(0)) name(struct pt_regs *ctx) \
} \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
+/*
+ * ___bpf_fill(arr, args...) assigns each of @args to consecutive elements of
+ * @arr starting at index 0, dispatching on the argument count to one of the
+ * ___bpf_fillN helpers (N = 0..12). ___bpf_fillN stores its first value at
+ * arr[p] and hands the remaining ones to ___bpf_fill(N-1) at p + 1.
+ *
+ * The expansion is a sequence of ';'-separated assignments that is not
+ * wrapped in a block, so it must be used as a statement inside braces.
+ */
+#define ___bpf_fill0(arr, p, x) do {} while (0)
+#define ___bpf_fill1(arr, p, x) arr[p] = x
+#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
+#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
+#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
+#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
+#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
+#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
+#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
+#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
+#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
+#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
+#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
+#define ___bpf_fill(arr, args...) \
+ ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
+
/*
* BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
* in a structure.
*/
-#define BPF_SEQ_PRINTF(seq, fmt, args...) \
- ({ \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- static const char ___fmt[] = fmt; \
- unsigned long long ___param[] = { args }; \
- _Pragma("GCC diagnostic pop") \
- int ___ret = bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
- ___param, sizeof(___param)); \
- ___ret; \
- })
+#define BPF_SEQ_PRINTF(seq, fmt, args...) \
+({ \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[___bpf_narg(args)]; \
+ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ ___bpf_fill(___param, args); \
+ _Pragma("GCC diagnostic pop") \
+ \
+ bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
+ ___param, sizeof(___param)); \
+})
+
+/*
+ * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
+ * an array of u64.
+ */
+#define BPF_SNPRINTF(out, out_size, fmt, args...) \
+({ \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[___bpf_narg(args)]; \
+ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ ___bpf_fill(___param, args); \
+ _Pragma("GCC diagnostic pop") \
+ \
+ bpf_snprintf(out, out_size, ___fmt, \
+ ___param, sizeof(___param)); \
+})
#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index d30e67e7e1e5..d57e13a13798 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1605,11 +1605,6 @@ static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
btf->hdr->type_len, UINT_MAX, add_sz);
}
-static __u32 btf_type_info(int kind, int vlen, int kflag)
-{
- return (kflag << 31) | (kind << 24) | vlen;
-}
-
static void btf_type_inc_vlen(struct btf_type *t)
{
t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t));
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7aad78dbb4b4..a1cddd17af7d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -69,8 +69,7 @@
#define __printf(a, b) __attribute__((format(printf, a, b)))
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
-static const struct btf_type *
-skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
+static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
static int __base_pr(enum libbpf_print_level level, const char *format,
va_list args)
@@ -195,7 +194,6 @@ struct reloc_desc {
int insn_idx;
int map_idx;
int sym_off;
- bool processed;
};
struct bpf_sec_def;
@@ -275,6 +273,7 @@ struct bpf_program {
bpf_program_clear_priv_t clear_priv;
bool load;
+ bool mark_btf_static;
enum bpf_prog_type type;
enum bpf_attach_type expected_attach_type;
int prog_ifindex;
@@ -501,8 +500,6 @@ static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
-static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
- size_t off, __u32 sym_type, GElf_Sym *sym);
void bpf_program__unload(struct bpf_program *prog)
{
@@ -643,25 +640,29 @@ static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
const char *sec_name, int sec_idx)
{
+ Elf_Data *symbols = obj->efile.symbols;
struct bpf_program *prog, *progs;
void *data = sec_data->d_buf;
- size_t sec_sz = sec_data->d_size, sec_off, prog_sz;
- int nr_progs, err;
+ size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
+ int nr_progs, err, i;
const char *name;
GElf_Sym sym;
progs = obj->programs;
nr_progs = obj->nr_programs;
+ nr_syms = symbols->d_size / sizeof(GElf_Sym);
sec_off = 0;
- while (sec_off < sec_sz) {
- if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) {
- pr_warn("sec '%s': failed to find program symbol at offset %zu\n",
- sec_name, sec_off);
- return -LIBBPF_ERRNO__FORMAT;
- }
+ for (i = 0; i < nr_syms; i++) {
+ if (!gelf_getsym(symbols, i, &sym))
+ continue;
+ if (sym.st_shndx != sec_idx)
+ continue;
+ if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
+ continue;
prog_sz = sym.st_size;
+ sec_off = sym.st_value;
name = elf_sym_str(obj, sym.st_name);
if (!name) {
@@ -699,10 +700,17 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
if (err)
return err;
+ /* if the function is a global/weak symbol but has hidden
+ * visibility (STV_HIDDEN), mark its BTF FUNC as static to
+ * enable a more permissive BPF verification mode in which more
+ * outside context is available to the BPF verifier
+ */
+ if (GELF_ST_BIND(sym.st_info) != STB_LOCAL
+ && GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN)
+ prog->mark_btf_static = true;
+
nr_progs++;
obj->nr_programs = nr_progs;
-
- sec_off += prog_sz;
}
return 0;
@@ -1896,7 +1904,7 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
return 0;
}
-static const struct btf_type *
+const struct btf_type *
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
{
const struct btf_type *t = btf__type_by_id(btf, id);
@@ -1951,16 +1959,11 @@ static const char *__btf_kind_str(__u16 kind)
}
}
-static const char *btf_kind_str(const struct btf_type *t)
+const char *btf_kind_str(const struct btf_type *t)
{
return __btf_kind_str(btf_kind(t));
}
-static enum btf_func_linkage btf_func_linkage(const struct btf_type *t)
-{
- return (enum btf_func_linkage)BTF_INFO_VLEN(t->info);
-}
-
/*
* Fetch integer attribute of BTF map definition. Such attributes are
* represented using a pointer to an array, in which dimensionality of array
@@ -2015,254 +2018,262 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
return bpf_map__set_pin_path(map, buf);
}
-
-static int parse_btf_map_def(struct bpf_object *obj,
- struct bpf_map *map,
- const struct btf_type *def,
- bool strict, bool is_inner,
- const char *pin_root_path)
+int parse_btf_map_def(const char *map_name, struct btf *btf,
+ const struct btf_type *def_t, bool strict,
+ struct btf_map_def *map_def, struct btf_map_def *inner_def)
{
const struct btf_type *t;
const struct btf_member *m;
+ bool is_inner = inner_def == NULL;
int vlen, i;
- vlen = btf_vlen(def);
- m = btf_members(def);
+ vlen = btf_vlen(def_t);
+ m = btf_members(def_t);
for (i = 0; i < vlen; i++, m++) {
- const char *name = btf__name_by_offset(obj->btf, m->name_off);
+ const char *name = btf__name_by_offset(btf, m->name_off);
if (!name) {
- pr_warn("map '%s': invalid field #%d.\n", map->name, i);
+ pr_warn("map '%s': invalid field #%d.\n", map_name, i);
return -EINVAL;
}
if (strcmp(name, "type") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m,
- &map->def.type))
+ if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
return -EINVAL;
- pr_debug("map '%s': found type = %u.\n",
- map->name, map->def.type);
+ map_def->parts |= MAP_DEF_MAP_TYPE;
} else if (strcmp(name, "max_entries") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m,
- &map->def.max_entries))
+ if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
return -EINVAL;
- pr_debug("map '%s': found max_entries = %u.\n",
- map->name, map->def.max_entries);
+ map_def->parts |= MAP_DEF_MAX_ENTRIES;
} else if (strcmp(name, "map_flags") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m,
- &map->def.map_flags))
+ if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
return -EINVAL;
- pr_debug("map '%s': found map_flags = %u.\n",
- map->name, map->def.map_flags);
+ map_def->parts |= MAP_DEF_MAP_FLAGS;
} else if (strcmp(name, "numa_node") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
+ if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
return -EINVAL;
- pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
+ map_def->parts |= MAP_DEF_NUMA_NODE;
} else if (strcmp(name, "key_size") == 0) {
__u32 sz;
- if (!get_map_field_int(map->name, obj->btf, m, &sz))
+ if (!get_map_field_int(map_name, btf, m, &sz))
return -EINVAL;
- pr_debug("map '%s': found key_size = %u.\n",
- map->name, sz);
- if (map->def.key_size && map->def.key_size != sz) {
+ if (map_def->key_size && map_def->key_size != sz) {
pr_warn("map '%s': conflicting key size %u != %u.\n",
- map->name, map->def.key_size, sz);
+ map_name, map_def->key_size, sz);
return -EINVAL;
}
- map->def.key_size = sz;
+ map_def->key_size = sz;
+ map_def->parts |= MAP_DEF_KEY_SIZE;
} else if (strcmp(name, "key") == 0) {
__s64 sz;
- t = btf__type_by_id(obj->btf, m->type);
+ t = btf__type_by_id(btf, m->type);
if (!t) {
pr_warn("map '%s': key type [%d] not found.\n",
- map->name, m->type);
+ map_name, m->type);
return -EINVAL;
}
if (!btf_is_ptr(t)) {
pr_warn("map '%s': key spec is not PTR: %s.\n",
- map->name, btf_kind_str(t));
+ map_name, btf_kind_str(t));
return -EINVAL;
}
- sz = btf__resolve_size(obj->btf, t->type);
+ sz = btf__resolve_size(btf, t->type);
if (sz < 0) {
pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
- map->name, t->type, (ssize_t)sz);
+ map_name, t->type, (ssize_t)sz);
return sz;
}
- pr_debug("map '%s': found key [%u], sz = %zd.\n",
- map->name, t->type, (ssize_t)sz);
- if (map->def.key_size && map->def.key_size != sz) {
+ if (map_def->key_size && map_def->key_size != sz) {
pr_warn("map '%s': conflicting key size %u != %zd.\n",
- map->name, map->def.key_size, (ssize_t)sz);
+ map_name, map_def->key_size, (ssize_t)sz);
return -EINVAL;
}
- map->def.key_size = sz;
- map->btf_key_type_id = t->type;
+ map_def->key_size = sz;
+ map_def->key_type_id = t->type;
+ map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
} else if (strcmp(name, "value_size") == 0) {
__u32 sz;
- if (!get_map_field_int(map->name, obj->btf, m, &sz))
+ if (!get_map_field_int(map_name, btf, m, &sz))
return -EINVAL;
- pr_debug("map '%s': found value_size = %u.\n",
- map->name, sz);
- if (map->def.value_size && map->def.value_size != sz) {
+ if (map_def->value_size && map_def->value_size != sz) {
pr_warn("map '%s': conflicting value size %u != %u.\n",
- map->name, map->def.value_size, sz);
+ map_name, map_def->value_size, sz);
return -EINVAL;
}
- map->def.value_size = sz;
+ map_def->value_size = sz;
+ map_def->parts |= MAP_DEF_VALUE_SIZE;
} else if (strcmp(name, "value") == 0) {
__s64 sz;
- t = btf__type_by_id(obj->btf, m->type);
+ t = btf__type_by_id(btf, m->type);
if (!t) {
pr_warn("map '%s': value type [%d] not found.\n",
- map->name, m->type);
+ map_name, m->type);
return -EINVAL;
}
if (!btf_is_ptr(t)) {
pr_warn("map '%s': value spec is not PTR: %s.\n",
- map->name, btf_kind_str(t));
+ map_name, btf_kind_str(t));
return -EINVAL;
}
- sz = btf__resolve_size(obj->btf, t->type);
+ sz = btf__resolve_size(btf, t->type);
if (sz < 0) {
pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
- map->name, t->type, (ssize_t)sz);
+ map_name, t->type, (ssize_t)sz);
return sz;
}
- pr_debug("map '%s': found value [%u], sz = %zd.\n",
- map->name, t->type, (ssize_t)sz);
- if (map->def.value_size && map->def.value_size != sz) {
+ if (map_def->value_size && map_def->value_size != sz) {
pr_warn("map '%s': conflicting value size %u != %zd.\n",
- map->name, map->def.value_size, (ssize_t)sz);
+ map_name, map_def->value_size, (ssize_t)sz);
return -EINVAL;
}
- map->def.value_size = sz;
- map->btf_value_type_id = t->type;
+ map_def->value_size = sz;
+ map_def->value_type_id = t->type;
+ map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
}
else if (strcmp(name, "values") == 0) {
+ char inner_map_name[128];
int err;
if (is_inner) {
pr_warn("map '%s': multi-level inner maps not supported.\n",
- map->name);
+ map_name);
return -ENOTSUP;
}
if (i != vlen - 1) {
pr_warn("map '%s': '%s' member should be last.\n",
- map->name, name);
+ map_name, name);
return -EINVAL;
}
- if (!bpf_map_type__is_map_in_map(map->def.type)) {
+ if (!bpf_map_type__is_map_in_map(map_def->map_type)) {
pr_warn("map '%s': should be map-in-map.\n",
- map->name);
+ map_name);
return -ENOTSUP;
}
- if (map->def.value_size && map->def.value_size != 4) {
+ if (map_def->value_size && map_def->value_size != 4) {
pr_warn("map '%s': conflicting value size %u != 4.\n",
- map->name, map->def.value_size);
+ map_name, map_def->value_size);
return -EINVAL;
}
- map->def.value_size = 4;
- t = btf__type_by_id(obj->btf, m->type);
+ map_def->value_size = 4;
+ t = btf__type_by_id(btf, m->type);
if (!t) {
pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
- map->name, m->type);
+ map_name, m->type);
return -EINVAL;
}
if (!btf_is_array(t) || btf_array(t)->nelems) {
pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
- map->name);
+ map_name);
return -EINVAL;
}
- t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
- NULL);
+ t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
if (!btf_is_ptr(t)) {
pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
- map->name, btf_kind_str(t));
+ map_name, btf_kind_str(t));
return -EINVAL;
}
- t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+ t = skip_mods_and_typedefs(btf, t->type, NULL);
if (!btf_is_struct(t)) {
pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
- map->name, btf_kind_str(t));
+ map_name, btf_kind_str(t));
return -EINVAL;
}
- map->inner_map = calloc(1, sizeof(*map->inner_map));
- if (!map->inner_map)
- return -ENOMEM;
- map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
- map->inner_map->name = malloc(strlen(map->name) +
- sizeof(".inner") + 1);
- if (!map->inner_map->name)
- return -ENOMEM;
- sprintf(map->inner_map->name, "%s.inner", map->name);
-
- err = parse_btf_map_def(obj, map->inner_map, t, strict,
- true /* is_inner */, NULL);
+ snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
+ err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
if (err)
return err;
+
+ map_def->parts |= MAP_DEF_INNER_MAP;
} else if (strcmp(name, "pinning") == 0) {
__u32 val;
- int err;
if (is_inner) {
- pr_debug("map '%s': inner def can't be pinned.\n",
- map->name);
+ pr_warn("map '%s': inner def can't be pinned.\n", map_name);
return -EINVAL;
}
- if (!get_map_field_int(map->name, obj->btf, m, &val))
+ if (!get_map_field_int(map_name, btf, m, &val))
return -EINVAL;
- pr_debug("map '%s': found pinning = %u.\n",
- map->name, val);
-
- if (val != LIBBPF_PIN_NONE &&
- val != LIBBPF_PIN_BY_NAME) {
+ if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
pr_warn("map '%s': invalid pinning value %u.\n",
- map->name, val);
+ map_name, val);
return -EINVAL;
}
- if (val == LIBBPF_PIN_BY_NAME) {
- err = build_map_pin_path(map, pin_root_path);
- if (err) {
- pr_warn("map '%s': couldn't build pin path.\n",
- map->name);
- return err;
- }
- }
+ map_def->pinning = val;
+ map_def->parts |= MAP_DEF_PINNING;
} else {
if (strict) {
- pr_warn("map '%s': unknown field '%s'.\n",
- map->name, name);
+ pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
return -ENOTSUP;
}
- pr_debug("map '%s': ignoring unknown field '%s'.\n",
- map->name, name);
+ pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
}
}
- if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
- pr_warn("map '%s': map type isn't specified.\n", map->name);
+ if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
+ pr_warn("map '%s': map type isn't specified.\n", map_name);
return -EINVAL;
}
return 0;
}
+static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
+{
+ map->def.type = def->map_type;
+ map->def.key_size = def->key_size;
+ map->def.value_size = def->value_size;
+ map->def.max_entries = def->max_entries;
+ map->def.map_flags = def->map_flags;
+
+ map->numa_node = def->numa_node;
+ map->btf_key_type_id = def->key_type_id;
+ map->btf_value_type_id = def->value_type_id;
+
+ if (def->parts & MAP_DEF_MAP_TYPE)
+ pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
+
+ if (def->parts & MAP_DEF_KEY_TYPE)
+ pr_debug("map '%s': found key [%u], sz = %u.\n",
+ map->name, def->key_type_id, def->key_size);
+ else if (def->parts & MAP_DEF_KEY_SIZE)
+ pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
+
+ if (def->parts & MAP_DEF_VALUE_TYPE)
+ pr_debug("map '%s': found value [%u], sz = %u.\n",
+ map->name, def->value_type_id, def->value_size);
+ else if (def->parts & MAP_DEF_VALUE_SIZE)
+ pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
+
+ if (def->parts & MAP_DEF_MAX_ENTRIES)
+ pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
+ if (def->parts & MAP_DEF_MAP_FLAGS)
+ pr_debug("map '%s': found map_flags = %u.\n", map->name, def->map_flags);
+ if (def->parts & MAP_DEF_PINNING)
+ pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
+ if (def->parts & MAP_DEF_NUMA_NODE)
+ pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
+
+ if (def->parts & MAP_DEF_INNER_MAP)
+ pr_debug("map '%s': found inner map definition.\n", map->name);
+}
+
static int bpf_object__init_user_btf_map(struct bpf_object *obj,
const struct btf_type *sec,
int var_idx, int sec_idx,
const Elf_Data *data, bool strict,
const char *pin_root_path)
{
+ struct btf_map_def map_def = {}, inner_def = {};
const struct btf_type *var, *def;
const struct btf_var_secinfo *vi;
const struct btf_var *var_extra;
const char *map_name;
struct bpf_map *map;
+ int err;
vi = btf_var_secinfos(sec) + var_idx;
var = btf__type_by_id(obj->btf, vi->type);
@@ -2316,7 +2327,35 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
map_name, map->sec_idx, map->sec_offset);
- return parse_btf_map_def(obj, map, def, strict, false, pin_root_path);
+ err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
+ if (err)
+ return err;
+
+ fill_map_from_def(map, &map_def);
+
+ if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
+ err = build_map_pin_path(map, pin_root_path);
+ if (err) {
+ pr_warn("map '%s': couldn't build pin path.\n", map->name);
+ return err;
+ }
+ }
+
+ if (map_def.parts & MAP_DEF_INNER_MAP) {
+ map->inner_map = calloc(1, sizeof(*map->inner_map));
+ if (!map->inner_map)
+ return -ENOMEM;
+ map->inner_map->fd = -1;
+ map->inner_map->sec_idx = sec_idx;
+ map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
+ if (!map->inner_map->name)
+ return -ENOMEM;
+ sprintf(map->inner_map->name, "%s.inner", map_name);
+
+ fill_map_from_def(map->inner_map, &inner_def);
+ }
+
+ return 0;
}
static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
@@ -2618,7 +2657,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
{
struct btf *kern_btf = obj->btf;
bool btf_mandatory, sanitize;
- int err = 0;
+ int i, err = 0;
if (!obj->btf)
return 0;
@@ -2632,6 +2671,38 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
return 0;
}
+ /* Even though some subprogs are global/weak, the user might prefer the
+ * more permissive verification that the BPF verifier performs for
+ * static functions, which takes into account more context from the
+ * caller functions. In such a case, they need to mark such subprogs with
+ * __attribute__((visibility("hidden"))), and libbpf will mark the
+ * corresponding BTF FUNC type as static, triggering the more involved
+ * BPF verification process.
+ */
+ for (i = 0; i < obj->nr_programs; i++) {
+ struct bpf_program *prog = &obj->programs[i];
+ struct btf_type *t;
+ const char *name;
+ int j, n;
+
+ if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
+ continue;
+
+ n = btf__get_nr_types(obj->btf);
+ for (j = 1; j <= n; j++) {
+ t = btf_type_by_id(obj->btf, j);
+ if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
+ continue;
+
+ name = btf__str_by_offset(obj->btf, t->name_off);
+ if (strcmp(name, prog->name) != 0)
+ continue;
+
+ t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
+ break;
+ }
+ }
+
sanitize = btf_needs_sanitization(obj);
if (sanitize) {
const void *raw_data;
@@ -2782,26 +2853,6 @@ static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
return data;
}
-static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
- size_t off, __u32 sym_type, GElf_Sym *sym)
-{
- Elf_Data *symbols = obj->efile.symbols;
- size_t n = symbols->d_size / sizeof(GElf_Sym);
- int i;
-
- for (i = 0; i < n; i++) {
- if (!gelf_getsym(symbols, i, sym))
- continue;
- if (sym->st_shndx != sec_idx || sym->st_value != off)
- continue;
- if (GELF_ST_TYPE(sym->st_info) != sym_type)
- continue;
- return 0;
- }
-
- return -ENOENT;
-}
-
static bool is_sec_name_dwarf(const char *name)
{
/* approximation, but the actual list is too long */
@@ -3498,8 +3549,6 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
const char *sym_sec_name;
struct bpf_map *map;
- reloc_desc->processed = false;
-
if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
prog->name, sym_name, insn_idx, insn->code);
@@ -3682,11 +3731,16 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
int err, i, nrels;
const char *sym_name;
__u32 insn_idx;
+ Elf_Scn *scn;
+ Elf_Data *scn_data;
GElf_Sym sym;
GElf_Rel rel;
+ scn = elf_sec_by_idx(obj, sec_idx);
+ scn_data = elf_sec_data(obj, scn);
+
relo_sec_name = elf_sec_str(obj, shdr->sh_name);
- sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ sec_name = elf_sec_name(obj, scn);
if (!relo_sec_name || !sec_name)
return -EINVAL;
@@ -3704,7 +3758,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
return -LIBBPF_ERRNO__FORMAT;
}
- if (rel.r_offset % BPF_INSN_SZ) {
+
+ if (rel.r_offset % BPF_INSN_SZ || rel.r_offset >= scn_data->d_size) {
pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
return -LIBBPF_ERRNO__FORMAT;
@@ -3728,9 +3783,9 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
if (!prog) {
- pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n",
+ pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
relo_sec_name, i, sec_name, insn_idx);
- return -LIBBPF_ERRNO__RELOC;
+ continue;
}
relos = libbpf_reallocarray(prog->reloc_desc,
@@ -3845,6 +3900,14 @@ __u32 bpf_map__max_entries(const struct bpf_map *map)
return map->def.max_entries;
}
+struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
+{
+ if (!bpf_map_type__is_map_in_map(map->def.type))
+ return NULL;
+
+ return map->inner_map;
+}
+
int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
{
if (map->fd >= 0)
@@ -6305,13 +6368,11 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
case RELO_LD64:
insn[0].src_reg = BPF_PSEUDO_MAP_FD;
insn[0].imm = obj->maps[relo->map_idx].fd;
- relo->processed = true;
break;
case RELO_DATA:
insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
insn[1].imm = insn[0].imm + relo->sym_off;
insn[0].imm = obj->maps[relo->map_idx].fd;
- relo->processed = true;
break;
case RELO_EXTERN_VAR:
ext = &obj->externs[relo->sym_off];
@@ -6329,13 +6390,11 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
insn[1].imm = ext->ksym.addr >> 32;
}
}
- relo->processed = true;
break;
case RELO_EXTERN_FUNC:
ext = &obj->externs[relo->sym_off];
insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
insn[0].imm = ext->ksym.kernel_btf_id;
- relo->processed = true;
break;
case RELO_SUBPROG_ADDR:
insn[0].src_reg = BPF_PSEUDO_FUNC;
@@ -6621,9 +6680,6 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
* different main programs */
insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
- if (relo)
- relo->processed = true;
-
pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
}
@@ -6716,7 +6772,7 @@ static int
bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
{
struct bpf_program *subprog;
- int i, j, err;
+ int i, err;
/* mark all subprogs as not relocated (yet) within the context of
* current main program
@@ -6727,9 +6783,6 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
continue;
subprog->sub_insn_off = 0;
- for (j = 0; j < subprog->nr_reloc; j++)
- if (subprog->reloc_desc[j].type == RELO_CALL)
- subprog->reloc_desc[j].processed = false;
}
err = bpf_object__reloc_code(obj, prog, prog);
@@ -6976,7 +7029,7 @@ static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id
return false;
}
-static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog)
+static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
{
struct bpf_insn *insn = prog->insns;
enum bpf_func_id func_id;
@@ -9476,6 +9529,7 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
pr_warn("error: inner_map_fd already specified\n");
return -EINVAL;
}
+ zfree(&map->inner_map);
map->inner_map_fd = fd;
return 0;
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index f500621d28e5..bec4e6a6e31d 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -480,6 +480,7 @@ LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd);
+LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map);
LIBBPF_API long libbpf_get_error(const void *ptr);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index f5990f7208ce..b9b29baf1df8 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -359,5 +359,6 @@ LIBBPF_0.4.0 {
bpf_linker__finalize;
bpf_linker__free;
bpf_linker__new;
+ bpf_map__inner_map;
bpf_object__set_kversion;
} LIBBPF_0.3.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 6017902c687e..ee426226928f 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -19,6 +19,7 @@
#pragma GCC poison reallocarray
#include "libbpf.h"
+#include "btf.h"
#ifndef EM_BPF
#define EM_BPF 247
@@ -131,6 +132,50 @@ struct btf;
struct btf_type;
struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id);
+const char *btf_kind_str(const struct btf_type *t);
+const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
+
+static inline enum btf_func_linkage btf_func_linkage(const struct btf_type *t)
+{
+ return (enum btf_func_linkage)(int)btf_vlen(t);
+}
+
+static inline __u32 btf_type_info(int kind, int vlen, int kflag)
+{
+ return (kflag << 31) | (kind << 24) | vlen;
+}
+
+enum map_def_parts {
+ MAP_DEF_MAP_TYPE = 0x001,
+ MAP_DEF_KEY_TYPE = 0x002,
+ MAP_DEF_KEY_SIZE = 0x004,
+ MAP_DEF_VALUE_TYPE = 0x008,
+ MAP_DEF_VALUE_SIZE = 0x010,
+ MAP_DEF_MAX_ENTRIES = 0x020,
+ MAP_DEF_MAP_FLAGS = 0x040,
+ MAP_DEF_NUMA_NODE = 0x080,
+ MAP_DEF_PINNING = 0x100,
+ MAP_DEF_INNER_MAP = 0x200,
+
+ MAP_DEF_ALL = 0x3ff, /* combination of all above */
+};
+
+struct btf_map_def {
+ enum map_def_parts parts;
+ __u32 map_type;
+ __u32 key_type_id;
+ __u32 key_size;
+ __u32 value_type_id;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 map_flags;
+ __u32 numa_node;
+ __u32 pinning;
+};
+
+int parse_btf_map_def(const char *map_name, struct btf *btf,
+ const struct btf_type *def_t, bool strict,
+ struct btf_map_def *map_def, struct btf_map_def *inner_def);
void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
size_t cur_cnt, size_t max_cnt, size_t add_cnt);
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 46b16cbdcda3..9de084b1c699 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -22,6 +22,8 @@
#include "libbpf_internal.h"
#include "strset.h"
+#define BTF_EXTERN_SEC ".extern"
+
struct src_sec {
const char *sec_name;
/* positional (not necessarily ELF) index in an array of sections */
@@ -74,11 +76,36 @@ struct btf_ext_sec_data {
void *recs;
};
+struct glob_sym {
+ /* ELF symbol index */
+ int sym_idx;
+ /* associated section id for .ksyms, .kconfig, etc., but not .extern */
+ int sec_id;
+ /* extern name offset in STRTAB */
+ int name_off;
+ /* optional associated BTF type ID */
+ int btf_id;
+ /* BTF type ID that the VAR/FUNC type points to; used for
+ * rewriting types when an extern VAR/FUNC is resolved to a concrete
+ * definition
+ */
+ int underlying_btf_id;
+ /* sec_var index in the corresponding dst_sec, if exists */
+ int var_idx;
+
+ /* extern or resolved/global symbol */
+ bool is_extern;
+ /* weak or strong symbol, never goes back from strong to weak */
+ bool is_weak;
+};
+
struct dst_sec {
char *sec_name;
/* positional (not necessarily ELF) index in an array of sections */
int id;
+ bool ephemeral;
+
/* ELF info */
size_t sec_idx;
Elf_Scn *scn;
@@ -120,22 +147,28 @@ struct bpf_linker {
struct btf *btf;
struct btf_ext *btf_ext;
+
+ /* global (including extern) ELF symbols */
+ int glob_sym_cnt;
+ struct glob_sym *glob_syms;
};
#define pr_warn_elf(fmt, ...) \
-do { \
- libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1)); \
-} while (0)
+ libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1))
static int init_output_elf(struct bpf_linker *linker, const char *file);
static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj);
static int linker_sanity_check_elf(struct src_obj *obj);
+static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec);
+static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec);
static int linker_sanity_check_btf(struct src_obj *obj);
static int linker_sanity_check_btf_ext(struct src_obj *obj);
static int linker_fixup_btf(struct src_obj *obj);
static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj);
static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
+ Elf64_Sym *sym, const char *sym_name, int src_sym_idx);
static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj);
static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj);
static int linker_append_btf_ext(struct bpf_linker *linker, struct src_obj *obj);
@@ -282,7 +315,7 @@ static int init_output_elf(struct bpf_linker *linker, const char *file)
/* ELF header */
linker->elf_hdr = elf64_newehdr(linker->elf);
- if (!linker->elf_hdr){
+ if (!linker->elf_hdr) {
pr_warn_elf("failed to create ELF header");
return -EINVAL;
}
@@ -663,8 +696,8 @@ static bool is_pow_of_2(size_t x)
static int linker_sanity_check_elf(struct src_obj *obj)
{
- struct src_sec *sec, *link_sec;
- int i, j, n;
+ struct src_sec *sec;
+ int i, err;
if (!obj->symtab_sec_idx) {
pr_warn("ELF is missing SYMTAB section in %s\n", obj->filename);
@@ -692,43 +725,11 @@ static int linker_sanity_check_elf(struct src_obj *obj)
return -EINVAL;
switch (sec->shdr->sh_type) {
- case SHT_SYMTAB: {
- Elf64_Sym *sym;
-
- if (sec->shdr->sh_entsize != sizeof(Elf64_Sym))
- return -EINVAL;
- if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
- return -EINVAL;
-
- if (!sec->shdr->sh_link || sec->shdr->sh_link >= obj->sec_cnt) {
- pr_warn("ELF SYMTAB section #%zu points to missing STRTAB section #%zu in %s\n",
- sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
- return -EINVAL;
- }
- link_sec = &obj->secs[sec->shdr->sh_link];
- if (link_sec->shdr->sh_type != SHT_STRTAB) {
- pr_warn("ELF SYMTAB section #%zu points to invalid STRTAB section #%zu in %s\n",
- sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
- return -EINVAL;
- }
-
- n = sec->shdr->sh_size / sec->shdr->sh_entsize;
- sym = sec->data->d_buf;
- for (j = 0; j < n; j++, sym++) {
- if (sym->st_shndx
- && sym->st_shndx < SHN_LORESERVE
- && sym->st_shndx >= obj->sec_cnt) {
- pr_warn("ELF sym #%d in section #%zu points to missing section #%zu in %s\n",
- j, sec->sec_idx, (size_t)sym->st_shndx, obj->filename);
- return -EINVAL;
- }
- if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) {
- if (sym->st_value != 0)
- return -EINVAL;
- }
- }
+ case SHT_SYMTAB:
+ err = linker_sanity_check_elf_symtab(obj, sec);
+ if (err)
+ return err;
break;
- }
case SHT_STRTAB:
break;
case SHT_PROGBITS:
@@ -739,87 +740,169 @@ static int linker_sanity_check_elf(struct src_obj *obj)
break;
case SHT_NOBITS:
break;
- case SHT_REL: {
- Elf64_Rel *relo;
- struct src_sec *sym_sec;
+ case SHT_REL:
+ err = linker_sanity_check_elf_relos(obj, sec);
+ if (err)
+ return err;
+ break;
+ case SHT_LLVM_ADDRSIG:
+ break;
+ default:
+ pr_warn("ELF section #%zu (%s) has unrecognized type %zu in %s\n",
+ sec->sec_idx, sec->sec_name, (size_t)sec->shdr->sh_type, obj->filename);
+ return -EINVAL;
+ }
+ }
- if (sec->shdr->sh_entsize != sizeof(Elf64_Rel))
- return -EINVAL;
- if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
- return -EINVAL;
+ return 0;
+}
- /* SHT_REL's sh_link should point to SYMTAB */
- if (sec->shdr->sh_link != obj->symtab_sec_idx) {
- pr_warn("ELF relo section #%zu points to invalid SYMTAB section #%zu in %s\n",
- sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
- return -EINVAL;
- }
+static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec)
+{
+ struct src_sec *link_sec;
+ Elf64_Sym *sym;
+ int i, n;
+
+ if (sec->shdr->sh_entsize != sizeof(Elf64_Sym))
+ return -EINVAL;
+ if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
+ return -EINVAL;
+
+ if (!sec->shdr->sh_link || sec->shdr->sh_link >= obj->sec_cnt) {
+ pr_warn("ELF SYMTAB section #%zu points to missing STRTAB section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+ return -EINVAL;
+ }
+ link_sec = &obj->secs[sec->shdr->sh_link];
+ if (link_sec->shdr->sh_type != SHT_STRTAB) {
+ pr_warn("ELF SYMTAB section #%zu points to invalid STRTAB section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+ return -EINVAL;
+ }
- /* SHT_REL's sh_info points to relocated section */
- if (!sec->shdr->sh_info || sec->shdr->sh_info >= obj->sec_cnt) {
- pr_warn("ELF relo section #%zu points to missing section #%zu in %s\n",
- sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+ n = sec->shdr->sh_size / sec->shdr->sh_entsize;
+ sym = sec->data->d_buf;
+ for (i = 0; i < n; i++, sym++) {
+ int sym_type = ELF64_ST_TYPE(sym->st_info);
+ int sym_bind = ELF64_ST_BIND(sym->st_info);
+ int sym_vis = ELF64_ST_VISIBILITY(sym->st_other);
+
+ if (i == 0) {
+ if (sym->st_name != 0 || sym->st_info != 0
+ || sym->st_other != 0 || sym->st_shndx != 0
+ || sym->st_value != 0 || sym->st_size != 0) {
+ pr_warn("ELF sym #0 is invalid in %s\n", obj->filename);
return -EINVAL;
}
- link_sec = &obj->secs[sec->shdr->sh_info];
+ continue;
+ }
+ if (sym_bind != STB_LOCAL && sym_bind != STB_GLOBAL && sym_bind != STB_WEAK) {
+ pr_warn("ELF sym #%d in section #%zu has unsupported symbol binding %d\n",
+ i, sec->sec_idx, sym_bind);
+ return -EINVAL;
+ }
+ if (sym_vis != STV_DEFAULT && sym_vis != STV_HIDDEN) {
+ pr_warn("ELF sym #%d in section #%zu has unsupported symbol visibility %d\n",
+ i, sec->sec_idx, sym_vis);
+ return -EINVAL;
+ }
+ if (sym->st_shndx == 0) {
+ if (sym_type != STT_NOTYPE || sym_bind == STB_LOCAL
+ || sym->st_value != 0 || sym->st_size != 0) {
+ pr_warn("ELF sym #%d is invalid extern symbol in %s\n",
+ i, obj->filename);
- /* .rel<secname> -> <secname> pattern is followed */
- if (strncmp(sec->sec_name, ".rel", sizeof(".rel") - 1) != 0
- || strcmp(sec->sec_name + sizeof(".rel") - 1, link_sec->sec_name) != 0) {
- pr_warn("ELF relo section #%zu name has invalid name in %s\n",
- sec->sec_idx, obj->filename);
return -EINVAL;
}
+ continue;
+ }
+ if (sym->st_shndx < SHN_LORESERVE && sym->st_shndx >= obj->sec_cnt) {
+ pr_warn("ELF sym #%d in section #%zu points to missing section #%zu in %s\n",
+ i, sec->sec_idx, (size_t)sym->st_shndx, obj->filename);
+ return -EINVAL;
+ }
+ if (sym_type == STT_SECTION) {
+ if (sym->st_value != 0)
+ return -EINVAL;
+ continue;
+ }
+ }
- /* don't further validate relocations for ignored sections */
- if (link_sec->skipped)
- break;
+ return 0;
+}
- /* relocatable section is data or instructions */
- if (link_sec->shdr->sh_type != SHT_PROGBITS
- && link_sec->shdr->sh_type != SHT_NOBITS) {
- pr_warn("ELF relo section #%zu points to invalid section #%zu in %s\n",
- sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
- return -EINVAL;
- }
+static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec)
+{
+ struct src_sec *link_sec, *sym_sec;
+ Elf64_Rel *relo;
+ int i, n;
- /* check sanity of each relocation */
- n = sec->shdr->sh_size / sec->shdr->sh_entsize;
- relo = sec->data->d_buf;
- sym_sec = &obj->secs[obj->symtab_sec_idx];
- for (j = 0; j < n; j++, relo++) {
- size_t sym_idx = ELF64_R_SYM(relo->r_info);
- size_t sym_type = ELF64_R_TYPE(relo->r_info);
-
- if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) {
- pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
- j, sec->sec_idx, sym_type, obj->filename);
- return -EINVAL;
- }
+ if (sec->shdr->sh_entsize != sizeof(Elf64_Rel))
+ return -EINVAL;
+ if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
+ return -EINVAL;
- if (!sym_idx || sym_idx * sizeof(Elf64_Sym) >= sym_sec->shdr->sh_size) {
- pr_warn("ELF relo #%d in section #%zu points to invalid symbol #%zu in %s\n",
- j, sec->sec_idx, sym_idx, obj->filename);
- return -EINVAL;
- }
+ /* SHT_REL's sh_link should point to SYMTAB */
+ if (sec->shdr->sh_link != obj->symtab_sec_idx) {
+ pr_warn("ELF relo section #%zu points to invalid SYMTAB section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+ return -EINVAL;
+ }
- if (link_sec->shdr->sh_flags & SHF_EXECINSTR) {
- if (relo->r_offset % sizeof(struct bpf_insn) != 0) {
- pr_warn("ELF relo #%d in section #%zu points to missing symbol #%zu in %s\n",
- j, sec->sec_idx, sym_idx, obj->filename);
- return -EINVAL;
- }
- }
- }
- break;
+ /* SHT_REL's sh_info points to relocated section */
+ if (!sec->shdr->sh_info || sec->shdr->sh_info >= obj->sec_cnt) {
+ pr_warn("ELF relo section #%zu points to missing section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+ return -EINVAL;
+ }
+ link_sec = &obj->secs[sec->shdr->sh_info];
+
+ /* .rel<secname> -> <secname> pattern is followed */
+ if (strncmp(sec->sec_name, ".rel", sizeof(".rel") - 1) != 0
+ || strcmp(sec->sec_name + sizeof(".rel") - 1, link_sec->sec_name) != 0) {
+ pr_warn("ELF relo section #%zu name has invalid name in %s\n",
+ sec->sec_idx, obj->filename);
+ return -EINVAL;
+ }
+
+ /* don't further validate relocations for ignored sections */
+ if (link_sec->skipped)
+ return 0;
+
+ /* relocatable section is data or instructions */
+ if (link_sec->shdr->sh_type != SHT_PROGBITS && link_sec->shdr->sh_type != SHT_NOBITS) {
+ pr_warn("ELF relo section #%zu points to invalid section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+ return -EINVAL;
+ }
+
+ /* check sanity of each relocation */
+ n = sec->shdr->sh_size / sec->shdr->sh_entsize;
+ relo = sec->data->d_buf;
+ sym_sec = &obj->secs[obj->symtab_sec_idx];
+ for (i = 0; i < n; i++, relo++) {
+ size_t sym_idx = ELF64_R_SYM(relo->r_info);
+ size_t sym_type = ELF64_R_TYPE(relo->r_info);
+
+ if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) {
+ pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
+ i, sec->sec_idx, sym_type, obj->filename);
+ return -EINVAL;
}
- case SHT_LLVM_ADDRSIG:
- break;
- default:
- pr_warn("ELF section #%zu (%s) has unrecognized type %zu in %s\n",
- sec->sec_idx, sec->sec_name, (size_t)sec->shdr->sh_type, obj->filename);
+
+ if (!sym_idx || sym_idx * sizeof(Elf64_Sym) >= sym_sec->shdr->sh_size) {
+ pr_warn("ELF relo #%d in section #%zu points to invalid symbol #%zu in %s\n",
+ i, sec->sec_idx, sym_idx, obj->filename);
return -EINVAL;
}
+
+ if (link_sec->shdr->sh_flags & SHF_EXECINSTR) {
+ if (relo->r_offset % sizeof(struct bpf_insn) != 0) {
+ pr_warn("ELF relo #%d in section #%zu points to missing symbol #%zu in %s\n",
+ i, sec->sec_idx, sym_idx, obj->filename);
+ return -EINVAL;
+ }
+ }
}
return 0;
@@ -897,6 +980,7 @@ static int init_sec(struct bpf_linker *linker, struct dst_sec *dst_sec, struct s
dst_sec->sec_sz = 0;
dst_sec->sec_idx = 0;
+ dst_sec->ephemeral = src_sec->ephemeral;
/* ephemeral sections are just thin section shells lacking most parts */
if (src_sec->ephemeral)
@@ -904,13 +988,13 @@ static int init_sec(struct bpf_linker *linker, struct dst_sec *dst_sec, struct s
scn = elf_newscn(linker->elf);
if (!scn)
- return -1;
+ return -ENOMEM;
data = elf_newdata(scn);
if (!data)
- return -1;
+ return -ENOMEM;
shdr = elf64_getshdr(scn);
if (!shdr)
- return -1;
+ return -ENOMEM;
dst_sec->scn = scn;
dst_sec->shdr = shdr;
@@ -960,6 +1044,9 @@ static struct dst_sec *find_dst_sec_by_name(struct bpf_linker *linker, const cha
static bool secs_match(struct dst_sec *dst, struct src_sec *src)
{
+ if (dst->ephemeral || src->ephemeral)
+ return true;
+
if (dst->shdr->sh_type != src->shdr->sh_type) {
pr_warn("sec %s types mismatch\n", dst->sec_name);
return false;
@@ -985,13 +1072,33 @@ static bool sec_content_is_same(struct dst_sec *dst_sec, struct src_sec *src_sec
return true;
}
-static int extend_sec(struct dst_sec *dst, struct src_sec *src)
+static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src_sec *src)
{
void *tmp;
- size_t dst_align = dst->shdr->sh_addralign;
- size_t src_align = src->shdr->sh_addralign;
+ size_t dst_align, src_align;
size_t dst_align_sz, dst_final_sz;
+ int err;
+
+ /* Ephemeral source section doesn't contribute anything to ELF
+ * section data.
+ */
+ if (src->ephemeral)
+ return 0;
+
+ /* Some sections (like .maps) can contain both externs (and thus be
+ * ephemeral) and non-externs (map definitions). So it's possible that
+ * it has to be "upgraded" from ephemeral to non-ephemeral when the
+ * first non-ephemeral entity appears. In such case, we add ELF
+ * section, data, etc.
+ */
+ if (dst->ephemeral) {
+ err = init_sec(linker, dst, src);
+ if (err)
+ return err;
+ }
+ dst_align = dst->shdr->sh_addralign;
+ src_align = src->shdr->sh_addralign;
if (dst_align == 0)
dst_align = 1;
if (dst_align < src_align)
@@ -1087,10 +1194,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj
/* record mapped section index */
src_sec->dst_id = dst_sec->id;
- if (src_sec->ephemeral)
- continue;
-
- err = extend_sec(dst_sec, src_sec);
+ err = extend_sec(linker, dst_sec, src_sec);
if (err)
return err;
}
@@ -1101,68 +1205,778 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj
static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj)
{
struct src_sec *symtab = &obj->secs[obj->symtab_sec_idx];
- Elf64_Sym *sym = symtab->data->d_buf, *dst_sym;
- int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize;
+ Elf64_Sym *sym = symtab->data->d_buf;
+ int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize, err;
int str_sec_idx = symtab->shdr->sh_link;
+ const char *sym_name;
obj->sym_map = calloc(n + 1, sizeof(*obj->sym_map));
if (!obj->sym_map)
return -ENOMEM;
for (i = 0; i < n; i++, sym++) {
- struct src_sec *src_sec = NULL;
- struct dst_sec *dst_sec = NULL;
- const char *sym_name;
- size_t dst_sym_idx;
- int name_off;
-
- /* we already have all-zero initial symbol */
- if (sym->st_name == 0 && sym->st_info == 0 &&
- sym->st_other == 0 && sym->st_shndx == SHN_UNDEF &&
- sym->st_value == 0 && sym->st_size ==0)
+ /* We already validated all-zero symbol #0 and we already
+ * appended it preventively to the final SYMTAB, so skip it.
+ */
+ if (i == 0)
continue;
sym_name = elf_strptr(obj->elf, str_sec_idx, sym->st_name);
if (!sym_name) {
pr_warn("can't fetch symbol name for symbol #%d in '%s'\n", i, obj->filename);
- return -1;
+ return -EINVAL;
+ }
+
+ err = linker_append_elf_sym(linker, obj, sym, sym_name, i);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static Elf64_Sym *get_sym_by_idx(struct bpf_linker *linker, size_t sym_idx)
+{
+ struct dst_sec *symtab = &linker->secs[linker->symtab_sec_idx];
+ Elf64_Sym *syms = symtab->raw_data;
+
+ return &syms[sym_idx];
+}
+
+static struct glob_sym *find_glob_sym(struct bpf_linker *linker, const char *sym_name)
+{
+ struct glob_sym *glob_sym;
+ const char *name;
+ int i;
+
+ for (i = 0; i < linker->glob_sym_cnt; i++) {
+ glob_sym = &linker->glob_syms[i];
+ name = strset__data(linker->strtab_strs) + glob_sym->name_off;
+
+ if (strcmp(name, sym_name) == 0)
+ return glob_sym;
+ }
+
+ return NULL;
+}
+
+static struct glob_sym *add_glob_sym(struct bpf_linker *linker)
+{
+ struct glob_sym *syms, *sym;
+
+ syms = libbpf_reallocarray(linker->glob_syms, linker->glob_sym_cnt + 1,
+ sizeof(*linker->glob_syms));
+ if (!syms)
+ return NULL;
+
+ sym = &syms[linker->glob_sym_cnt];
+ memset(sym, 0, sizeof(*sym));
+ sym->var_idx = -1;
+
+ linker->glob_syms = syms;
+ linker->glob_sym_cnt++;
+
+ return sym;
+}
+
+static bool glob_sym_btf_matches(const char *sym_name, bool exact,
+ const struct btf *btf1, __u32 id1,
+ const struct btf *btf2, __u32 id2)
+{
+ const struct btf_type *t1, *t2;
+ bool is_static1, is_static2;
+ const char *n1, *n2;
+ int i, n;
+
+recur:
+ n1 = n2 = NULL;
+ t1 = skip_mods_and_typedefs(btf1, id1, &id1);
+ t2 = skip_mods_and_typedefs(btf2, id2, &id2);
+
+ /* check if only one side is FWD, otherwise handle with common logic */
+ if (!exact && btf_is_fwd(t1) != btf_is_fwd(t2)) {
+ n1 = btf__str_by_offset(btf1, t1->name_off);
+ n2 = btf__str_by_offset(btf2, t2->name_off);
+ if (strcmp(n1, n2) != 0) {
+ pr_warn("global '%s': incompatible forward declaration names '%s' and '%s'\n",
+ sym_name, n1, n2);
+ return false;
}
+ /* validate if FWD kind matches concrete kind */
+ if (btf_is_fwd(t1)) {
+ if (btf_kflag(t1) && btf_is_union(t2))
+ return true;
+ if (!btf_kflag(t1) && btf_is_struct(t2))
+ return true;
+ pr_warn("global '%s': incompatible %s forward declaration and concrete kind %s\n",
+ sym_name, btf_kflag(t1) ? "union" : "struct", btf_kind_str(t2));
+ } else {
+ if (btf_kflag(t2) && btf_is_union(t1))
+ return true;
+ if (!btf_kflag(t2) && btf_is_struct(t1))
+ return true;
+ pr_warn("global '%s': incompatible %s forward declaration and concrete kind %s\n",
+ sym_name, btf_kflag(t2) ? "union" : "struct", btf_kind_str(t1));
+ }
+ return false;
+ }
+
+ if (btf_kind(t1) != btf_kind(t2)) {
+ pr_warn("global '%s': incompatible BTF kinds %s and %s\n",
+ sym_name, btf_kind_str(t1), btf_kind_str(t2));
+ return false;
+ }
+
+ switch (btf_kind(t1)) {
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_FWD:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_VAR:
+ n1 = btf__str_by_offset(btf1, t1->name_off);
+ n2 = btf__str_by_offset(btf2, t2->name_off);
+ if (strcmp(n1, n2) != 0) {
+ pr_warn("global '%s': incompatible %s names '%s' and '%s'\n",
+ sym_name, btf_kind_str(t1), n1, n2);
+ return false;
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch (btf_kind(t1)) {
+ case BTF_KIND_UNKN: /* void */
+ case BTF_KIND_FWD:
+ return true;
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM:
+ /* ignore encoding for int and enum values for enum */
+ if (t1->size != t2->size) {
+ pr_warn("global '%s': incompatible %s '%s' size %u and %u\n",
+ sym_name, btf_kind_str(t1), n1, t1->size, t2->size);
+ return false;
+ }
+ return true;
+ case BTF_KIND_PTR:
+ /* just validate overall shape of the referenced type, so no
+ * contents comparison for struct/union, and allowed fwd vs
+ * struct/union
+ */
+ exact = false;
+ id1 = t1->type;
+ id2 = t2->type;
+ goto recur;
+ case BTF_KIND_ARRAY:
+ /* ignore index type and array size */
+ id1 = btf_array(t1)->type;
+ id2 = btf_array(t2)->type;
+ goto recur;
+ case BTF_KIND_FUNC:
+ /* extern and global linkages are compatible */
+ is_static1 = btf_func_linkage(t1) == BTF_FUNC_STATIC;
+ is_static2 = btf_func_linkage(t2) == BTF_FUNC_STATIC;
+ if (is_static1 != is_static2) {
+ pr_warn("global '%s': incompatible func '%s' linkage\n", sym_name, n1);
+ return false;
+ }
+
+ id1 = t1->type;
+ id2 = t2->type;
+ goto recur;
+ case BTF_KIND_VAR:
+ /* extern and global linkages are compatible */
+ is_static1 = btf_var(t1)->linkage == BTF_VAR_STATIC;
+ is_static2 = btf_var(t2)->linkage == BTF_VAR_STATIC;
+ if (is_static1 != is_static2) {
+ pr_warn("global '%s': incompatible var '%s' linkage\n", sym_name, n1);
+ return false;
+ }
+
+ id1 = t1->type;
+ id2 = t2->type;
+ goto recur;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m1, *m2;
+
+ if (!exact)
+ return true;
+
+ if (btf_vlen(t1) != btf_vlen(t2)) {
+ pr_warn("global '%s': incompatible number of %s fields %u and %u\n",
+ sym_name, btf_kind_str(t1), btf_vlen(t1), btf_vlen(t2));
+ return false;
+ }
+
+ n = btf_vlen(t1);
+ m1 = btf_members(t1);
+ m2 = btf_members(t2);
+ for (i = 0; i < n; i++, m1++, m2++) {
+ n1 = btf__str_by_offset(btf1, m1->name_off);
+ n2 = btf__str_by_offset(btf2, m2->name_off);
+ if (strcmp(n1, n2) != 0) {
+ pr_warn("global '%s': incompatible field #%d names '%s' and '%s'\n",
+ sym_name, i, n1, n2);
+ return false;
+ }
+ if (m1->offset != m2->offset) {
+ pr_warn("global '%s': incompatible field #%d ('%s') offsets\n",
+ sym_name, i, n1);
+ return false;
+ }
+ if (!glob_sym_btf_matches(sym_name, exact, btf1, m1->type, btf2, m2->type))
+ return false;
+ }
+
+ return true;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *m1, *m2;
+
+ if (btf_vlen(t1) != btf_vlen(t2)) {
+ pr_warn("global '%s': incompatible number of %s params %u and %u\n",
+ sym_name, btf_kind_str(t1), btf_vlen(t1), btf_vlen(t2));
+ return false;
+ }
+
+ n = btf_vlen(t1);
+ m1 = btf_params(t1);
+ m2 = btf_params(t2);
+ for (i = 0; i < n; i++, m1++, m2++) {
+ /* ignore func arg names */
+ if (!glob_sym_btf_matches(sym_name, exact, btf1, m1->type, btf2, m2->type))
+ return false;
+ }
+
+ /* now check return type as well */
+ id1 = t1->type;
+ id2 = t2->type;
+ goto recur;
+ }
+
+ /* skip_mods_and_typedefs() makes this impossible */
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ /* DATASECs are never compared with each other */
+ case BTF_KIND_DATASEC:
+ default:
+ pr_warn("global '%s': unsupported BTF kind %s\n",
+ sym_name, btf_kind_str(t1));
+ return false;
+ }
+}
+
+/* Check that two parsed BTF map definitions agree on every attribute
+ * (map type, key/value size and BTF type, max_entries, flags, NUMA node,
+ * pinning and, recursively, the inner map definition). On the first
+ * difference, warn which attribute of sym_name mismatched and return false.
+ */
+static bool map_defs_match(const char *sym_name,
+ const struct btf *main_btf,
+ const struct btf_map_def *main_def,
+ const struct btf_map_def *main_inner_def,
+ const struct btf *extra_btf,
+ const struct btf_map_def *extra_def,
+ const struct btf_map_def *extra_inner_def)
+{
+ const char *reason;
+
+ if (main_def->map_type != extra_def->map_type) {
+ reason = "type";
+ goto mismatch;
+ }
+
+ /* check key type/size match */
+ if (main_def->key_size != extra_def->key_size) {
+ reason = "key_size";
+ goto mismatch;
+ }
+ if (!!main_def->key_type_id != !!extra_def->key_type_id) {
+ reason = "key type";
+ goto mismatch;
+ }
+ if ((main_def->parts & MAP_DEF_KEY_TYPE)
+ && !glob_sym_btf_matches(sym_name, true /*exact*/,
+ main_btf, main_def->key_type_id,
+ extra_btf, extra_def->key_type_id)) {
+ reason = "key type";
+ goto mismatch;
+ }
+
+ /* validate value type/size match */
+ if (main_def->value_size != extra_def->value_size) {
+ reason = "value_size";
+ goto mismatch;
+ }
+ if (!!main_def->value_type_id != !!extra_def->value_type_id) {
+ reason = "value type";
+ goto mismatch;
+ }
+ if ((main_def->parts & MAP_DEF_VALUE_TYPE)
+ && !glob_sym_btf_matches(sym_name, true /*exact*/,
+ main_btf, main_def->value_type_id,
+ extra_btf, extra_def->value_type_id)) {
+ reason = "value type";
+ goto mismatch;
+ }
+
+ if (main_def->max_entries != extra_def->max_entries) {
+ reason = "max_entries";
+ goto mismatch;
+ }
+ if (main_def->map_flags != extra_def->map_flags) {
+ reason = "map_flags";
+ goto mismatch;
+ }
+ if (main_def->numa_node != extra_def->numa_node) {
+ reason = "numa_node";
+ goto mismatch;
+ }
+ if (main_def->pinning != extra_def->pinning) {
+ reason = "pinning";
+ goto mismatch;
+ }
+
+ if ((main_def->parts & MAP_DEF_INNER_MAP) != (extra_def->parts & MAP_DEF_INNER_MAP)) {
+ reason = "inner map";
+ goto mismatch;
+ }
- if (sym->st_shndx && sym->st_shndx < SHN_LORESERVE) {
- src_sec = &obj->secs[sym->st_shndx];
- if (src_sec->skipped)
+ if (main_def->parts & MAP_DEF_INNER_MAP) {
+ char inner_map_name[128];
+
+ snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", sym_name);
+
+ return map_defs_match(inner_map_name,
+ main_btf, main_inner_def, NULL,
+ extra_btf, extra_inner_def, NULL);
+ }
+
+ return true;
+
+mismatch:
+ pr_warn("global '%s': map %s mismatch\n", sym_name, reason);
+ return false;
+}
+
+static bool glob_map_defs_match(const char *sym_name,
+ struct bpf_linker *linker, struct glob_sym *glob_sym,
+ struct src_obj *obj, Elf64_Sym *sym, int btf_id)
+{
+ struct btf_map_def dst_def = {}, dst_inner_def = {};
+ struct btf_map_def src_def = {}, src_inner_def = {};
+ const struct btf_type *t;
+ int err;
+
+ t = btf__type_by_id(obj->btf, btf_id);
+ if (!btf_is_var(t)) {
+ pr_warn("global '%s': invalid map definition type [%d]\n", sym_name, btf_id);
+ return false;
+ }
+ t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+
+ err = parse_btf_map_def(sym_name, obj->btf, t, true /*strict*/, &src_def, &src_inner_def);
+ if (err) {
+ pr_warn("global '%s': invalid map definition\n", sym_name);
+ return false;
+ }
+
+ /* re-parse existing map definition */
+ t = btf__type_by_id(linker->btf, glob_sym->btf_id);
+ t = skip_mods_and_typedefs(linker->btf, t->type, NULL);
+ err = parse_btf_map_def(sym_name, linker->btf, t, true /*strict*/, &dst_def, &dst_inner_def);
+ if (err) {
+ /* this should not happen, because we already validated it */
+ pr_warn("global '%s': invalid dst map definition\n", sym_name);
+ return false;
+ }
+
+ /* Currently extern map definition has to be complete and match
+ * concrete map definition exactly. This restriction might be lifted
+ * in the future.
+ */
+ return map_defs_match(sym_name, linker->btf, &dst_def, &dst_inner_def,
+ obj->btf, &src_def, &src_inner_def);
+}
+
+static bool glob_syms_match(const char *sym_name,
+ struct bpf_linker *linker, struct glob_sym *glob_sym,
+ struct src_obj *obj, Elf64_Sym *sym, size_t sym_idx, int btf_id)
+{
+ const struct btf_type *src_t;
+
+ /* if we are dealing with externs, BTF types describing both global
+ * and extern VARs/FUNCs should be completely present in all files
+ */
+ if (!glob_sym->btf_id || !btf_id) {
+ pr_warn("BTF info is missing for global symbol '%s'\n", sym_name);
+ return false;
+ }
+
+ src_t = btf__type_by_id(obj->btf, btf_id);
+ if (!btf_is_var(src_t) && !btf_is_func(src_t)) {
+ pr_warn("only extern variables and functions are supported, but got '%s' for '%s'\n",
+ btf_kind_str(src_t), sym_name);
+ return false;
+ }
+
+ /* deal with .maps definitions specially */
+ if (glob_sym->sec_id && strcmp(linker->secs[glob_sym->sec_id].sec_name, MAPS_ELF_SEC) == 0)
+ return glob_map_defs_match(sym_name, linker, glob_sym, obj, sym, btf_id);
+
+ if (!glob_sym_btf_matches(sym_name, true /*exact*/,
+ linker->btf, glob_sym->btf_id, obj->btf, btf_id))
+ return false;
+
+ return true;
+}
+
+static bool btf_is_non_static(const struct btf_type *t)
+{
+ return (btf_is_var(t) && btf_var(t)->linkage != BTF_VAR_STATIC)
+ || (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_STATIC);
+}
+
+/* Find the BTF VAR/FUNC type describing global/extern symbol sym_name in
+ * obj->btf. On success returns 0 and sets *out_btf_id to the matching type
+ * ID and *out_btf_sec_id to the ID of the DATASEC listing it (0 if the type
+ * is not part of any DATASEC). Returns -EINVAL if the object has no BTF or
+ * the match is ambiguous, -ENOENT if no matching type exists.
+ */
+static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sym_name,
+ int *out_btf_sec_id, int *out_btf_id)
+{
+ int i, j, n, m, btf_id = 0;
+ const struct btf_type *t;
+ const struct btf_var_secinfo *vi;
+ const char *name;
+
+ /* caller only rejects missing BTF for externs, so guard it here too */
+ if (!obj->btf) {
+ pr_warn("failed to find BTF info for object '%s'\n", obj->filename);
+ return -EINVAL;
+ }
+
+ n = btf__get_nr_types(obj->btf);
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(obj->btf, i);
+
+ /* some global and extern FUNCs and VARs might not be associated with any
+ * DATASEC, so try to detect them in the same pass
+ */
+ if (btf_is_non_static(t)) {
+ name = btf__str_by_offset(obj->btf, t->name_off);
+ if (strcmp(name, sym_name) != 0)
continue;
- dst_sec = &linker->secs[src_sec->dst_id];
- /* allow only one STT_SECTION symbol per section */
- if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && dst_sec->sec_sym_idx) {
- obj->sym_map[i] = dst_sec->sec_sym_idx;
+ /* remember and still try to find DATASEC */
+ btf_id = i;
+ continue;
+ }
+
+ if (!btf_is_datasec(t))
+ continue;
+
+ vi = btf_var_secinfos(t);
+ for (j = 0, m = btf_vlen(t); j < m; j++, vi++) {
+ t = btf__type_by_id(obj->btf, vi->type);
+ name = btf__str_by_offset(obj->btf, t->name_off);
+
+ if (strcmp(name, sym_name) != 0)
+ continue;
+ if (btf_is_var(t) && btf_var(t)->linkage == BTF_VAR_STATIC)
continue;
+ if (btf_is_func(t) && btf_func_linkage(t) == BTF_FUNC_STATIC)
+ continue;
+
+ if (btf_id && btf_id != vi->type) {
+ pr_warn("global/extern '%s' BTF is ambiguous: both types #%d and #%u match\n",
+ sym_name, btf_id, vi->type);
+ return -EINVAL;
}
+
+ *out_btf_sec_id = i;
+ *out_btf_id = vi->type;
+
+ return 0;
}
+ }
+
+ /* free-floating extern or global FUNC */
+ if (btf_id) {
+ *out_btf_sec_id = 0;
+ *out_btf_id = btf_id;
+ return 0;
+ }
- name_off = strset__add_str(linker->strtab_strs, sym_name);
- if (name_off < 0)
- return name_off;
+ pr_warn("failed to find BTF info for global/extern symbol '%s'\n", sym_name);
+ return -ENOENT;
+}
- dst_sym = add_new_sym(linker, &dst_sym_idx);
- if (!dst_sym)
- return -ENOMEM;
+static struct src_sec *find_src_sec_by_name(struct src_obj *obj, const char *sec_name)
+{
+ struct src_sec *sec;
+ int i;
- dst_sym->st_name = name_off;
- dst_sym->st_info = sym->st_info;
- dst_sym->st_other = sym->st_other;
- dst_sym->st_shndx = src_sec ? dst_sec->sec_idx : sym->st_shndx;
- dst_sym->st_value = (src_sec ? src_sec->dst_off : 0) + sym->st_value;
- dst_sym->st_size = sym->st_size;
+ for (i = 1; i < obj->sec_cnt; i++) {
+ sec = &obj->secs[i];
+
+ if (strcmp(sec->sec_name, sec_name) == 0)
+ return sec;
+ }
+
+ return NULL;
+}
+
+static int complete_extern_btf_info(struct btf *dst_btf, int dst_id,
+ struct btf *src_btf, int src_id)
+{
+ struct btf_type *dst_t = btf_type_by_id(dst_btf, dst_id);
+ struct btf_type *src_t = btf_type_by_id(src_btf, src_id);
+ struct btf_param *src_p, *dst_p;
+ const char *s;
+ int i, n, off;
+
+ /* We already made sure that source and destination types (FUNC or
+ * VAR) match in terms of types and argument names.
+ */
+ if (btf_is_var(dst_t)) {
+ btf_var(dst_t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ return 0;
+ }
+
+ dst_t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_GLOBAL, 0);
+
+ /* now onto FUNC_PROTO types */
+ src_t = btf_type_by_id(src_btf, src_t->type);
+ dst_t = btf_type_by_id(dst_btf, dst_t->type);
+
+ /* Fill in all the argument names, which for extern FUNCs are missing.
+ * We'll end up with two copies of FUNCs/VARs for externs, but that
+ * will be taken care of by BTF dedup at the very end.
+ * It might be that BTF types for extern in one file has less/more BTF
+ * information (e.g., FWD instead of full STRUCT/UNION information),
+ * but that should be (in most cases, subject to BTF dedup rules)
+ * handled and resolved by BTF dedup algorithm as well, so we won't
+ * worry about it. Our only job is to make sure that argument names
+ * are populated on both sides, otherwise BTF dedup will pedantically
+ * consider them different.
+ */
+ src_p = btf_params(src_t);
+ dst_p = btf_params(dst_t);
+ for (i = 0, n = btf_vlen(dst_t); i < n; i++, src_p++, dst_p++) {
+ if (!src_p->name_off)
+ continue;
+
+ /* src_btf has more complete info, so add name to dst_btf */
+ s = btf__str_by_offset(src_btf, src_p->name_off);
+ off = btf__add_str(dst_btf, s);
+ if (off < 0)
+ return off;
+ dst_p->name_off = off;
+ }
+ return 0;
+}
+
+static void sym_update_bind(Elf64_Sym *sym, int sym_bind)
+{
+ sym->st_info = ELF64_ST_INFO(sym_bind, ELF64_ST_TYPE(sym->st_info));
+}
+
+static void sym_update_type(Elf64_Sym *sym, int sym_type)
+{
+ sym->st_info = ELF64_ST_INFO(ELF64_ST_BIND(sym->st_info), sym_type);
+}
+
+static void sym_update_visibility(Elf64_Sym *sym, int sym_vis)
+{
+ /* libelf doesn't provide setters for ST_VISIBILITY, but it is stored
+ * in the lower 2 bits of st_other: clear only those, then set sym_vis
+ */
+ sym->st_other &= ~0x03;
+ sym->st_other |= sym_vis;
+}
+
+static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
+ Elf64_Sym *sym, const char *sym_name, int src_sym_idx)
+{
+ struct src_sec *src_sec = NULL;
+ struct dst_sec *dst_sec = NULL;
+ struct glob_sym *glob_sym = NULL;
+ int name_off, sym_type, sym_bind, sym_vis, err;
+ int btf_sec_id = 0, btf_id = 0;
+ size_t dst_sym_idx;
+ Elf64_Sym *dst_sym;
+ bool sym_is_extern;
+
+ sym_type = ELF64_ST_TYPE(sym->st_info);
+ sym_bind = ELF64_ST_BIND(sym->st_info);
+ sym_vis = ELF64_ST_VISIBILITY(sym->st_other);
+ sym_is_extern = sym->st_shndx == SHN_UNDEF;
+
+ if (sym_is_extern) {
+ if (!obj->btf) {
+ pr_warn("externs without BTF info are not supported\n");
+ return -ENOTSUP;
+ }
+ } else if (sym->st_shndx < SHN_LORESERVE) {
+ src_sec = &obj->secs[sym->st_shndx];
+ if (src_sec->skipped)
+ return 0;
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ /* allow only one STT_SECTION symbol per section */
+ if (sym_type == STT_SECTION && dst_sec->sec_sym_idx) {
+ obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx;
+ return 0;
+ }
+ }
+
+ if (sym_bind == STB_LOCAL)
+ goto add_sym;
+
+ /* find matching BTF info */
+ err = find_glob_sym_btf(obj, sym, sym_name, &btf_sec_id, &btf_id);
+ if (err)
+ return err;
+
+ if (sym_is_extern && btf_sec_id) {
+ const char *sec_name = NULL;
+ const struct btf_type *t;
+
+ t = btf__type_by_id(obj->btf, btf_sec_id);
+ sec_name = btf__str_by_offset(obj->btf, t->name_off);
+
+ /* Clang puts unannotated extern vars into
+ * '.extern' BTF DATASEC. Treat them the same
+ * as unannotated extern funcs (which are
+ * currently not put into any DATASECs).
+ * Those don't have associated src_sec/dst_sec.
+ */
+ if (strcmp(sec_name, BTF_EXTERN_SEC) != 0) {
+ src_sec = find_src_sec_by_name(obj, sec_name);
+ if (!src_sec) {
+ pr_warn("failed to find matching ELF sec '%s'\n", sec_name);
+ return -ENOENT;
+ }
+ dst_sec = &linker->secs[src_sec->dst_id];
+ }
+ }
+
+ glob_sym = find_glob_sym(linker, sym_name);
+ if (glob_sym) {
+ /* Preventively resolve to existing symbol. This is
+ * needed for further relocation symbol remapping in
+ * the next step of linking.
+ */
+ obj->sym_map[src_sym_idx] = glob_sym->sym_idx;
+
+ /* If both symbols are non-externs, at least one of
+ * them has to be STB_WEAK, otherwise they are in
+ * a conflict with each other.
+ */
+ if (!sym_is_extern && !glob_sym->is_extern
+ && !glob_sym->is_weak && sym_bind != STB_WEAK) {
+ pr_warn("conflicting non-weak symbol #%d (%s) definition in '%s'\n",
+ src_sym_idx, sym_name, obj->filename);
+ return -EINVAL;
+ }
- obj->sym_map[i] = dst_sym_idx;
+ if (!glob_syms_match(sym_name, linker, glob_sym, obj, sym, src_sym_idx, btf_id))
+ return -EINVAL;
+
+ dst_sym = get_sym_by_idx(linker, glob_sym->sym_idx);
- if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && dst_sym) {
- dst_sec->sec_sym_idx = dst_sym_idx;
- dst_sym->st_value = 0;
+ /* If new symbol is strong, then force dst_sym to be strong as
+ * well; this way a mix of weak and non-weak extern
+ * definitions will end up being strong.
+ */
+ if (sym_bind == STB_GLOBAL) {
+ /* We still need to preserve type (NOTYPE or
+ * OBJECT/FUNC, depending on whether the symbol is
+ * extern or not)
+ */
+ sym_update_bind(dst_sym, STB_GLOBAL);
+ glob_sym->is_weak = false;
}
+ /* Non-default visibility is "contaminating", with stricter
+ * visibility overwriting more permissive ones, even if more
+ * permissive visibility comes from just an extern definition.
+ * Currently only STV_DEFAULT and STV_HIDDEN are allowed and
+ * ensured by ELF symbol sanity checks above.
+ */
+ if (sym_vis > ELF64_ST_VISIBILITY(dst_sym->st_other))
+ sym_update_visibility(dst_sym, sym_vis);
+
+ /* If the new symbol is extern, then regardless if
+ * existing symbol is extern or resolved global, just
+ * keep the existing one untouched.
+ */
+ if (sym_is_extern)
+ return 0;
+
+ /* If existing symbol is a strong resolved symbol, bail out,
+ * because we lost resolution battle and have nothing to
+ * contribute. We already checked above that there is no
+ * strong-strong conflict. We also already tightened binding
+ * and visibility, so nothing else to contribute at that point.
+ */
+ if (!glob_sym->is_extern && sym_bind == STB_WEAK)
+ return 0;
+
+ /* At this point, new symbol is strong non-extern,
+ * so overwrite glob_sym with new symbol information.
+ * Preserve binding and visibility.
+ */
+ sym_update_type(dst_sym, sym_type);
+ dst_sym->st_shndx = dst_sec->sec_idx;
+ dst_sym->st_value = src_sec->dst_off + sym->st_value;
+ dst_sym->st_size = sym->st_size;
+
+ /* see comment below about dst_sec->id vs dst_sec->sec_idx */
+ glob_sym->sec_id = dst_sec->id;
+ glob_sym->is_extern = false;
+
+ if (complete_extern_btf_info(linker->btf, glob_sym->btf_id,
+ obj->btf, btf_id))
+ return -EINVAL;
+
+ /* request updating VAR's/FUNC's underlying BTF type when appending BTF type */
+ glob_sym->underlying_btf_id = 0;
+
+ obj->sym_map[src_sym_idx] = glob_sym->sym_idx;
+ return 0;
+ }
+
+add_sym:
+ name_off = strset__add_str(linker->strtab_strs, sym_name);
+ if (name_off < 0)
+ return name_off;
+
+ dst_sym = add_new_sym(linker, &dst_sym_idx);
+ if (!dst_sym)
+ return -ENOMEM;
+
+ dst_sym->st_name = name_off;
+ dst_sym->st_info = sym->st_info;
+ dst_sym->st_other = sym->st_other;
+ dst_sym->st_shndx = dst_sec ? dst_sec->sec_idx : sym->st_shndx;
+ dst_sym->st_value = (src_sec ? src_sec->dst_off : 0) + sym->st_value;
+ dst_sym->st_size = sym->st_size;
+
+ obj->sym_map[src_sym_idx] = dst_sym_idx;
+
+ if (sym_type == STT_SECTION && dst_sym) {
+ dst_sec->sec_sym_idx = dst_sym_idx;
+ dst_sym->st_value = 0;
+ }
+
+ if (sym_bind != STB_LOCAL) {
+ glob_sym = add_glob_sym(linker);
+ if (!glob_sym)
+ return -ENOMEM;
+
+ glob_sym->sym_idx = dst_sym_idx;
+ /* we use dst_sec->id (and not dst_sec->sec_idx), because
+ * ephemeral sections (.kconfig, .ksyms, etc) don't have
+ * sec_idx (as they don't have corresponding ELF section), but
+ * still have id. .extern doesn't have even ephemeral section
+ * associated with it, so dst_sec->id == dst_sec->sec_idx == 0.
+ */
+ glob_sym->sec_id = dst_sec ? dst_sec->id : 0;
+ glob_sym->name_off = name_off;
+ /* we will fill btf_id in during BTF merging step */
+ glob_sym->btf_id = 0;
+ glob_sym->is_extern = sym_is_extern;
+ glob_sym->is_weak = sym_bind == STB_WEAK;
}
return 0;
@@ -1200,7 +2014,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
return err;
}
} else if (!secs_match(dst_sec, src_sec)) {
- pr_warn("Secs %s are not compatible\n", src_sec->sec_name);
+ pr_warn("sections %s are not compatible\n", src_sec->sec_name);
return -1;
}
@@ -1212,7 +2026,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
dst_sec->shdr->sh_info = dst_linked_sec->sec_idx;
src_sec->dst_id = dst_sec->id;
- err = extend_sec(dst_sec, src_sec);
+ err = extend_sec(linker, dst_sec, src_sec);
if (err)
return err;
@@ -1265,21 +2079,6 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
return 0;
}
-static struct src_sec *find_src_sec_by_name(struct src_obj *obj, const char *sec_name)
-{
- struct src_sec *sec;
- int i;
-
- for (i = 1; i < obj->sec_cnt; i++) {
- sec = &obj->secs[i];
-
- if (strcmp(sec->sec_name, sec_name) == 0)
- return sec;
- }
-
- return NULL;
-}
-
static Elf64_Sym *find_sym_by_name(struct src_obj *obj, size_t sec_idx,
int sym_type, const char *sym_name)
{
@@ -1334,12 +2133,32 @@ static int linker_fixup_btf(struct src_obj *obj)
t->size = sec->shdr->sh_size;
} else {
/* BTF can have some sections that are not represented
- * in ELF, e.g., .kconfig and .ksyms, which are used
- * for special extern variables. Here we'll
- * pre-create "section shells" for them to be able to
- * keep track of extra per-section metadata later
- * (e.g., BTF variables).
+ * in ELF, e.g., .kconfig, .ksyms, .extern, which are used
+ * for special extern variables.
+ *
+ * For all but one such special (ephemeral)
+ * sections, we pre-create "section shells" to be able
+ * to keep track of extra per-section metadata later
+ * (e.g., those BTF extern variables).
+ *
+ * .extern is even more special, though, because it
+ * contains extern variables that need to be resolved
+ * by static linker, not libbpf and kernel. When such
+ * externs are resolved, we are going to remove them
+ * from .extern BTF section and might end up not
+ * needing it at all. Each resolved extern should have
+ * matching non-extern VAR/FUNC in other sections.
+ *
+ * We do support leaving some of the externs
+ * unresolved, though, to support cases of building
+ * libraries, which will later be linked against final
+ * BPF applications. So if at finalization we still
+ * see unresolved externs, we'll create .extern
+ * section on our own.
*/
+ if (strcmp(sec_name, BTF_EXTERN_SEC) == 0)
+ continue;
+
sec = add_src_sec(obj, sec_name);
if (!sec)
return -ENOMEM;
@@ -1379,6 +2198,13 @@ static int linker_fixup_btf(struct src_obj *obj)
static int remap_type_id(__u32 *type_id, void *ctx)
{
int *id_map = ctx;
+ int new_id = id_map[*type_id];
+
+ /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. */
+ if (new_id == 0 && *type_id != 0) {
+ pr_warn("failed to find new ID mapping for original BTF type ID %u\n", *type_id);
+ return -EINVAL;
+ }
*type_id = id_map[*type_id];
@@ -1389,6 +2215,7 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
{
const struct btf_type *t;
int i, j, n, start_id, id;
+ const char *name;
if (!obj->btf)
return 0;
@@ -1401,12 +2228,44 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
return -ENOMEM;
for (i = 1; i <= n; i++) {
+ struct glob_sym *glob_sym = NULL;
+
t = btf__type_by_id(obj->btf, i);
/* DATASECs are handled specially below */
if (btf_kind(t) == BTF_KIND_DATASEC)
continue;
+ if (btf_is_non_static(t)) {
+ /* there should be glob_sym already */
+ name = btf__str_by_offset(obj->btf, t->name_off);
+ glob_sym = find_glob_sym(linker, name);
+
+ /* VARs without corresponding glob_sym are those that
+ * belong to skipped/deduplicated sections (i.e.,
+ * license and version), so just skip them
+ */
+ if (!glob_sym)
+ continue;
+
+ /* linker_append_elf_sym() might have requested
+ * updating underlying type ID, if extern was resolved
+ * to strong symbol or weak got upgraded to non-weak
+ */
+ if (glob_sym->underlying_btf_id == 0)
+ glob_sym->underlying_btf_id = -t->type;
+
+ /* globals from previous object files that match our
+ * VAR/FUNC already have a corresponding associated
+ * BTF type, so just make sure to use it
+ */
+ if (glob_sym->btf_id) {
+ /* reuse existing BTF type for global var/func */
+ obj->btf_type_map[i] = glob_sym->btf_id;
+ continue;
+ }
+ }
+
id = btf__add_type(linker->btf, obj->btf, t);
if (id < 0) {
pr_warn("failed to append BTF type #%d from file '%s'\n", i, obj->filename);
@@ -1414,6 +2273,12 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
}
obj->btf_type_map[i] = id;
+
+ /* record just appended BTF type for var/func */
+ if (glob_sym) {
+ glob_sym->btf_id = id;
+ glob_sym->underlying_btf_id = -t->type;
+ }
}
/* remap all the types except DATASECs */
@@ -1425,6 +2290,22 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
return -EINVAL;
}
+ /* Rewrite VAR/FUNC underlying types (i.e., FUNC's FUNC_PROTO and VAR's
+ * actual type), if necessary
+ */
+ for (i = 0; i < linker->glob_sym_cnt; i++) {
+ struct glob_sym *glob_sym = &linker->glob_syms[i];
+ struct btf_type *glob_t;
+
+ if (glob_sym->underlying_btf_id >= 0)
+ continue;
+
+ glob_sym->underlying_btf_id = obj->btf_type_map[-glob_sym->underlying_btf_id];
+
+ glob_t = btf_type_by_id(linker->btf, glob_sym->btf_id);
+ glob_t->type = glob_sym->underlying_btf_id;
+ }
+
/* append DATASEC info */
for (i = 1; i < obj->sec_cnt; i++) {
struct src_sec *src_sec;
@@ -1452,6 +2333,42 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
n = btf_vlen(t);
for (j = 0; j < n; j++, src_var++) {
void *sec_vars = dst_sec->sec_vars;
+ int new_id = obj->btf_type_map[src_var->type];
+ struct glob_sym *glob_sym = NULL;
+
+ t = btf_type_by_id(linker->btf, new_id);
+ if (btf_is_non_static(t)) {
+ name = btf__str_by_offset(linker->btf, t->name_off);
+ glob_sym = find_glob_sym(linker, name);
+ if (glob_sym->sec_id != dst_sec->id) {
+ pr_warn("global '%s': section mismatch %d vs %d\n",
+ name, glob_sym->sec_id, dst_sec->id);
+ return -EINVAL;
+ }
+ }
+
+ /* If there is already a member (VAR or FUNC) mapped
+ * to the same type, don't add a duplicate entry.
+ * This will happen when multiple object files define
+ * the same extern VARs/FUNCs.
+ */
+ if (glob_sym && glob_sym->var_idx >= 0) {
+ __s64 sz;
+
+ dst_var = &dst_sec->sec_vars[glob_sym->var_idx];
+ /* Because underlying BTF type might have
+ * changed, so might its size have changed, so
+ * re-calculate and update it in sec_var.
+ */
+ sz = btf__resolve_size(linker->btf, glob_sym->underlying_btf_id);
+ if (sz < 0) {
+ pr_warn("global '%s': failed to resolve size of underlying type: %d\n",
+ name, (int)sz);
+ return -EINVAL;
+ }
+ dst_var->size = sz;
+ continue;
+ }
sec_vars = libbpf_reallocarray(sec_vars,
dst_sec->sec_var_cnt + 1,
@@ -1466,6 +2383,9 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
dst_var->type = obj->btf_type_map[src_var->type];
dst_var->size = src_var->size;
dst_var->offset = src_sec->dst_off + src_var->offset;
+
+ if (glob_sym)
+ glob_sym->var_idx = dst_sec->sec_var_cnt - 1;
}
}
@@ -1895,7 +2815,7 @@ static int finalize_btf_ext(struct bpf_linker *linker)
hdr->func_info_len = funcs_sz;
hdr->line_info_off = funcs_sz;
hdr->line_info_len = lines_sz;
- hdr->core_relo_off = funcs_sz + lines_sz;;
+ hdr->core_relo_off = funcs_sz + lines_sz;
hdr->core_relo_len = core_relos_sz;
if (funcs_sz) {
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index a402f32a145c..91130648d8e6 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -39,8 +39,6 @@ EXTRA_WARNINGS += -Wundef
EXTRA_WARNINGS += -Wwrite-strings
EXTRA_WARNINGS += -Wformat
-CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?)
-
# Makefiles suck: This macro sets a default value of $(2) for the
# variable named by $(1), unless the variable has been set by
# environment or command line. This is necessary for CC and AR
@@ -52,12 +50,22 @@ define allow-override
$(eval $(1) = $(2)))
endef
+ifneq ($(LLVM),)
+$(call allow-override,CC,clang)
+$(call allow-override,AR,llvm-ar)
+$(call allow-override,LD,ld.lld)
+$(call allow-override,CXX,clang++)
+$(call allow-override,STRIP,llvm-strip)
+else
# Allow setting various cross-compile vars or setting CROSS_COMPILE as a prefix.
$(call allow-override,CC,$(CROSS_COMPILE)gcc)
$(call allow-override,AR,$(CROSS_COMPILE)ar)
$(call allow-override,LD,$(CROSS_COMPILE)ld)
$(call allow-override,CXX,$(CROSS_COMPILE)g++)
$(call allow-override,STRIP,$(CROSS_COMPILE)strip)
+endif
+
+CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?)
ifneq ($(LLVM),)
HOSTAR ?= llvm-ar
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 6448c626498f..283e5ad8385e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -21,13 +21,18 @@ endif
BPF_GCC ?= $(shell command -v bpf-gcc;)
SAN_CFLAGS ?=
-CFLAGS += -g -Og -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \
+CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
-I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) \
-Dbpf_prog_load=bpf_prog_test_load \
-Dbpf_load_program=bpf_test_load_program
LDLIBS += -lcap -lelf -lz -lrt -lpthread
+# Silence some warnings when compiled with clang
+ifneq ($(LLVM),)
+CFLAGS += -Wno-unused-command-line-argument
+endif
+
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_verifier_log test_dev_cgroup \
@@ -182,7 +187,6 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
cp $(SCRATCH_DIR)/runqslower $@
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
-$(TEST_GEN_FILES): docs
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
@@ -201,10 +205,12 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
CC=$(HOSTCC) LD=$(HOSTLD) \
- EXTRA_CFLAGS='-g -Og' \
+ EXTRA_CFLAGS='-g -O0' \
OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install
+all: docs
+
docs:
$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
-f Makefile.docs \
@@ -219,7 +225,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
../../../include/uapi/linux/bpf.h \
| $(INCLUDE_DIR) $(BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
- EXTRA_CFLAGS='-g -Og' \
+ EXTRA_CFLAGS='-g -O0' \
DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
ifneq ($(BPFOBJ),$(HOST_BPFOBJ))
@@ -227,7 +233,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
../../../include/uapi/linux/bpf.h \
| $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \
- EXTRA_CFLAGS='-g -Og' \
+ EXTRA_CFLAGS='-g -O0' \
OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \
DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
endif
@@ -303,9 +309,15 @@ endef
SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
-LINKED_SKELS := test_static_linked.skel.h
+LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
+ linked_vars.skel.h linked_maps.skel.h
test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
+linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o
+linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o
+linked_maps.skel.h-deps := linked_maps1.o linked_maps2.o
+
+LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
# Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
@@ -325,7 +337,7 @@ TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))
TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS))
TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
- $$(filter-out $(SKEL_BLACKLIST), \
+ $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\
$$(TRUNNER_BPF_SRCS)))
TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
@@ -481,7 +493,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
# Make sure we are able to include and link libbpf against c++.
$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
$(call msg,CXX,,$@)
- $(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
+ $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
# Benchmark runner
$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 65fe318d1e71..3353778c30f8 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -193,3 +193,12 @@ Without it, the error from compiling bpf selftests looks like:
libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2
__ https://reviews.llvm.org/D93563
+
+Clang dependencies for static linking tests
+===========================================
+
+The linked_vars, linked_maps, and linked_funcs tests depend on a
+`Clang fix`__ to generate valid BTF information for weak variables. Please
+make sure you use a Clang version that contains the fix.
+
+__ https://reviews.llvm.org/D100362
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 37e1f303fc11..5192305159ec 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -44,3 +44,5 @@ CONFIG_SECURITYFS=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_READ_POLICY=y
CONFIG_BLK_DEV_LOOP=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_DYNAMIC_FTRACE=y
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 74c45d557a2b..2d3590cfb5e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -147,6 +147,7 @@ static void test_task_stack(void)
return;
do_dummy_read(skel->progs.dump_task_stack);
+ do_dummy_read(skel->progs.get_task_user_stacks);
bpf_iter_task_stack__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 5c0448910426..63990842d20f 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -58,42 +58,73 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
test_cb cb)
{
struct bpf_object *obj = NULL, *tgt_obj;
+ __u32 retval, tgt_prog_id, info_len;
+ struct bpf_prog_info prog_info = {};
struct bpf_program **prog = NULL;
struct bpf_link **link = NULL;
- __u32 duration = 0, retval;
int err, tgt_fd, i;
+ struct btf *btf;
err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
&tgt_obj, &tgt_fd);
- if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
- target_obj_file, err, errno))
+ if (!ASSERT_OK(err, "tgt_prog_load"))
return;
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
.attach_prog_fd = tgt_fd,
);
+ info_len = sizeof(prog_info);
+ err = bpf_obj_get_info_by_fd(tgt_fd, &prog_info, &info_len);
+ if (!ASSERT_OK(err, "tgt_fd_get_info"))
+ goto close_prog;
+
+ tgt_prog_id = prog_info.id;
+ btf = bpf_object__btf(tgt_obj);
+
link = calloc(sizeof(struct bpf_link *), prog_cnt);
+ if (!ASSERT_OK_PTR(link, "link_ptr"))
+ goto close_prog;
+
prog = calloc(sizeof(struct bpf_program *), prog_cnt);
- if (CHECK(!link || !prog, "alloc_memory", "failed to alloc memory"))
+ if (!ASSERT_OK_PTR(prog, "prog_ptr"))
goto close_prog;
obj = bpf_object__open_file(obj_file, &opts);
- if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
- "failed to open %s: %ld\n", obj_file,
- PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
goto close_prog;
err = bpf_object__load(obj);
- if (CHECK(err, "obj_load", "err %d\n", err))
+ if (!ASSERT_OK(err, "obj_load"))
goto close_prog;
for (i = 0; i < prog_cnt; i++) {
+ struct bpf_link_info link_info;
+ char *tgt_name;
+ __s32 btf_id;
+
+ tgt_name = strstr(prog_name[i], "/");
+ if (!ASSERT_OK_PTR(tgt_name, "tgt_name"))
+ goto close_prog;
+ btf_id = btf__find_by_name_kind(btf, tgt_name + 1, BTF_KIND_FUNC);
+
prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]);
- if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name[i]))
+ if (!ASSERT_OK_PTR(prog[i], prog_name[i]))
goto close_prog;
+
link[i] = bpf_program__attach_trace(prog[i]);
- if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
+ if (!ASSERT_OK_PTR(link[i], "attach_trace"))
goto close_prog;
+
+ info_len = sizeof(link_info);
+ memset(&link_info, 0, sizeof(link_info));
+ err = bpf_obj_get_info_by_fd(bpf_link__fd(link[i]),
+ &link_info, &info_len);
+ ASSERT_OK(err, "link_fd_get_info");
+ ASSERT_EQ(link_info.tracing.attach_type,
+ bpf_program__get_expected_attach_type(prog[i]),
+ "link_attach_type");
+ ASSERT_EQ(link_info.tracing.target_obj_id, tgt_prog_id, "link_tgt_obj_id");
+ ASSERT_EQ(link_info.tracing.target_btf_id, btf_id, "link_tgt_btf_id");
}
if (cb) {
@@ -106,10 +137,9 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
goto close_prog;
err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ NULL, NULL, &retval, NULL);
+ ASSERT_OK(err, "prog_run");
+ ASSERT_EQ(retval, 0, "prog_run_ret");
if (check_data_map(obj, prog_cnt, false))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
index 6c4d42a2386f..ccc7e8a34ab6 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
@@ -39,7 +39,7 @@ void test_fexit_sleep(void)
goto cleanup;
cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel);
- if (CHECK(cpid == -1, "clone", strerror(errno)))
+ if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno)))
goto cleanup;
/* wait until first sys_nanosleep ends and second sys_nanosleep starts */
@@ -65,7 +65,7 @@ void test_fexit_sleep(void)
/* kill the thread to unwind sys_nanosleep stack through the trampoline */
kill(cpid, 9);
- if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno)))
+ if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno)))
goto cleanup;
if (CHECK(WEXITSTATUS(wstatus) != 0, "exitstatus", "failed"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
new file mode 100644
index 000000000000..e9916f2817ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_funcs.skel.h"
+
+/* Load linked_funcs, trigger it with a getpgid syscall and compare the
+ * output_* values in .bss against the expected constants.
+ */
+void test_linked_funcs(void)
+{
+	struct linked_funcs *skel = linked_funcs__open();
+
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	/* inputs are written while the skeleton is opened but not yet loaded */
+	skel->bss->syscall_id = SYS_getpgid;
+	skel->rodata->my_tid = syscall(SYS_gettid);
+
+	if (!ASSERT_OK(linked_funcs__load(skel), "skel_load"))
+		goto cleanup;
+
+	if (!ASSERT_OK(linked_funcs__attach(skel), "skel_attach"))
+		goto cleanup;
+
+	/* issue the syscall whose number was stored in syscall_id above */
+	syscall(SYS_getpgid);
+
+	ASSERT_EQ(skel->bss->output_val1, 2000 + 2000, "output_val1");
+	ASSERT_EQ(skel->bss->output_ctx1, SYS_getpgid, "output_ctx1");
+	ASSERT_EQ(skel->bss->output_weak1, 42, "output_weak1");
+
+	ASSERT_EQ(skel->bss->output_val2, 2 * 1000 + 2 * (2 * 1000), "output_val2");
+	ASSERT_EQ(skel->bss->output_ctx2, SYS_getpgid, "output_ctx2");
+	/* output_weak2 is expected to stay 0, i.e. never be updated */
+	ASSERT_EQ(skel->bss->output_weak2, 0, "output_weak2");
+
+cleanup:
+	linked_funcs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_maps.c b/tools/testing/selftests/bpf/prog_tests/linked_maps.c
new file mode 100644
index 000000000000..85dcaaaf2775
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_maps.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_maps.skel.h"
+
+/* Load and attach linked_maps, trigger it with a getpgid syscall and compare
+ * the output_* values in .bss against the expected constants.
+ */
+void test_linked_maps(void)
+{
+	struct linked_maps *skel = linked_maps__open_and_load();
+
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	if (!ASSERT_OK(linked_maps__attach(skel), "skel_attach"))
+		goto cleanup;
+
+	/* make the syscall first, then check the outputs */
+	syscall(SYS_getpgid);
+
+	ASSERT_EQ(skel->bss->output_first1, 2000, "output_first1");
+	ASSERT_EQ(skel->bss->output_second1, 2, "output_second1");
+	ASSERT_EQ(skel->bss->output_weak1, 2, "output_weak1");
+
+cleanup:
+	linked_maps__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_vars.c b/tools/testing/selftests/bpf/prog_tests/linked_vars.c
new file mode 100644
index 000000000000..267166abe4c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_vars.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_vars.skel.h"
+
+/* Load linked_vars, trigger it with getpgid and check every output_* value. */
+void test_linked_vars(void)
+{
+	int err;
+	struct linked_vars *skel;
+
+	skel = linked_vars__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	skel->bss->input_bss1 = 1000;
+	skel->bss->input_bss2 = 2000;
+	skel->bss->input_bss_weak = 3000;
+
+	err = linked_vars__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+	err = linked_vars__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	/* trigger */
+	syscall(SYS_getpgid);
+
+	ASSERT_EQ(skel->bss->output_bss1, 1000 + 2000 + 3000, "output_bss1");
+	ASSERT_EQ(skel->bss->output_bss2, 1000 + 2000 + 3000, "output_bss2");
+	/* 10 comes from "winner" input_data_weak in first obj file */
+	ASSERT_EQ(skel->bss->output_data1, 1 + 2 + 10, "output_data1");
+	ASSERT_EQ(skel->bss->output_data2, 1 + 2 + 10, "output_data2");
+	/* 100 comes from "winner" input_rodata_weak in first obj file */
+	ASSERT_EQ(skel->bss->output_rodata1, 11 + 22 + 100, "output_rodata1");
+	ASSERT_EQ(skel->bss->output_rodata2, 11 + 22 + 100, "output_rodata2");
+
+cleanup:
+	linked_vars__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
index c230a573c373..4972f92205c7 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_ptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
@@ -12,11 +12,22 @@ void test_map_ptr(void)
__u32 duration = 0, retval;
char buf[128];
int err;
+ int page_size = getpagesize();
- skel = map_ptr_kern__open_and_load();
- if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
+ skel = map_ptr_kern__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
+ err = bpf_map__set_max_entries(skel->maps.m_ringbuf, page_size);
+ if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+ goto cleanup;
+
+ err = map_ptr_kern__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ skel->bss->page_size = page_size;
+
err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4,
sizeof(pkt_v4), buf, NULL, &retval, NULL);
diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
index 9c3c5c0f068f..37b002ca1167 100644
--- a/tools/testing/selftests/bpf/prog_tests/mmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -29,22 +29,36 @@ void test_mmap(void)
struct test_mmap *skel;
__u64 val = 0;
- skel = test_mmap__open_and_load();
- if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+ skel = test_mmap__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ err = bpf_map__set_max_entries(skel->maps.rdonly_map, page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ /* at least 4 pages of data */
+ err = bpf_map__set_max_entries(skel->maps.data_map,
+ 4 * (page_size / sizeof(u64)));
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ err = test_mmap__load(skel);
+ if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+ goto cleanup;
+
bss_map = skel->maps.bss;
data_map = skel->maps.data_map;
data_map_fd = bpf_map__fd(data_map);
rdmap_fd = bpf_map__fd(skel->maps.rdonly_map);
- tmp1 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0);
+ tmp1 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0);
if (CHECK(tmp1 != MAP_FAILED, "rdonly_write_mmap", "unexpected success\n")) {
- munmap(tmp1, 4096);
+ munmap(tmp1, page_size);
goto cleanup;
}
/* now double-check if it's mmap()'able at all */
- tmp1 = mmap(NULL, 4096, PROT_READ, MAP_SHARED, rdmap_fd, 0);
+ tmp1 = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rdmap_fd, 0);
if (CHECK(tmp1 == MAP_FAILED, "rdonly_read_mmap", "failed: %d\n", errno))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index 31a3114906e2..2535788e135f 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -68,10 +68,10 @@ static void test_ns_current_pid_tgid_new_ns(void)
cpid = clone(test_current_pid_tgid, child_stack + STACK_SIZE,
CLONE_NEWPID | SIGCHLD, NULL);
- if (CHECK(cpid == -1, "clone", strerror(errno)))
+ if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno)))
return;
- if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno)))
+ if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno)))
return;
if (CHECK(WEXITSTATUS(wstatus) != 0, "newns_pidtgid", "failed"))
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index fddbc5db5d6a..de78617f6550 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -87,11 +87,20 @@ void test_ringbuf(void)
pthread_t thread;
long bg_ret = -1;
int err, cnt;
+ int page_size = getpagesize();
- skel = test_ringbuf__open_and_load();
- if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ skel = test_ringbuf__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ err = bpf_map__set_max_entries(skel->maps.ringbuf, page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ err = test_ringbuf__load(skel);
+ if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+ goto cleanup;
+
/* only trigger BPF program for current process */
skel->bss->pid = getpid();
@@ -110,9 +119,9 @@ void test_ringbuf(void)
CHECK(skel->bss->avail_data != 3 * rec_sz,
"err_avail_size", "exp %ld, got %ld\n",
3L * rec_sz, skel->bss->avail_data);
- CHECK(skel->bss->ring_size != 4096,
+ CHECK(skel->bss->ring_size != page_size,
"err_ring_size", "exp %ld, got %ld\n",
- 4096L, skel->bss->ring_size);
+ (long)page_size, skel->bss->ring_size);
CHECK(skel->bss->cons_pos != 0,
"err_cons_pos", "exp %ld, got %ld\n",
0L, skel->bss->cons_pos);
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index d37161e59bb2..cef63e703924 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -41,13 +41,42 @@ static int process_sample(void *ctx, void *data, size_t len)
void test_ringbuf_multi(void)
{
struct test_ringbuf_multi *skel;
- struct ring_buffer *ringbuf;
+ struct ring_buffer *ringbuf = NULL;
int err;
+ int page_size = getpagesize();
+ int proto_fd = -1;
- skel = test_ringbuf_multi__open_and_load();
- if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ skel = test_ringbuf_multi__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ err = bpf_map__set_max_entries(skel->maps.ringbuf1, page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ err = bpf_map__set_max_entries(skel->maps.ringbuf2, page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ err = bpf_map__set_max_entries(bpf_map__inner_map(skel->maps.ringbuf_arr), page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0);
+ if (CHECK(proto_fd == -1, "bpf_create_map", "bpf_create_map failed\n"))
+ goto cleanup;
+
+ err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd);
+ if (CHECK(err != 0, "bpf_map__set_inner_map_fd", "bpf_map__set_inner_map_fd failed\n"))
+ goto cleanup;
+
+ err = test_ringbuf_multi__load(skel);
+ if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+ goto cleanup;
+
+ close(proto_fd);
+ proto_fd = -1;
+
/* only trigger BPF program for current process */
skel->bss->pid = getpid();
@@ -97,6 +126,8 @@ void test_ringbuf_multi(void)
2L, skel->bss->total);
cleanup:
+ if (proto_fd >= 0)
+ close(proto_fd);
ring_buffer__free(ringbuf);
test_ringbuf_multi__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
new file mode 100644
index 000000000000..a958c22aec75
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <test_progs.h>
+#include "test_snprintf.skel.h"
+#include "test_snprintf_single.skel.h"
+
+#define EXP_NUM_OUT "-8 9 96 -424242 1337 DABBAD00"
+#define EXP_NUM_RET sizeof(EXP_NUM_OUT)
+
+#define EXP_IP_OUT "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001"
+#define EXP_IP_RET sizeof(EXP_IP_OUT)
+
+/* The third specifier, %pB, depends on compiler inlining so don't check it */
+#define EXP_SYM_OUT "schedule schedule+0x0/"
+#define MIN_SYM_RET sizeof(EXP_SYM_OUT)
+
+/* The third specifier, %p, is a hashed pointer which changes on every reboot */
+#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
+#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
+
+#define EXP_STR_OUT "str1 longstr"
+#define EXP_STR_RET sizeof(EXP_STR_OUT)
+
+#define EXP_OVER_OUT "%over"
+#define EXP_OVER_RET 10
+
+#define EXP_PAD_OUT " 4 000"
+#define EXP_PAD_RET 900007
+
+#define EXP_NO_ARG_OUT "simple case"
+#define EXP_NO_ARG_RET 12
+
+#define EXP_NO_BUF_RET 29
+
+/* Run test_snprintf once and compare each output buffer and return value. */
+void test_snprintf_positive(void)
+{
+	struct test_snprintf *skel;
+	int err;
+
+	skel = test_snprintf__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	err = test_snprintf__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	/* trigger tracepoint */
+	usleep(1);
+
+	ASSERT_STREQ(skel->bss->num_out, EXP_NUM_OUT, "num_out");
+	ASSERT_EQ(skel->bss->num_ret, EXP_NUM_RET, "num_ret");
+
+	ASSERT_STREQ(skel->bss->ip_out, EXP_IP_OUT, "ip_out");
+	ASSERT_EQ(skel->bss->ip_ret, EXP_IP_RET, "ip_ret");
+
+	/* prefix-only compares: "- 1" keeps the terminating NUL out of memcmp() */
+	ASSERT_OK(memcmp(skel->bss->sym_out, EXP_SYM_OUT,
+			 sizeof(EXP_SYM_OUT) - 1), "sym_out");
+	ASSERT_LT(MIN_SYM_RET, skel->bss->sym_ret, "sym_ret");
+	ASSERT_OK(memcmp(skel->bss->addr_out, EXP_ADDR_OUT,
+			 sizeof(EXP_ADDR_OUT) - 1), "addr_out");
+	ASSERT_EQ(skel->bss->addr_ret, EXP_ADDR_RET, "addr_ret");
+
+	ASSERT_STREQ(skel->bss->str_out, EXP_STR_OUT, "str_out");
+	ASSERT_EQ(skel->bss->str_ret, EXP_STR_RET, "str_ret");
+
+	ASSERT_STREQ(skel->bss->over_out, EXP_OVER_OUT, "over_out");
+	ASSERT_EQ(skel->bss->over_ret, EXP_OVER_RET, "over_ret");
+
+	ASSERT_STREQ(skel->bss->pad_out, EXP_PAD_OUT, "pad_out");
+	ASSERT_EQ(skel->bss->pad_ret, EXP_PAD_RET, "pad_ret");
+
+	ASSERT_STREQ(skel->bss->noarg_out, EXP_NO_ARG_OUT, "no_arg_out");
+	ASSERT_EQ(skel->bss->noarg_ret, EXP_NO_ARG_RET, "no_arg_ret");
+	ASSERT_EQ(skel->bss->nobuf_ret, EXP_NO_BUF_RET, "no_buf_ret");
+
+cleanup:
+	test_snprintf__destroy(skel);
+}
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+/* Copy up to 10 bytes of fmt (its NUL only if it fits) into rodata, then
+ * return the load result, or -EINVAL if the skeleton cannot be opened. */
+static int load_single_snprintf(char *fmt)
+{
+	struct test_snprintf_single *skel = test_snprintf_single__open();
+	size_t len;
+	int ret;
+
+	if (!skel)
+		return -EINVAL;
+
+	len = min(strlen(fmt) + 1, 10);
+	memcpy(skel->rodata->fmt, fmt, len);
+	ret = test_snprintf_single__load(skel);
+	test_snprintf_single__destroy(skel);
+	return ret;
+}
+
+void test_snprintf_negative(void)
+{
+ ASSERT_OK(load_single_snprintf("valid %d"), "valid usage");
+ /* the format above must load; each format below is expected to fail */
+ ASSERT_ERR(load_single_snprintf("0123456789"), "no terminating zero");
+ ASSERT_ERR(load_single_snprintf("%d %d"), "too many specifiers");
+ ASSERT_ERR(load_single_snprintf("%pi5"), "invalid specifier 1");
+ ASSERT_ERR(load_single_snprintf("%a"), "invalid specifier 2");
+ ASSERT_ERR(load_single_snprintf("%"), "invalid specifier 3");
+ ASSERT_ERR(load_single_snprintf("%12345678"), "invalid specifier 4");
+ ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
+ ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
+ ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
+}
+
+void test_snprintf(void)
+{
+ if (test__start_subtest("snprintf_positive")) /* non-zero: run the subtest */
+ test_snprintf_positive();
+ if (test__start_subtest("snprintf_negative")) /* same gate, second subtest */
+ test_snprintf_negative();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index d5b44b135c00..4b937e5dbaca 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -3,6 +3,7 @@
#include "cgroup_helpers.h"
#include <linux/tcp.h>
+#include "sockopt_sk.skel.h"
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
@@ -191,60 +192,30 @@ err:
return -1;
}
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
+static void run_test(int cgroup_fd)
{
- enum bpf_attach_type attach_type;
- enum bpf_prog_type prog_type;
- struct bpf_program *prog;
- int err;
+ struct sockopt_sk *skel;
- err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
- if (err) {
- log_err("Failed to deduct types for %s BPF program", title);
- return -1;
- }
+ skel = sockopt_sk__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
- prog = bpf_object__find_program_by_title(obj, title);
- if (!prog) {
- log_err("Failed to find %s BPF program", title);
- return -1;
- }
-
- err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
- attach_type, 0);
- if (err) {
- log_err("Failed to attach %s BPF program", title);
- return -1;
- }
-
- return 0;
-}
-
-static void run_test(int cgroup_fd)
-{
- struct bpf_prog_load_attr attr = {
- .file = "./sockopt_sk.o",
- };
- struct bpf_object *obj;
- int ignored;
- int err;
-
- err = bpf_prog_load_xattr(&attr, &obj, &ignored);
- if (CHECK_FAIL(err))
- return;
+ skel->bss->page_size = getpagesize();
- err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
- if (CHECK_FAIL(err))
- goto close_bpf_object;
+ skel->links._setsockopt =
+ bpf_program__attach_cgroup(skel->progs._setsockopt, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links._setsockopt, "setsockopt_link"))
+ goto cleanup;
- err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
- if (CHECK_FAIL(err))
- goto close_bpf_object;
+ skel->links._getsockopt =
+ bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links._getsockopt, "getsockopt_link"))
+ goto cleanup;
- CHECK_FAIL(getsetsockopt());
+ ASSERT_OK(getsetsockopt(), "getsetsockopt");
-close_bpf_object:
- bpf_object__close(obj);
+cleanup:
+ sockopt_sk__destroy(skel);
}
void test_sockopt_sk(void)
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
index 50e59a2e142e..43c36f5f7649 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -35,3 +35,30 @@ int dump_task_stack(struct bpf_iter__task *ctx)
return 0;
}
+
+SEC("iter/task")
+int get_task_user_stacks(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ uint64_t buf_sz = 0;
+ int64_t res;
+ /* nothing to dump when the iterator passes no task */
+ if (task == (void *)0)
+ return 0;
+
+ res = bpf_get_task_stack(task, entries,
+ MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, BPF_F_USER_STACK);
+ if (res <= 0) /* non-positive result: nothing to write */
+ return 0;
+
+ buf_sz += res; /* buf_sz started at 0, so it now equals the positive res */
+
+ /* If the verifier doesn't refine bpf_get_task_stack res, and instead
+ * assumes res is entirely unknown, this program will fail to load as
+ * the verifier will believe that max buf_sz value allows reading
+ * past the end of entries in bpf_seq_write call.
+ */
+ bpf_seq_write(seq, &entries, buf_sz);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c
new file mode 100644
index 000000000000..b964ec1390c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* weak and shared between two files */
+const volatile int my_tid __weak;
+long syscall_id __weak;
+
+int output_val1;
+int output_ctx1;
+int output_weak1;
+
+/* Same "subprog" name in all files, but it's ok because they all are static. */
+static __noinline int subprog(int x)
+{
+ /* but different formula: x * 1 here, x * 2 in linked_funcs2.c */
+ return x * 1;
+}
+
+/* Global functions can't be void, so x is returned after output_val1 is set to x + subprog(x) */
+int set_output_val1(int x)
+{
+ output_val1 = x + subprog(x);
+ return x;
+}
+
+/* This function can't be verified as global, as it assumes raw_tp/sys_enter
+ * context and accesses syscall id (second argument). So it has to end up
+ * __hidden, so that libbpf will mark it as static in the final object file,
+ * right before verifying it in the kernel.
+ *
+ * But we don't mark it as __hidden here, rather at the extern site (the
+ * declaration in linked_funcs2.c). __hidden is "contaminating" visibility, so
+ * it gets propagated from either extern or actual definition (including from the losing __weak one).
+ */
+void set_output_ctx1(__u64 *ctx)
+{
+ output_ctx1 = ctx[1]; /* long id, same as in BPF_PROG below */
+}
+
+/* this weak instance should win because it's the first one */
+__weak int set_output_weak(int x)
+{
+ output_weak1 = x; /* linked_funcs2.c's copy writes output_weak2 instead, which should stay zero */
+ return x;
+}
+
+extern int set_output_val2(int x);
+
+/* here we'll force set_output_ctx2() to be __hidden in the final obj file */
+__hidden extern void set_output_ctx2(__u64 *ctx);
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler1, struct pt_regs *regs, long id)
+{
+ if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
+ return 0; /* event doesn't match my_tid/syscall_id */
+ /* both callees are defined in linked_funcs2.c and only declared extern in this file */
+ set_output_val2(1000);
+ set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */
+
+ /* keep input value the same across both files to avoid dependency on
+ * handler call order; differentiate by output_weak1 vs output_weak2.
+ */
+ set_output_weak(42);
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c
new file mode 100644
index 000000000000..575e958e60b7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* weak and shared between both files */
+const volatile int my_tid __weak;
+long syscall_id __weak;
+
+int output_val2;
+int output_ctx2;
+int output_weak2; /* should stay zero */
+
+/* Same "subprog" name in all files, but it's ok because they all are static. */
+static __noinline int subprog(int x)
+{
+ /* but different formula: x * 2 here, x * 1 in linked_funcs1.c */
+ return x * 2;
+}
+
+/* Global functions can't be void, so 2 * x is returned after output_val2 is set */
+int set_output_val2(int x)
+{
+ output_val2 = 2 * x + 2 * subprog(x);
+ return 2 * x;
+}
+
+/* This function can't be verified as global, as it assumes raw_tp/sys_enter
+ * context and accesses syscall id (second argument). So it has to end up
+ * __hidden, so that libbpf will mark it as static in the final object file,
+ * right before verifying it in the kernel.
+ *
+ * But we don't mark it as __hidden here, rather at the extern site (the
+ * declaration in linked_funcs1.c). __hidden is "contaminating" visibility, so
+ * it gets propagated from either extern or actual definition (including from the losing __weak one).
+ */
+void set_output_ctx2(__u64 *ctx)
+{
+ output_ctx2 = ctx[1]; /* long id, same as in BPF_PROG below */
+}
+
+/* this weak instance should lose, because it will be processed second */
+__weak int set_output_weak(int x)
+{
+ output_weak2 = x; /* not expected to run: output_weak2 should stay zero */
+ return 2 * x;
+}
+
+extern int set_output_val1(int x);
+
+/* here we'll force set_output_ctx1() to be __hidden in the final obj file */
+__hidden extern void set_output_ctx1(__u64 *ctx);
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler2, struct pt_regs *regs, long id)
+{
+ if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
+ return 0; /* event doesn't match my_tid/syscall_id */
+ /* both callees are defined in linked_funcs1.c and only declared extern in this file */
+ set_output_val1(2000);
+ set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */
+
+ /* keep input value the same across both files to avoid dependency on
+ * handler call order; differentiate by output_weak1 vs output_weak2.
+ */
+ set_output_weak(42);
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_maps1.c b/tools/testing/selftests/bpf/progs/linked_maps1.c
new file mode 100644
index 000000000000..52291515cc72
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_maps1.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct my_key { long x; };
+struct my_value { long x; };
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct my_key);
+ __type(value, struct my_value);
+ __uint(max_entries, 16);
+} map1 SEC(".maps");
+
+ /* Matches map2 definition in linked_maps2.c. Order of the attributes doesn't
+ * matter.
+ */
+typedef struct {
+ __uint(max_entries, 8);
+ __type(key, int);
+ __type(value, int);
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+} map2_t;
+
+extern map2_t map2 SEC(".maps");
+
+/* This should be the winning map definition, but we have no way of verifying,
+ * so we just make sure that it links and works without errors
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 16);
+} map_weak __weak SEC(".maps");
+
+int output_first1;
+int output_second1;
+int output_weak1;
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler_enter1)
+{
+ /* update values with key = 1; handler_exit2 in linked_maps2.c looks them up */
+ int key = 1, val = 1;
+ struct my_key key_struct = { .x = 1 };
+ struct my_value val_struct = { .x = 1000 };
+
+ bpf_map_update_elem(&map1, &key_struct, &val_struct, 0);
+ bpf_map_update_elem(&map2, &key, &val, 0); /* map2 is extern here, defined in linked_maps2.c */
+ bpf_map_update_elem(&map_weak, &key, &val, 0);
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int BPF_PROG(handler_exit1)
+{
+ /* lookup values with key = 2, set by handler_enter2 in linked_maps2.c */
+ int key = 2, *val;
+ struct my_key key_struct = { .x = 2 };
+ struct my_value *value_struct;
+ /* a NULL lookup result leaves the corresponding output variable untouched */
+ value_struct = bpf_map_lookup_elem(&map1, &key_struct);
+ if (value_struct)
+ output_first1 = value_struct->x;
+
+ val = bpf_map_lookup_elem(&map2, &key);
+ if (val)
+ output_second1 = *val;
+
+ val = bpf_map_lookup_elem(&map_weak, &key);
+ if (val)
+ output_weak1 = *val;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_maps2.c b/tools/testing/selftests/bpf/progs/linked_maps2.c
new file mode 100644
index 000000000000..0693687474ed
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_maps2.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* modifiers and typedefs are ignored when comparing key/value types */
+typedef struct my_key { long x; } key_type;
+typedef struct my_value { long x; } value_type;
+
+extern struct {
+ __uint(max_entries, 16);
+ __type(key, key_type);
+ __type(value, value_type);
+ __uint(type, BPF_MAP_TYPE_HASH);
+} map1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 8);
+} map2 SEC(".maps");
+
+/* this definition will lose, but it has to exactly match the winner */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 16);
+} map_weak __weak SEC(".maps");
+
+int output_first2;
+int output_second2;
+int output_weak2;
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler_enter2)
+{
+ /* update values with key = 2; handler_exit1 in linked_maps1.c looks them up */
+ int key = 2, val = 2;
+ key_type key_struct = { .x = 2 };
+ value_type val_struct = { .x = 2000 };
+
+ bpf_map_update_elem(&map1, &key_struct, &val_struct, 0); /* map1 is extern here, defined in linked_maps1.c */
+ bpf_map_update_elem(&map2, &key, &val, 0);
+ bpf_map_update_elem(&map_weak, &key, &val, 0);
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int BPF_PROG(handler_exit2)
+{
+ /* lookup values with key = 1, set by handler_enter1 in linked_maps1.c */
+ int key = 1, *val;
+ key_type key_struct = { .x = 1 };
+ value_type *value_struct;
+ /* a NULL lookup result leaves the corresponding output variable untouched */
+ value_struct = bpf_map_lookup_elem(&map1, &key_struct);
+ if (value_struct)
+ output_first2 = value_struct->x;
+
+ val = bpf_map_lookup_elem(&map2, &key);
+ if (val)
+ output_second2 = *val;
+
+ val = bpf_map_lookup_elem(&map_weak, &key);
+ if (val)
+ output_weak2 = *val;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_vars1.c b/tools/testing/selftests/bpf/progs/linked_vars1.c
new file mode 100644
index 000000000000..ef9e9d0bb0ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_vars1.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern int LINUX_KERNEL_VERSION __kconfig;
+/* this weak extern will be strict due to the other file's strong extern */
+extern bool CONFIG_BPF_SYSCALL __kconfig __weak;
+extern const void bpf_link_fops __ksym __weak;
+
+int input_bss1;
+int input_data1 = 1;
+const volatile int input_rodata1 = 11;
+
+int input_bss_weak __weak;
+/* these two definitions should win */
+int input_data_weak __weak = 10;
+const volatile int input_rodata_weak __weak = 100;
+
+extern int input_bss2;
+extern int input_data2;
+extern const int input_rodata2;
+
+int output_bss1;
+int output_data1;
+int output_rodata1;
+
+long output_sink1;
+
+static __noinline int get_bss_res(void)
+{
+ /* just make sure all the relocations work against .text as well (input_bss2 is extern here) */
+ return input_bss1 + input_bss2 + input_bss_weak;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler1)
+{
+ output_bss1 = get_bss_res(); /* summed inside a __noinline subprog */
+ output_data1 = input_data1 + input_data2 + input_data_weak;
+ output_rodata1 = input_rodata1 + input_rodata2 + input_rodata_weak;
+
+ /* Make sure we actually use the above special externs, otherwise the
+ * compiler will optimize them out.
+ */
+ output_sink1 = LINUX_KERNEL_VERSION
+ + CONFIG_BPF_SYSCALL
+ + (long)&bpf_link_fops;
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_vars2.c b/tools/testing/selftests/bpf/progs/linked_vars2.c
new file mode 100644
index 000000000000..e4f5bd388a3c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_vars2.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern int LINUX_KERNEL_VERSION __kconfig;
+/* when an extern is defined as both strong and weak, resulting symbol will be strong */
+extern bool CONFIG_BPF_SYSCALL __kconfig;
+extern const void __start_BTF __ksym;
+
+int input_bss2;
+int input_data2 = 2;
+const volatile int input_rodata2 = 22;
+
+int input_bss_weak __weak;
+/* these two weak variables should lose */
+int input_data_weak __weak = 20;
+const volatile int input_rodata_weak __weak = 200;
+
+extern int input_bss1;
+extern int input_data1;
+extern const int input_rodata1;
+
+int output_bss2;
+int output_data2;
+int output_rodata2;
+
+long output_sink2; /* long, like output_sink1 in linked_vars1.c: the stored sum includes a (long) address cast */
+
+static __noinline int get_data_res(void)
+{
+ /* just make sure all the relocations work against .text as well (input_data1 is extern here) */
+ return input_data1 + input_data2 + input_data_weak;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler2)
+{
+ output_bss2 = input_bss1 + input_bss2 + input_bss_weak;
+ output_data2 = get_data_res(); /* summed inside a __noinline subprog */
+ output_rodata2 = input_rodata1 + input_rodata2 + input_rodata_weak;
+
+ /* Make sure we actually use the above special externs, otherwise the
+ * compiler will optimize them out.
+ */
+ output_sink2 = LINUX_KERNEL_VERSION
+ + CONFIG_BPF_SYSCALL
+ + (long)&__start_BTF;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index d8850bc6a9f1..d1d304c980f0 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -12,6 +12,7 @@ _Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND");
enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC;
__u32 g_line = 0;
+int page_size = 0; /* userspace should set it */
#define VERIFY_TYPE(type, func) ({ \
g_map_type = type; \
@@ -635,7 +636,6 @@ struct bpf_ringbuf_map {
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
- __uint(max_entries, 1 << 12);
} m_ringbuf SEC(".maps");
static inline int check_ringbuf(void)
@@ -643,7 +643,7 @@ static inline int check_ringbuf(void)
struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf;
struct bpf_map *map = (struct bpf_map *)&m_ringbuf;
- VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12));
+ VERIFY(check(&ringbuf->map, map, 0, 0, page_size));
return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
index fdb4bf4408fa..eeaf6e75c9a2 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
@@ -8,18 +8,6 @@ int _version SEC("version") = 1;
SEC("sk_msg1")
int bpf_prog1(struct sk_msg_md *msg)
{
- void *data_end = (void *)(long) msg->data_end;
- void *data = (void *)(long) msg->data;
-
- char *d;
-
- if (data + 8 > data_end)
- return SK_DROP;
-
- bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data);
- d = (char *)data;
- bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]);
-
return SK_PASS;
}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index d3597f81e6e9..8acdb99b5959 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -6,11 +6,8 @@
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
-#ifndef PAGE_SIZE
-#define PAGE_SIZE 4096
-#endif
+int page_size = 0; /* userspace should set it */
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
@@ -90,7 +87,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
* program can only see the first PAGE_SIZE
* bytes of data.
*/
- if (optval_end - optval != PAGE_SIZE)
+ if (optval_end - optval != page_size)
return 0; /* EPERM, unexpected data size */
return 1;
@@ -161,7 +158,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
/* Original optlen is larger than PAGE_SIZE. */
- if (ctx->optlen != PAGE_SIZE * 2)
+ if (ctx->optlen != page_size * 2)
return 0; /* EPERM, unexpected data size */
if (optval + 1 > optval_end)
@@ -175,7 +172,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
* program can only see the first PAGE_SIZE
* bytes of data.
*/
- if (optval_end - optval != PAGE_SIZE)
+ if (optval_end - optval != page_size)
return 0; /* EPERM, unexpected data size */
return 1;
diff --git a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c
index 4eb42cff5fe9..5a5cc19a15bf 100644
--- a/tools/testing/selftests/bpf/progs/test_mmap.c
+++ b/tools/testing/selftests/bpf/progs/test_mmap.c
@@ -9,7 +9,6 @@ char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 4096);
__uint(map_flags, BPF_F_MMAPABLE | BPF_F_RDONLY_PROG);
__type(key, __u32);
__type(value, char);
@@ -17,7 +16,6 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 512 * 4); /* at least 4 pages of data */
__uint(map_flags, BPF_F_MMAPABLE);
__type(key, __u32);
__type(value, __u64);
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
index 8ba9959b036b..6b3f288b7c63 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -15,7 +15,6 @@ struct sample {
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
- __uint(max_entries, 1 << 12);
} ringbuf SEC(".maps");
/* inputs */
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
index edf3b6953533..197b86546dca 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
@@ -15,7 +15,6 @@ struct sample {
struct ringbuf_map {
__uint(type, BPF_MAP_TYPE_RINGBUF);
- __uint(max_entries, 1 << 12);
} ringbuf1 SEC(".maps"),
ringbuf2 SEC(".maps");
@@ -31,6 +30,17 @@ struct {
},
};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __array(values, struct ringbuf_map);
+} ringbuf_hash SEC(".maps") = {
+ .values = {
+ [0] = &ringbuf1,
+ },
+};
+
/* inputs */
int pid = 0;
int target_ring = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c
new file mode 100644
index 000000000000..951a0301c553
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_snprintf.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char num_out[64] = {};
+long num_ret = 0;
+
+char ip_out[64] = {};
+long ip_ret = 0;
+
+char sym_out[64] = {};
+long sym_ret = 0;
+
+char addr_out[64] = {};
+long addr_ret = 0;
+
+char str_out[64] = {};
+long str_ret = 0;
+
+char over_out[6] = {};
+long over_ret = 0;
+
+char pad_out[10] = {};
+long pad_ret = 0;
+
+char noarg_out[64] = {};
+long noarg_ret = 0;
+
+long nobuf_ret = 0;
+
+extern const void schedule __ksym;
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ /* Convenient values to pretty-print */
+ const __u8 ex_ipv4[] = {127, 0, 0, 1};
+ const __u8 ex_ipv6[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ static const char str1[] = "str1";
+ static const char longstr[] = "longstr";
+
+ /* Integer types */
+ num_ret = BPF_SNPRINTF(num_out, sizeof(num_out),
+ "%d %u %x %li %llu %lX",
+ -8, 9, 150, -424242, 1337, 0xDABBAD00);
+ /* IP addresses */
+ ip_ret = BPF_SNPRINTF(ip_out, sizeof(ip_out), "%pi4 %pI6",
+ &ex_ipv4, &ex_ipv6);
+ /* Symbol lookup formatting, using the address of the schedule ksym */
+ sym_ret = BPF_SNPRINTF(sym_out, sizeof(sym_out), "%ps %pS %pB",
+ &schedule, &schedule, &schedule);
+ /* Kernel pointers */
+ addr_ret = BPF_SNPRINTF(addr_out, sizeof(addr_out), "%pK %px %p",
+ 0, 0xFFFF00000ADD4E55, 0xFFFF00000ADD4E55);
+ /* Strings embedding */
+ str_ret = BPF_SNPRINTF(str_out, sizeof(str_out), "%s %+05s",
+ str1, longstr);
+ /* Overflow: over_out is only 6 bytes, shorter than the formatted text */
+ over_ret = BPF_SNPRINTF(over_out, sizeof(over_out), "%%overflow");
+ /* Padding of fixed width numbers; pad_out is 10 bytes */
+ pad_ret = BPF_SNPRINTF(pad_out, sizeof(pad_out), "%5d %0900000X", 4, 4);
+ /* No args */
+ noarg_ret = BPF_SNPRINTF(noarg_out, sizeof(noarg_out), "simple case");
+ /* No buffer: NULL destination of size 0, only the return value is kept */
+ nobuf_ret = BPF_SNPRINTF(NULL, 0, "only interested in length %d", 60);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf_single.c b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
new file mode 100644
index 000000000000..402adaf344f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* The format string is filled from the userspace such that loading fails */
+static const char fmt[10];
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ unsigned long long arg = 42;
+ /* one argument is supplied and the output is discarded (NULL buffer, size 0) */
+ bpf_snprintf(NULL, 0, fmt, &arg, sizeof(arg));
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index ba6eadfec565..e7b673117436 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -396,7 +396,7 @@ int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
SEC("geneve_set_tunnel")
int _geneve_set_tunnel(struct __sk_buff *skb)
{
- int ret, ret2;
+ int ret;
struct bpf_tunnel_key key;
struct geneve_opt gopt;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index e87c8546230e..ee7e3b45182a 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -210,7 +210,7 @@ extern int test__join_cgroup(const char *path);
#define ASSERT_ERR_PTR(ptr, name) ({ \
static int duration = 0; \
const void *___res = (ptr); \
- bool ___ok = IS_ERR(___res) \
+ bool ___ok = IS_ERR(___res); \
CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \
___ok; \
})
diff --git a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
index 69b048cf46d9..3e024c891178 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
@@ -42,3 +42,46 @@
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
+{
+ "bpf_get_task_stack return R0 range is refined",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_6, 0), // ctx->meta->seq
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1, 8), // ctx->task
+ BPF_LD_MAP_FD(BPF_REG_1, 0), // fixup_map_array_48b
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), // exit with 0 if the lookup returned NULL
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2), // exit with 0 if ctx->task is NULL
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ // call get_task_stack with R1 = task, R2 = map value, R3 = 48, R4 = 0
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), // keep buf for seq_write
+ BPF_MOV64_IMM(BPF_REG_3, 48),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_EMIT_CALL(BPF_FUNC_get_task_stack),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 0, 2), // continue only if R0 > 0
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ // call seq_write with R1 = seq, R2 = buf, R3 = get_task_stack's return value
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_9),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_seq_write),
+
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_ITER,
+ .kfunc = "task",
+ .runs = -1, // Don't run, just load
+ .fixup_map_array_48b = { 3 },
+},
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index a5ce26d548e4..9a41d8bb9ff1 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -1,6 +1,10 @@
# This mimics the top-level Makefile. We do it explicitly here so that this
# Makefile can operate with or without the kbuild infrastructure.
+ifneq ($(LLVM),)
+CC := clang
+else
CC := $(CROSS_COMPILE)gcc
+endif
ifeq (0,$(MAKELEVEL))
ifeq ($(OUTPUT),)