aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt23
-rw-r--r--Documentation/devicetree/bindings/net/sff,sfp.txt5
-rw-r--r--MAINTAINERS9
-rw-r--r--arch/arm/boot/dts/armada-370-rd.dts32
-rw-r--r--arch/m68k/mac/config.c4
-rw-r--r--arch/x86/net/bpf_jit_comp.c219
-rw-r--r--drivers/infiniband/core/cma.c1
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile1
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c189
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.h72
-rw-r--r--drivers/infiniband/hw/mlx5/main.c361
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h32
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c5
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c24
-rw-r--r--drivers/net/Kconfig1
-rw-r--r--drivers/net/Space.c3
-rw-r--r--drivers/net/bonding/bond_main.c1
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c146
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.h3
-rw-r--r--drivers/net/ethernet/apple/macmace.c25
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c6
-rw-r--r--drivers/net/ethernet/cirrus/mac89x0.c158
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.c2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.h2
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_eth.c63
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_dtsec.c19
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_dtsec.h1
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_memac.c32
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_memac.h1
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_tgec.c33
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_tgec.h1
-rw-r--r--drivers/net/ethernet/freescale/fman/mac.c3
-rw-r--r--drivers/net/ethernet/freescale/fman/mac.h2
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c71
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_common.c5
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_main.c4
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_netdev.c10
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pci.c13
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pf.c4
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_tlv.c7
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h37
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c75
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c16
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c6
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.c2
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_main.c75
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c32
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c1
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c3
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ethtool.c48
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf.h72
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c825
-rw-r--r--drivers/net/ethernet/marvell/mvpp2.c414
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_ethtool.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_port.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c104
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Kconfig4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h145
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c60
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c206
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c522
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h41
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c12
-rw-r--r--drivers/net/ethernet/natsemi/jazzsonic.c32
-rw-r--r--drivers/net/ethernet/natsemi/macsonic.c244
-rw-r--r--drivers/net/ethernet/natsemi/sonic.c99
-rw-r--r--drivers/net/ethernet/natsemi/sonic.h2
-rw-r--r--drivers/net/ethernet/natsemi/xtsonic.c30
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iwarp.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169.c61
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c21
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.h5
-rw-r--r--drivers/net/ethernet/sfc/falcon/enum.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c18
-rw-r--r--drivers/net/geneve.c1
-rw-r--r--drivers/net/gtp.c1
-rw-r--r--drivers/net/ieee802154/Kconfig11
-rw-r--r--drivers/net/ieee802154/Makefile1
-rw-r--r--drivers/net/ieee802154/mcr20a.c1413
-rw-r--r--drivers/net/ieee802154/mcr20a.h498
-rw-r--r--drivers/net/ipvlan/ipvlan.h1
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c34
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c61
-rw-r--r--drivers/net/phy/aquantia.c20
-rw-r--r--drivers/net/phy/cortina.c18
-rw-r--r--drivers/net/phy/marvell10g.c11
-rw-r--r--drivers/net/phy/phy-c45.c20
-rw-r--r--drivers/net/phy/phylink.c35
-rw-r--r--drivers/net/phy/sfp-bus.c162
-rw-r--r--drivers/net/phy/sfp.c150
-rw-r--r--drivers/net/phy/teranetics.c32
-rw-r--r--drivers/net/ppp/ppp_generic.c1
-rw-r--r--drivers/net/ppp/pppoe.c1
-rw-r--r--drivers/net/team/team.c16
-rw-r--r--drivers/net/vrf.c1
-rw-r--r--drivers/net/vxlan.c1
-rw-r--r--drivers/net/xen-netback/rx.c2
-rw-r--r--fs/lockd/svc.c1
-rw-r--r--fs/nfs/inode.c1
-rw-r--r--fs/nfs_common/grace.c1
-rw-r--r--include/linux/bpf-cgroup.h2
-rw-r--r--include/linux/filter.h2
-rw-r--r--include/linux/mlx5/driver.h6
-rw-r--r--include/linux/mlx5/eswitch.h58
-rw-r--r--include/linux/mroute.h88
-rw-r--r--include/linux/mroute6.h62
-rw-r--r--include/linux/mroute_base.h346
-rw-r--r--include/linux/phy.h8
-rw-r--r--include/linux/sfp.h18
-rw-r--r--include/net/Space.h1
-rw-r--r--include/net/ethoc.h1
-rw-r--r--include/net/fib_rules.h42
-rw-r--r--include/net/flow.h2
-rw-r--r--include/net/gre.h3
-rw-r--r--include/net/inet_connection_sock.h10
-rw-r--r--include/net/ip.h12
-rw-r--r--include/net/ip6_fib.h25
-rw-r--r--include/net/ip6_route.h4
-rw-r--r--include/net/ip_fib.h29
-rw-r--r--include/net/ip_tunnels.h16
-rw-r--r--include/net/ipv6.h10
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/netns/ipv6.h5
-rw-r--r--include/net/pkt_cls.h8
-rw-r--r--include/net/sch_generic.h2
-rw-r--r--include/net/tcp.h6
-rw-r--r--include/net/xfrm.h14
-rw-r--r--include/uapi/linux/errqueue.h2
-rw-r--r--include/uapi/linux/fib_rules.h8
-rw-r--r--include/uapi/linux/rds.h7
-rw-r--r--kernel/bpf/verifier.c4
-rw-r--r--net/8021q/vlan.c1
-rw-r--r--net/bridge/br.c1
-rw-r--r--net/bridge/br_netfilter_hooks.c1
-rw-r--r--net/can/bcm.c1
-rw-r--r--net/core/dev.c2
-rw-r--r--net/core/fib_rules.c92
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/skbuff.c4
-rw-r--r--net/ieee802154/6lowpan/core.c1
-rw-r--r--net/ieee802154/core.c1
-rw-r--r--net/ipv4/Kconfig5
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/fib_rules.c19
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/fib_trie.c5
-rw-r--r--net/ipv4/inetpeer.c3
-rw-r--r--net/ipv4/ip_gre.c9
-rw-r--r--net/ipv4/ip_tunnel.c40
-rw-r--r--net/ipv4/ip_vti.c1
-rw-r--r--net/ipv4/ipip.c1
-rw-r--r--net/ipv4/ipmr.c585
-rw-r--r--net/ipv4/ipmr_base.c323
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c1
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c1
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/route.c43
-rw-r--r--net/ipv4/tcp_bbr.c33
-rw-r--r--net/ipv4/tcp_output.c15
-rw-r--r--net/ipv4/tunnel4.c2
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c35
-rw-r--r--net/ipv6/anycast.c1
-rw-r--r--net/ipv6/exthdrs_core.c1
-rw-r--r--net/ipv6/fib6_rules.c27
-rw-r--r--net/ipv6/icmp.c2
-rw-r--r--net/ipv6/ila/ila_xlat.c1
-rw-r--r--net/ipv6/ip6_gre.c7
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ip6_tunnel.c1
-rw-r--r--net/ipv6/ip6_vti.c1
-rw-r--r--net/ipv6/ip6mr.c988
-rw-r--r--net/ipv6/ipv6_sockglue.c1
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c1
-rw-r--r--net/ipv6/proc.c1
-rw-r--r--net/ipv6/route.c34
-rw-r--r--net/ipv6/sit.c1
-rw-r--r--net/ipv6/xfrm6_state.c1
-rw-r--r--net/ipv6/xfrm6_tunnel.c1
-rw-r--r--net/kcm/kcmproc.c1
-rw-r--r--net/kcm/kcmsock.c1
-rw-r--r--net/key/af_key.c1
-rw-r--r--net/l2tp/l2tp_ppp.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c1
-rw-r--r--net/netfilter/nf_synproxy_core.c1
-rw-r--r--net/netfilter/xt_hashlimit.c1
-rw-r--r--net/netfilter/xt_recent.c1
-rw-r--r--net/phonet/pn_dev.c1
-rw-r--r--net/rds/af_rds.c7
-rw-r--r--net/rds/message.c38
-rw-r--r--net/rds/rds.h2
-rw-r--r--net/rds/recv.c31
-rw-r--r--net/sched/act_bpf.c1
-rw-r--r--net/sched/act_connmark.c1
-rw-r--r--net/sched/act_csum.c1
-rw-r--r--net/sched/act_gact.c1
-rw-r--r--net/sched/act_ife.c1
-rw-r--r--net/sched/act_ipt.c2
-rw-r--r--net/sched/act_mirred.c1
-rw-r--r--net/sched/act_nat.c1
-rw-r--r--net/sched/act_pedit.c1
-rw-r--r--net/sched/act_police.c1
-rw-r--r--net/sched/act_sample.c1
-rw-r--r--net/sched/act_simple.c1
-rw-r--r--net/sched/act_skbedit.c1
-rw-r--r--net/sched/act_skbmod.c1
-rw-r--r--net/sched/act_tunnel_key.c1
-rw-r--r--net/sched/act_vlan.c1
-rw-r--r--net/sched/cls_api.c1
-rw-r--r--net/sched/sch_api.c7
-rw-r--r--net/sched/sch_prio.c45
-rw-r--r--net/smc/af_smc.c127
-rw-r--r--net/smc/smc.h5
-rw-r--r--net/smc/smc_clc.c47
-rw-r--r--net/smc/smc_clc.h9
-rw-r--r--net/smc/smc_core.c77
-rw-r--r--net/smc/smc_core.h16
-rw-r--r--net/smc/smc_llc.c408
-rw-r--r--net/smc/smc_llc.h41
-rw-r--r--net/socket.c12
-rw-r--r--samples/bpf/Makefile4
-rw-r--r--samples/bpf/cpustat_kern.c281
-rw-r--r--samples/bpf/cpustat_user.c219
-rw-r--r--samples/bpf/xdp_redirect_user.c7
-rw-r--r--samples/sockmap/Makefile2
-rw-r--r--samples/sockmap/sockmap_user.c1
-rw-r--r--security/selinux/hooks.c1
-rw-r--r--security/smack/smack_netfilter.c1
-rw-r--r--tools/testing/selftests/bpf/Makefile4
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_user.c6
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c128
-rw-r--r--tools/testing/selftests/net/Makefile2
-rw-r--r--tools/testing/selftests/net/forwarding/.gitignore1
-rw-r--r--tools/testing/selftests/net/forwarding/README56
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh88
-rw-r--r--tools/testing/selftests/net/forwarding/config12
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample31
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh540
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh125
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh332
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh195
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_chains.sh122
-rw-r--r--tools/testing/selftests/net/forwarding/tc_common.sh25
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh196
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_shblocks.sh122
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c120
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py90
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc_batch.py8
273 files changed, 11128 insertions, 3251 deletions
diff --git a/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
new file mode 100644
index 000000000000..2aaef567c5be
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
@@ -0,0 +1,23 @@
+* MCR20A IEEE 802.15.4 *
+
+Required properties:
+ - compatible: should be "nxp,mcr20a"
+ - spi-max-frequency: maximal bus speed, should be set to a frequency
+ lower than 9000000 depends sync or async operation mode
+ - reg: the chipselect index
+ - interrupts: the interrupt generated by the device. Non high-level
+ can occur deadlocks while handling isr.
+
+Optional properties:
+ - rst_b-gpio: GPIO spec for the RST_B pin
+
+Example:
+
+ mcr20a@0 {
+ compatible = "nxp,mcr20a";
+ spi-max-frequency = <9000000>;
+ reg = <0>;
+ interrupts = <17 2>;
+ interrupt-parent = <&gpio>;
+ rst_b-gpio = <&gpio 27 1>
+ };
diff --git a/Documentation/devicetree/bindings/net/sff,sfp.txt b/Documentation/devicetree/bindings/net/sff,sfp.txt
index f1c441bedf68..929591d52ed6 100644
--- a/Documentation/devicetree/bindings/net/sff,sfp.txt
+++ b/Documentation/devicetree/bindings/net/sff,sfp.txt
@@ -33,6 +33,10 @@ Optional Properties:
Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high:
high Tx rate. Must not be present for SFF modules
+- maximum-power-milliwatt : Maximum module power consumption
+ Specifies the maximum power consumption allowable by a module in the
+ slot, in milli-Watts. Presently, modules can be up to 1W, 1.5W or 2W.
+
Example #1: Direct serdes to SFP connection
sfp_eth3: sfp-eth3 {
@@ -40,6 +44,7 @@ sfp_eth3: sfp-eth3 {
i2c-bus = <&sfp_1g_i2c>;
los-gpios = <&cpm_gpio2 22 GPIO_ACTIVE_HIGH>;
mod-def0-gpios = <&cpm_gpio2 21 GPIO_ACTIVE_LOW>;
+ maximum-power-milliwatt = <1000>;
pinctrl-names = "default";
pinctrl-0 = <&cpm_sfp_1g_pins &cps_sfp_1g_pins>;
tx-disable-gpios = <&cps_gpio1 24 GPIO_ACTIVE_HIGH>;
diff --git a/MAINTAINERS b/MAINTAINERS
index 93a12af4f180..e0b39004edc0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8592,6 +8592,15 @@ S: Maintained
F: Documentation/ABI/testing/sysfs-bus-iio-potentiometer-mcp4531
F: drivers/iio/potentiometer/mcp4531.c
+MCR20A IEEE-802.15.4 RADIO DRIVER
+M: Xue Liu <liuxuenetmail@gmail.com>
+L: linux-wpan@vger.kernel.org
+W: https://github.com/xueliu/mcr20a-linux
+S: Maintained
+F: drivers/net/ieee802154/mcr20a.c
+F: drivers/net/ieee802154/mcr20a.h
+F: Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
+
MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
M: William Breathitt Gray <vilhelm.gray@gmail.com>
L: linux-iio@vger.kernel.org
diff --git a/arch/arm/boot/dts/armada-370-rd.dts b/arch/arm/boot/dts/armada-370-rd.dts
index 8b2fa9a49967..c28afb242393 100644
--- a/arch/arm/boot/dts/armada-370-rd.dts
+++ b/arch/arm/boot/dts/armada-370-rd.dts
@@ -56,6 +56,7 @@
/dts-v1/;
#include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/gpio/gpio.h>
#include "armada-370.dtsi"
@@ -243,6 +244,8 @@
#address-cells = <1>;
#size-cells = <0>;
reg = <0x10>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
ports {
#address-cells = <1>;
@@ -278,6 +281,35 @@
};
};
};
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switchphy0: switchphy@0 {
+ reg = <0>;
+ interrupt-parent = <&switch>;
+ interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ switchphy1: switchphy@1 {
+ reg = <1>;
+ interrupt-parent = <&switch>;
+ interrupts = <1 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ switchphy2: switchphy@2 {
+ reg = <2>;
+ interrupt-parent = <&switch>;
+ interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ switchphy3: switchphy@3 {
+ reg = <3>;
+ interrupt-parent = <&switch>;
+ interrupts = <3 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
};
};
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index d3d435248a24..c73eb8209555 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -1088,6 +1088,10 @@ int __init mac_platform_init(void)
macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
platform_device_register_simple("macsonic", -1, NULL, 0);
+ if (macintosh_config->expansion_type == MAC_EXP_PDS ||
+ macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+ platform_device_register_simple("mac89x0", -1, NULL, 0);
+
if (macintosh_config->ether_type == MAC_ETHER_MACE)
platform_device_register_simple("macmace", -1, NULL, 0);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 45e4eb5bcbb2..cbf94d44f3d5 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -61,7 +61,12 @@ static bool is_imm8(int value)
static bool is_simm32(s64 value)
{
- return value == (s64) (s32) value;
+ return value == (s64)(s32)value;
+}
+
+static bool is_uimm32(u64 value)
+{
+ return value == (u64)(u32)value;
}
/* mov dst, src */
@@ -212,7 +217,7 @@ struct jit_context {
/* emit x64 prologue code for BPF program and check it's size.
* bpf_tail_call helper will skip it while jumping into another program
*/
-static void emit_prologue(u8 **pprog, u32 stack_depth)
+static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
u8 *prog = *pprog;
int cnt = 0;
@@ -247,18 +252,21 @@ static void emit_prologue(u8 **pprog, u32 stack_depth)
/* mov qword ptr [rbp+24],r15 */
EMIT4(0x4C, 0x89, 0x7D, 24);
- /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
- * we need to reset the counter to 0. It's done in two instructions,
- * resetting rax register to 0 (xor on eax gets 0 extended), and
- * moving it to the counter location.
- */
+ if (!ebpf_from_cbpf) {
+ /* Clear the tail call counter (tail_call_cnt): for eBPF tail
+ * calls we need to reset the counter to 0. It's done in two
+ * instructions, resetting rax register to 0, and moving it
+ * to the counter location.
+ */
+
+ /* xor eax, eax */
+ EMIT2(0x31, 0xc0);
+ /* mov qword ptr [rbp+32], rax */
+ EMIT4(0x48, 0x89, 0x45, 32);
- /* xor eax, eax */
- EMIT2(0x31, 0xc0);
- /* mov qword ptr [rbp+32], rax */
- EMIT4(0x48, 0x89, 0x45, 32);
+ BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
+ }
- BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
*pprog = prog;
}
@@ -356,6 +364,86 @@ static void emit_load_skb_data_hlen(u8 **pprog)
*pprog = prog;
}
+static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
+ u32 dst_reg, const u32 imm32)
+{
+ u8 *prog = *pprog;
+ u8 b1, b2, b3;
+ int cnt = 0;
+
+ /* optimization: if imm32 is positive, use 'mov %eax, imm32'
+ * (which zero-extends imm32) to save 2 bytes.
+ */
+ if (sign_propagate && (s32)imm32 < 0) {
+ /* 'mov %rax, imm32' sign extends imm32 */
+ b1 = add_1mod(0x48, dst_reg);
+ b2 = 0xC7;
+ b3 = 0xC0;
+ EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
+ goto done;
+ }
+
+ /* optimization: if imm32 is zero, use 'xor %eax, %eax'
+ * to save 3 bytes.
+ */
+ if (imm32 == 0) {
+ if (is_ereg(dst_reg))
+ EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+ b2 = 0x31; /* xor */
+ b3 = 0xC0;
+ EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
+ goto done;
+ }
+
+ /* mov %eax, imm32 */
+ if (is_ereg(dst_reg))
+ EMIT1(add_1mod(0x40, dst_reg));
+ EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+done:
+ *pprog = prog;
+}
+
+static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
+ const u32 imm32_hi, const u32 imm32_lo)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
+ /* For emitting plain u32, where sign bit must not be
+ * propagated LLVM tends to load imm64 over mov32
+ * directly, so save couple of bytes by just doing
+ * 'mov %eax, imm32' instead.
+ */
+ emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
+ } else {
+ /* movabsq %rax, imm64 */
+ EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
+ EMIT(imm32_lo, 4);
+ EMIT(imm32_hi, 4);
+ }
+
+ *pprog = prog;
+}
+
+static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (is64) {
+ /* mov dst, src */
+ EMIT_mov(dst_reg, src_reg);
+ } else {
+ /* mov32 dst, src */
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT1(add_2mod(0x40, dst_reg, src_reg));
+ EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+ }
+
+ *pprog = prog;
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
@@ -369,7 +457,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int proglen = 0;
u8 *prog = temp;
- emit_prologue(&prog, bpf_prog->aux->stack_depth);
+ emit_prologue(&prog, bpf_prog->aux->stack_depth,
+ bpf_prog_was_classic(bpf_prog));
if (seen_ld_abs)
emit_load_skb_data_hlen(&prog);
@@ -378,7 +467,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
const s32 imm32 = insn->imm;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
- u8 b1 = 0, b2 = 0, b3 = 0;
+ u8 b2 = 0, b3 = 0;
s64 jmp_offset;
u8 jmp_cond;
bool reload_skb_data;
@@ -414,16 +503,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
break;
- /* mov dst, src */
case BPF_ALU64 | BPF_MOV | BPF_X:
- EMIT_mov(dst_reg, src_reg);
- break;
-
- /* mov32 dst, src */
case BPF_ALU | BPF_MOV | BPF_X:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT1(add_2mod(0x40, dst_reg, src_reg));
- EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+ emit_mov_reg(&prog,
+ BPF_CLASS(insn->code) == BPF_ALU64,
+ dst_reg, src_reg);
break;
/* neg dst */
@@ -486,58 +570,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
break;
case BPF_ALU64 | BPF_MOV | BPF_K:
- /* optimization: if imm32 is positive,
- * use 'mov eax, imm32' (which zero-extends imm32)
- * to save 2 bytes
- */
- if (imm32 < 0) {
- /* 'mov rax, imm32' sign extends imm32 */
- b1 = add_1mod(0x48, dst_reg);
- b2 = 0xC7;
- b3 = 0xC0;
- EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
- break;
- }
-
case BPF_ALU | BPF_MOV | BPF_K:
- /* optimization: if imm32 is zero, use 'xor <dst>,<dst>'
- * to save 3 bytes.
- */
- if (imm32 == 0) {
- if (is_ereg(dst_reg))
- EMIT1(add_2mod(0x40, dst_reg, dst_reg));
- b2 = 0x31; /* xor */
- b3 = 0xC0;
- EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
- break;
- }
-
- /* mov %eax, imm32 */
- if (is_ereg(dst_reg))
- EMIT1(add_1mod(0x40, dst_reg));
- EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+ emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
+ dst_reg, imm32);
break;
case BPF_LD | BPF_IMM | BPF_DW:
- /* optimization: if imm64 is zero, use 'xor <dst>,<dst>'
- * to save 7 bytes.
- */
- if (insn[0].imm == 0 && insn[1].imm == 0) {
- b1 = add_2mod(0x48, dst_reg, dst_reg);
- b2 = 0x31; /* xor */
- b3 = 0xC0;
- EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg));
-
- insn++;
- i++;
- break;
- }
-
- /* movabsq %rax, imm64 */
- EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
- EMIT(insn[0].imm, 4);
- EMIT(insn[1].imm, 4);
-
+ emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
insn++;
i++;
break;
@@ -594,36 +633,38 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU | BPF_MUL | BPF_X:
case BPF_ALU64 | BPF_MUL | BPF_K:
case BPF_ALU64 | BPF_MUL | BPF_X:
- EMIT1(0x50); /* push rax */
- EMIT1(0x52); /* push rdx */
+ {
+ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+
+ if (dst_reg != BPF_REG_0)
+ EMIT1(0x50); /* push rax */
+ if (dst_reg != BPF_REG_3)
+ EMIT1(0x52); /* push rdx */
/* mov r11, dst_reg */
EMIT_mov(AUX_REG, dst_reg);
if (BPF_SRC(insn->code) == BPF_X)
- /* mov rax, src_reg */
- EMIT_mov(BPF_REG_0, src_reg);
+ emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
else
- /* mov rax, imm32 */
- EMIT3_off32(0x48, 0xC7, 0xC0, imm32);
+ emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);
- if (BPF_CLASS(insn->code) == BPF_ALU64)
+ if (is64)
EMIT1(add_1mod(0x48, AUX_REG));
else if (is_ereg(AUX_REG))
EMIT1(add_1mod(0x40, AUX_REG));
/* mul(q) r11 */
EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
- /* mov r11, rax */
- EMIT_mov(AUX_REG, BPF_REG_0);
-
- EMIT1(0x5A); /* pop rdx */
- EMIT1(0x58); /* pop rax */
-
- /* mov dst_reg, r11 */
- EMIT_mov(dst_reg, AUX_REG);
+ if (dst_reg != BPF_REG_3)
+ EMIT1(0x5A); /* pop rdx */
+ if (dst_reg != BPF_REG_0) {
+ /* mov dst_reg, rax */
+ EMIT_mov(dst_reg, BPF_REG_0);
+ EMIT1(0x58); /* pop rax */
+ }
break;
-
+ }
/* shifts */
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU | BPF_RSH | BPF_K:
@@ -641,7 +682,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_RSH: b3 = 0xE8; break;
case BPF_ARSH: b3 = 0xF8; break;
}
- EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
+
+ if (imm32 == 1)
+ EMIT2(0xD1, add_1reg(b3, dst_reg));
+ else
+ EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
break;
case BPF_ALU | BPF_LSH | BPF_X:
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e66963ca58bd..3ae32d1ddd27 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -4549,6 +4549,7 @@ static struct pernet_operations cma_pernet_operations = {
.exit = cma_exit_net,
.id = &cma_pernet_id,
.size = sizeof(struct cma_pernet),
+ .async = true,
};
static int __init cma_init(void)
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index bc6299697dda..d42b922bede8 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
+mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
new file mode 100644
index 000000000000..61cc3d7db257
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include "ib_rep.h"
+
+static const struct mlx5_ib_profile rep_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_rep_flow_db_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_rep_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_stage_rep_roce_init,
+ mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_stage_dev_res_init,
+ mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_stage_counters_init,
+ mlx5_ib_stage_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
+ mlx5_ib_stage_umr_res_init,
+ mlx5_ib_stage_umr_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+ mlx5_ib_stage_class_attr_init,
+ NULL),
+};
+
+static int
+mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ return 0;
+}
+
+static void
+mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ rep->rep_if[REP_IB].priv = NULL;
+}
+
+static int
+mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_ib_dev *ibdev;
+
+ ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
+ if (!ibdev)
+ return -ENOMEM;
+
+ ibdev->rep = rep;
+ ibdev->mdev = dev;
+ ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports),
+ MLX5_CAP_GEN(dev, num_vhca_ports));
+ if (!__mlx5_ib_add(ibdev, &rep_profile))
+ return -EINVAL;
+
+ rep->rep_if[REP_IB].priv = ibdev;
+
+ return 0;
+}
+
+static void
+mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_ib_dev *dev;
+
+ if (!rep->rep_if[REP_IB].priv)
+ return;
+
+ dev = mlx5_ib_rep_to_dev(rep);
+ __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+ rep->rep_if[REP_IB].priv = NULL;
+}
+
+static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+ return mlx5_ib_rep_to_dev(rep);
+}
+
+static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++) {
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5_ib_vport_rep_load;
+ rep_if.unload = mlx5_ib_vport_rep_unload;
+ rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+ mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB);
+ }
+}
+
+static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++)
+ mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB);
+}
+
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5_ib_nic_rep_load;
+ rep_if.unload = mlx5_ib_nic_rep_unload;
+ rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+ rep_if.priv = dev;
+
+ mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB);
+
+ mlx5_ib_rep_register_vf_vports(dev);
+}
+
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+ mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */
+ mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF*/
+}
+
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_mode(esw);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB);
+}
+
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+ return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
+}
+
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
+{
+ return mlx5_eswitch_vport_rep(esw, vport);
+}
+
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+ if (!dev->rep)
+ return 0;
+
+ flow_rule =
+ mlx5_eswitch_add_send_to_vport_rule(esw,
+ dev->rep->vport,
+ sq->base.mqp.qpn);
+ if (IS_ERR(flow_rule))
+ return PTR_ERR(flow_rule);
+ sq->flow_rule = flow_rule;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
new file mode 100644
index 000000000000..046fd942fd46
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLX5_IB_REP_H__
+#define __MLX5_IB_REP_H__
+
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_ib.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index);
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index);
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev);
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev);
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq);
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return SRIOV_NONE;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+ return NULL;
+}
+
+static inline
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+
+static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ return 0;
+}
+
+static inline
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ int vport_index)
+{
+ return NULL;
+}
+#endif
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
+{
+ return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv;
+}
+#endif /* __MLX5_IB_REP_H__ */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 4236c8086820..ee55d7d64554 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -57,6 +57,7 @@
#include <linux/in.h>
#include <linux/etherdevice.h>
#include "mlx5_ib.h"
+#include "ib_rep.h"
#include "cmd.h"
#define DRIVER_NAME "mlx5_ib"
@@ -130,7 +131,7 @@ static int get_port_state(struct ib_device *ibdev,
int ret;
memset(&attr, 0, sizeof(attr));
- ret = mlx5_ib_query_port(ibdev, port_num, &attr);
+ ret = ibdev->query_port(ibdev, port_num, &attr);
if (!ret)
*state = attr.state;
return ret;
@@ -154,10 +155,19 @@ static int mlx5_netdev_event(struct notifier_block *this,
case NETDEV_REGISTER:
case NETDEV_UNREGISTER:
write_lock(&roce->netdev_lock);
-
- if (ndev->dev.parent == &mdev->pdev->dev)
- roce->netdev = (event == NETDEV_UNREGISTER) ?
+ if (ibdev->rep) {
+ struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
+ struct net_device *rep_ndev;
+
+ rep_ndev = mlx5_ib_get_rep_netdev(esw,
+ ibdev->rep->vport);
+ if (rep_ndev == ndev)
+ roce->netdev = (event == NETDEV_UNREGISTER) ?
NULL : ndev;
+ } else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) {
+ roce->netdev = (event == NETDEV_UNREGISTER) ?
+ NULL : ndev;
+ }
write_unlock(&roce->netdev_lock);
break;
@@ -1268,6 +1278,22 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
return ret;
}
+static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ int ret;
+
+ /* Only link layer == ethernet is valid for representors */
+ ret = mlx5_query_port_roce(ibdev, port, props);
+ if (ret || !props)
+ return ret;
+
+ /* We don't support GIDS */
+ props->gid_tbl_len = 0;
+
+ return ret;
+}
+
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
@@ -2631,7 +2657,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
ibflow);
struct mlx5_ib_flow_handler *iter, *tmp;
- mutex_lock(&dev->flow_db.lock);
+ mutex_lock(&dev->flow_db->lock);
list_for_each_entry_safe(iter, tmp, &handler->list, list) {
mlx5_del_flow_rules(iter->rule);
@@ -2642,7 +2668,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
mlx5_del_flow_rules(handler->rule);
put_flow_table(dev, handler->prio, true);
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(handler);
@@ -2691,7 +2717,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
MLX5_FLOW_NAMESPACE_BYPASS);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
- prio = &dev->flow_db.prios[priority];
+ prio = &dev->flow_db->prios[priority];
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
ns = mlx5_get_flow_namespace(dev->mdev,
@@ -2699,7 +2725,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
build_leftovers_ft_param(&priority,
&num_entries,
&num_groups);
- prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+ prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
} else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
if (!MLX5_CAP_FLOWTABLE(dev->mdev,
allow_sniffer_and_nic_rx_shared_tir))
@@ -2709,7 +2735,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
MLX5_FLOW_NAMESPACE_SNIFFER_RX :
MLX5_FLOW_NAMESPACE_SNIFFER_TX);
- prio = &dev->flow_db.sniffer[ft_type];
+ prio = &dev->flow_db->sniffer[ft_type];
priority = 0;
num_entries = 1;
num_groups = 1;
@@ -2802,6 +2828,18 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
if (!flow_is_multicast_only(flow_attr))
set_underlay_qp(dev, spec, underlay_qpn);
+ if (dev->rep) {
+ void *misc;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port,
+ dev->rep->vport);
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ }
+
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
if (is_drop) {
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
@@ -2999,7 +3037,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
if (!dst)
return ERR_PTR(-ENOMEM);
- mutex_lock(&dev->flow_db.lock);
+ mutex_lock(&dev->flow_db->lock);
ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
if (IS_ERR(ft_prio)) {
@@ -3048,7 +3086,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
goto destroy_ft;
}
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(dst);
return &handler->ibflow;
@@ -3058,7 +3096,7 @@ destroy_ft:
if (ft_prio_tx)
put_flow_table(dev, ft_prio_tx, false);
unlock:
- mutex_unlock(&dev->flow_db.lock);
+ mutex_unlock(&dev->flow_db->lock);
kfree(dst);
kfree(handler);
return ERR_PTR(err);
@@ -3772,6 +3810,25 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
+static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+ return 0;
+}
+
static void get_dev_fw_str(struct ib_device *ibdev, char *str)
{
struct mlx5_ib_dev *dev =
@@ -3802,7 +3859,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
goto err_destroy_vport_lag;
}
- dev->flow_db.lag_demux_ft = ft;
+ dev->flow_db->lag_demux_ft = ft;
return 0;
err_destroy_vport_lag:
@@ -3814,9 +3871,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
- if (dev->flow_db.lag_demux_ft) {
- mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
- dev->flow_db.lag_demux_ft = NULL;
+ if (dev->flow_db->lag_demux_ft) {
+ mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
+ dev->flow_db->lag_demux_ft = NULL;
mlx5_cmd_destroy_vport_lag(mdev);
}
@@ -3848,14 +3905,10 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
{
int err;
- err = mlx5_add_netdev_notifier(dev, port_num);
- if (err)
- return err;
-
if (MLX5_CAP_GEN(dev->mdev, roce)) {
err = mlx5_nic_vport_enable_roce(dev->mdev);
if (err)
- goto err_unregister_netdevice_notifier;
+ return err;
}
err = mlx5_eth_lag_init(dev);
@@ -3868,8 +3921,6 @@ err_disable_roce:
if (MLX5_CAP_GEN(dev->mdev, roce))
mlx5_nic_vport_disable_roce(dev->mdev);
-err_unregister_netdevice_notifier:
- mlx5_remove_netdev_notifier(dev, port_num);
return err;
}
@@ -4503,7 +4554,7 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@@ -4512,7 +4563,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
kfree(dev->port);
}
-static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
const char *name;
@@ -4564,7 +4615,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->mdev->priv.eq_table.num_comp_vectors;
dev->ib_dev.dev.parent = &mdev->pdev->dev;
- mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
@@ -4585,7 +4635,38 @@ err_free_port:
return -ENOMEM;
}
-static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
+{
+ dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+ if (!dev->flow_db)
+ return -ENOMEM;
+
+ mutex_init(&dev->flow_db->lock);
+
+ return 0;
+}
+
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_dev *nic_dev;
+
+ nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch);
+
+ if (!nic_dev)
+ return -EINVAL;
+
+ dev->flow_db = nic_dev->flow_db;
+
+ return 0;
+}
+
+static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
+{
+ kfree(dev->flow_db);
+}
+
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
int err;
@@ -4626,7 +4707,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
dev->ib_dev.query_device = mlx5_ib_query_device;
- dev->ib_dev.query_port = mlx5_ib_query_port;
dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
dev->ib_dev.query_gid = mlx5_ib_query_gid;
dev->ib_dev.add_gid = mlx5_ib_add_gid;
@@ -4669,7 +4749,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
- dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
@@ -4720,6 +4799,80 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
return 0;
}
+static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
+{
+ dev->ib_dev.get_port_immutable = mlx5_port_immutable;
+ dev->ib_dev.query_port = mlx5_ib_query_port;
+
+ return 0;
+}
+
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
+{
+ dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable;
+ dev->ib_dev.query_port = mlx5_ib_rep_query_port;
+
+ return 0;
+}
+
+static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
+ u8 port_num)
+{
+ int i;
+
+ for (i = 0; i < dev->num_ports; i++) {
+ dev->roce[i].dev = dev;
+ dev->roce[i].native_port_num = i + 1;
+ dev->roce[i].last_port_state = IB_PORT_DOWN;
+ }
+
+ dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
+ dev->ib_dev.create_wq = mlx5_ib_create_wq;
+ dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
+ dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
+ dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
+ dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
+
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+
+ return mlx5_add_netdev_notifier(dev, port_num);
+}
+
+static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+ u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+ mlx5_remove_netdev_notifier(dev, port_num);
+}
+
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ enum rdma_link_layer ll;
+ int port_type_cap;
+ int err = 0;
+ u8 port_num;
+
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ err = mlx5_ib_stage_common_roce_init(dev, port_num);
+
+ return err;
+}
+
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_stage_common_roce_cleanup(dev);
+}
+
static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
@@ -4727,37 +4880,26 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
int port_type_cap;
u8 port_num;
int err;
- int i;
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- for (i = 0; i < dev->num_ports; i++) {
- dev->roce[i].dev = dev;
- dev->roce[i].native_port_num = i + 1;
- dev->roce[i].last_port_state = IB_PORT_DOWN;
- }
+ err = mlx5_ib_stage_common_roce_init(dev, port_num);
+ if (err)
+ return err;
- dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
- dev->ib_dev.create_wq = mlx5_ib_create_wq;
- dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
- dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
- dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
- dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
err = mlx5_enable_eth(dev, port_num);
if (err)
- return err;
+ goto cleanup;
}
return 0;
+cleanup:
+ mlx5_ib_stage_common_roce_cleanup(dev);
+
+ return err;
}
static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
@@ -4773,16 +4915,16 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
if (ll == IB_LINK_LAYER_ETHERNET) {
mlx5_disable_eth(dev);
- mlx5_remove_netdev_notifier(dev, port_num);
+ mlx5_ib_stage_common_roce_cleanup(dev);
}
}
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
{
return create_dev_resources(&dev->devr);
}
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
{
destroy_dev_resources(&dev->devr);
}
@@ -4794,7 +4936,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
return mlx5_ib_odp_init_one(dev);
}
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
@@ -4806,7 +4948,7 @@ static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
return 0;
}
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
@@ -4837,7 +4979,7 @@ static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
}
-static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
{
int err;
@@ -4852,28 +4994,28 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
return err;
}
-static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
}
-static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
return ib_register_device(&dev->ib_dev, NULL);
}
-static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
{
ib_unregister_device(&dev->ib_dev);
}
-static int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
{
return create_umr_res(dev);
}
-static void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
{
destroy_umrc_res(dev);
}
@@ -4890,7 +5032,7 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
cancel_delay_drop(dev);
}
-static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
{
int err;
int i;
@@ -4905,9 +5047,21 @@ static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
return 0;
}
-static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
- const struct mlx5_ib_profile *profile,
- int stage)
+static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_register_vport_reps(dev);
+
+ return 0;
+}
+
+static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_unregister_vport_reps(dev);
+}
+
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile,
+ int stage)
{
/* Number of stages to cleanup */
while (stage) {
@@ -4921,23 +5075,14 @@ static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
-static void *__mlx5_ib_add(struct mlx5_core_dev *mdev,
- const struct mlx5_ib_profile *profile)
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile)
{
- struct mlx5_ib_dev *dev;
int err;
int i;
printk_once(KERN_INFO "%s", mlx5_version);
- dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
- if (!dev)
- return NULL;
-
- dev->mdev = mdev;
- dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
- MLX5_CAP_GEN(mdev, num_vhca_ports));
-
for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
if (profile->stage[i].init) {
err = profile->stage[i].init(dev);
@@ -4961,9 +5106,15 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_flow_db_init,
+ mlx5_ib_stage_flow_db_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_non_default_cb,
+ NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_roce_init,
mlx5_ib_stage_roce_cleanup),
@@ -4999,6 +5150,48 @@ static const struct mlx5_ib_profile pf_profile = {
NULL),
};
+static const struct mlx5_ib_profile nic_rep_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+ mlx5_ib_stage_flow_db_init,
+ mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_rep_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_stage_rep_roce_init,
+ mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_stage_dev_res_init,
+ mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_stage_counters_init,
+ mlx5_ib_stage_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_UAR,
+ mlx5_ib_stage_uar_init,
+ mlx5_ib_stage_uar_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
+ mlx5_ib_stage_umr_res_init,
+ mlx5_ib_stage_umr_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+ mlx5_ib_stage_class_attr_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
+ mlx5_ib_stage_rep_reg_init,
+ mlx5_ib_stage_rep_reg_cleanup),
+};
+
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
{
struct mlx5_ib_multiport_info *mpi;
@@ -5044,8 +5237,11 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
enum rdma_link_layer ll;
+ struct mlx5_ib_dev *dev;
int port_type_cap;
+ printk_once(KERN_INFO "%s", mlx5_version);
+
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
@@ -5055,7 +5251,22 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
return mlx5_ib_add_slave_port(mdev, port_num);
}
- return __mlx5_ib_add(mdev, &pf_profile);
+ dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+ if (!dev)
+ return NULL;
+
+ dev->mdev = mdev;
+ dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
+ MLX5_CAP_GEN(mdev, num_vhca_ports));
+
+ if (MLX5_VPORT_MANAGER(mdev) &&
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+ dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
+
+ return __mlx5_ib_add(dev, &nic_rep_profile);
+ }
+
+ return __mlx5_ib_add(dev, &pf_profile);
}
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index eafb9751daf6..e0bad28e0f09 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -343,6 +343,7 @@ struct mlx5_ib_sq {
struct mlx5_ib_wq *sq;
struct mlx5_ib_ubuffer ubuffer;
struct mlx5_db *doorbell;
+ struct mlx5_flow_handle *flow_rule;
u32 tisn;
u8 state;
};
@@ -731,7 +732,9 @@ struct mlx5_ib_delay_drop {
enum mlx5_ib_stages {
MLX5_IB_STAGE_INIT,
+ MLX5_IB_STAGE_FLOW_DB,
MLX5_IB_STAGE_CAPS,
+ MLX5_IB_STAGE_NON_DEFAULT_CB,
MLX5_IB_STAGE_ROCE,
MLX5_IB_STAGE_DEVICE_RESOURCES,
MLX5_IB_STAGE_ODP,
@@ -743,6 +746,7 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_UMR_RESOURCES,
MLX5_IB_STAGE_DELAY_DROP,
MLX5_IB_STAGE_CLASS_ATTR,
+ MLX5_IB_STAGE_REP_REG,
MLX5_IB_STAGE_MAX,
};
@@ -797,7 +801,7 @@ struct mlx5_ib_dev {
struct srcu_struct mr_srcu;
u32 null_mkey;
#endif
- struct mlx5_ib_flow_db flow_db;
+ struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
@@ -807,6 +811,7 @@ struct mlx5_ib_dev {
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_delay_drop delay_drop;
const struct mlx5_ib_profile *profile;
+ struct mlx5_eswitch_rep *rep;
/* protect the user_td */
struct mutex lb_mutex;
@@ -1049,6 +1054,31 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+/* Needed for rep profile */
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev);
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile,
+ int stage);
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile);
+
int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
u8 port, struct ifla_vf_info *info);
int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 556e015678de..a5fad3e87ff7 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -587,7 +587,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
- if (!mlx5_debugfs_root)
+ if (!mlx5_debugfs_root || dev->rep)
return;
debugfs_remove_recursive(dev->cache.root);
@@ -600,7 +600,7 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
struct mlx5_cache_ent *ent;
int i;
- if (!mlx5_debugfs_root)
+ if (!mlx5_debugfs_root || dev->rep)
return 0;
cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
@@ -690,6 +690,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
MLX5_IB_UMR_OCTOWORD;
ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+ !dev->rep &&
mlx5_core_is_pf(dev->mdev))
ent->limit = dev->mdev->profile->mr_cache[i].limit;
else
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 39d24bf694a8..5663530ea5fd 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -36,6 +36,7 @@
#include <rdma/ib_user_verbs.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
+#include "ib_rep.h"
/* not supported currently */
static int wq_signature;
@@ -1082,6 +1083,13 @@ static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
mlx5_core_destroy_tis(dev->mdev, sq->tisn);
}
+static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ if (sq->flow_rule)
+ mlx5_del_flow_rules(sq->flow_rule);
+}
+
static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq, void *qpin,
struct ib_pd *pd)
@@ -1145,8 +1153,15 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
if (err)
goto err_umem;
+ err = create_flow_rule_vport_sq(dev, sq);
+ if (err)
+ goto err_flow;
+
return 0;
+err_flow:
+ mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+
err_umem:
ib_umem_release(sq->ubuffer.umem);
sq->ubuffer.umem = NULL;
@@ -1157,6 +1172,7 @@ err_umem:
static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq)
{
+ destroy_flow_rule_vport_sq(dev, sq);
mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
ib_umem_release(sq->ubuffer.umem);
}
@@ -1263,6 +1279,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
if (tunnel_offload_en)
MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
+ if (dev->rep)
+ MLX5_SET(tirc, tirc, self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
kvfree(in);
@@ -1554,6 +1574,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
create_tir:
+ if (dev->rep)
+ MLX5_SET(tirc, tirc, self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
if (err)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index d88b78a17440..08b85215c2be 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -149,6 +149,7 @@ config MACVTAP
config IPVLAN
tristate "IP-VLAN support"
depends on INET
+ depends on IPV6 || !IPV6
depends on NETFILTER
select NET_L3_MASTER_DEV
---help---
diff --git a/drivers/net/Space.c b/drivers/net/Space.c
index 64333ec999ac..3afda6561434 100644
--- a/drivers/net/Space.c
+++ b/drivers/net/Space.c
@@ -114,9 +114,6 @@ static struct devprobe2 m68k_probes[] __initdata = {
#ifdef CONFIG_MVME147_NET /* MVME147 internal Ethernet */
{mvme147lance_probe, 0},
#endif
-#ifdef CONFIG_MAC89x0
- {mac89x0_probe, 0},
-#endif
{NULL, 0},
};
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index c669554d70bb..4c19d23dd282 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4791,6 +4791,7 @@ static struct pernet_operations bond_net_ops = {
.exit = bond_net_exit,
.id = &bond_net_id,
.size = sizeof(struct bond_net),
+ .async = true,
};
static int __init bonding_init(void)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index e1b5c5c66fce..24486f96dd39 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -253,9 +253,8 @@ static void mv88e6xxx_g1_irq_unmask(struct irq_data *d)
chip->g1_irq.masked &= ~(1 << n);
}
-static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip)
{
- struct mv88e6xxx_chip *chip = dev_id;
unsigned int nhandled = 0;
unsigned int sub_irq;
unsigned int n;
@@ -280,6 +279,13 @@ out:
return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE);
}
+static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+{
+ struct mv88e6xxx_chip *chip = dev_id;
+
+ return mv88e6xxx_g1_irq_thread_work(chip);
+}
+
static void mv88e6xxx_g1_irq_bus_lock(struct irq_data *d)
{
struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d);
@@ -335,7 +341,7 @@ static const struct irq_domain_ops mv88e6xxx_g1_irq_domain_ops = {
.xlate = irq_domain_xlate_twocell,
};
-static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free_common(struct mv88e6xxx_chip *chip)
{
int irq, virq;
u16 mask;
@@ -344,8 +350,6 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
mask &= ~GENMASK(chip->g1_irq.nirqs, 0);
mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask);
- free_irq(chip->irq, chip);
-
for (irq = 0; irq < chip->g1_irq.nirqs; irq++) {
virq = irq_find_mapping(chip->g1_irq.domain, irq);
irq_dispose_mapping(virq);
@@ -354,7 +358,14 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
irq_domain_remove(chip->g1_irq.domain);
}
-static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+{
+ mv88e6xxx_g1_irq_free(chip);
+
+ free_irq(chip->irq, chip);
+}
+
+static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip)
{
int err, irq, virq;
u16 reg, mask;
@@ -387,13 +398,6 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
if (err)
goto out_disable;
- err = request_threaded_irq(chip->irq, NULL,
- mv88e6xxx_g1_irq_thread_fn,
- IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
- dev_name(chip->dev), chip);
- if (err)
- goto out_disable;
-
return 0;
out_disable:
@@ -411,6 +415,62 @@ out_mapping:
return err;
}
+static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+{
+ int err;
+
+ err = mv88e6xxx_g1_irq_setup_common(chip);
+ if (err)
+ return err;
+
+ err = request_threaded_irq(chip->irq, NULL,
+ mv88e6xxx_g1_irq_thread_fn,
+ IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
+ dev_name(chip->dev), chip);
+ if (err)
+ mv88e6xxx_g1_irq_free_common(chip);
+
+ return err;
+}
+
+static void mv88e6xxx_irq_poll(struct kthread_work *work)
+{
+ struct mv88e6xxx_chip *chip = container_of(work,
+ struct mv88e6xxx_chip,
+ irq_poll_work.work);
+ mv88e6xxx_g1_irq_thread_work(chip);
+
+ kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+ msecs_to_jiffies(100));
+}
+
+static int mv88e6xxx_irq_poll_setup(struct mv88e6xxx_chip *chip)
+{
+ int err;
+
+ err = mv88e6xxx_g1_irq_setup_common(chip);
+ if (err)
+ return err;
+
+ kthread_init_delayed_work(&chip->irq_poll_work,
+ mv88e6xxx_irq_poll);
+
+ chip->kworker = kthread_create_worker(0, dev_name(chip->dev));
+ if (IS_ERR(chip->kworker))
+ return PTR_ERR(chip->kworker);
+
+ kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+ msecs_to_jiffies(100));
+
+ return 0;
+}
+
+static void mv88e6xxx_irq_poll_free(struct mv88e6xxx_chip *chip)
+{
+ kthread_cancel_delayed_work_sync(&chip->irq_poll_work);
+ kthread_destroy_worker(chip->kworker);
+}
+
int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask)
{
int i;
@@ -4034,33 +4094,34 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
goto out;
}
- if (chip->irq > 0) {
- /* Has to be performed before the MDIO bus is created,
- * because the PHYs will link there interrupts to these
- * interrupt controllers
- */
- mutex_lock(&chip->reg_lock);
+ /* Has to be performed before the MDIO bus is created, because
+ * the PHYs will link there interrupts to these interrupt
+ * controllers
+ */
+ mutex_lock(&chip->reg_lock);
+ if (chip->irq > 0)
err = mv88e6xxx_g1_irq_setup(chip);
- mutex_unlock(&chip->reg_lock);
-
- if (err)
- goto out;
-
- if (chip->info->g2_irqs > 0) {
- err = mv88e6xxx_g2_irq_setup(chip);
- if (err)
- goto out_g1_irq;
- }
+ else
+ err = mv88e6xxx_irq_poll_setup(chip);
+ mutex_unlock(&chip->reg_lock);
- err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
- if (err)
- goto out_g2_irq;
+ if (err)
+ goto out;
- err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
+ if (chip->info->g2_irqs > 0) {
+ err = mv88e6xxx_g2_irq_setup(chip);
if (err)
- goto out_g1_atu_prob_irq;
+ goto out_g1_irq;
}
+ err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
+ if (err)
+ goto out_g2_irq;
+
+ err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
+ if (err)
+ goto out_g1_atu_prob_irq;
+
err = mv88e6xxx_mdios_register(chip, np);
if (err)
goto out_g1_vtu_prob_irq;
@@ -4074,20 +4135,19 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
out_mdio:
mv88e6xxx_mdios_unregister(chip);
out_g1_vtu_prob_irq:
- if (chip->irq > 0)
- mv88e6xxx_g1_vtu_prob_irq_free(chip);
+ mv88e6xxx_g1_vtu_prob_irq_free(chip);
out_g1_atu_prob_irq:
- if (chip->irq > 0)
- mv88e6xxx_g1_atu_prob_irq_free(chip);
+ mv88e6xxx_g1_atu_prob_irq_free(chip);
out_g2_irq:
- if (chip->info->g2_irqs > 0 && chip->irq > 0)
+ if (chip->info->g2_irqs > 0)
mv88e6xxx_g2_irq_free(chip);
out_g1_irq:
- if (chip->irq > 0) {
- mutex_lock(&chip->reg_lock);
+ mutex_lock(&chip->reg_lock);
+ if (chip->irq > 0)
mv88e6xxx_g1_irq_free(chip);
- mutex_unlock(&chip->reg_lock);
- }
+ else
+ mv88e6xxx_irq_poll_free(chip);
+ mutex_unlock(&chip->reg_lock);
out:
return err;
}
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 97d7915f32c7..d6a1391dc268 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -15,6 +15,7 @@
#include <linux/if_vlan.h>
#include <linux/irq.h>
#include <linux/gpio/consumer.h>
+#include <linux/kthread.h>
#include <linux/phy.h>
#include <linux/ptp_clock_kernel.h>
#include <linux/timecounter.h>
@@ -245,6 +246,8 @@ struct mv88e6xxx_chip {
int watchdog_irq;
int atu_prob_irq;
int vtu_prob_irq;
+ struct kthread_worker *kworker;
+ struct kthread_delayed_work irq_poll_work;
/* GPIO resources */
u8 gpio_data[2];
diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c
index f17a160dbff2..137cbb470af2 100644
--- a/drivers/net/ethernet/apple/macmace.c
+++ b/drivers/net/ethernet/apple/macmace.c
@@ -247,8 +247,8 @@ static int mace_probe(struct platform_device *pdev)
dev->netdev_ops = &mace_netdev_ops;
dev->watchdog_timeo = TX_TIMEOUT;
- printk(KERN_INFO "%s: 68K MACE, hardware address %pM\n",
- dev->name, dev->dev_addr);
+ pr_info("Onboard MACE, hardware address %pM, chip revision 0x%04X\n",
+ dev->dev_addr, mp->chipid);
err = register_netdev(dev);
if (!err)
@@ -589,7 +589,6 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id)
else if (fs & (UFLO|LCOL|RTRY)) {
++dev->stats.tx_aborted_errors;
if (mb->xmtfs & UFLO) {
- printk(KERN_ERR "%s: DMA underrun.\n", dev->name);
dev->stats.tx_fifo_errors++;
mace_txdma_reset(dev);
}
@@ -644,10 +643,8 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf)
if (frame_status & (RS_OFLO | RS_CLSN | RS_FRAMERR | RS_FCSERR)) {
dev->stats.rx_errors++;
- if (frame_status & RS_OFLO) {
- printk(KERN_DEBUG "%s: fifo overflow.\n", dev->name);
+ if (frame_status & RS_OFLO)
dev->stats.rx_fifo_errors++;
- }
if (frame_status & RS_CLSN)
dev->stats.collisions++;
if (frame_status & RS_FRAMERR)
@@ -770,18 +767,4 @@ static struct platform_driver mac_mace_driver = {
},
};
-static int __init mac_mace_init_module(void)
-{
- if (!MACH_IS_MAC)
- return -ENODEV;
-
- return platform_driver_register(&mac_mace_driver);
-}
-
-static void __exit mac_mace_cleanup_module(void)
-{
- platform_driver_unregister(&mac_mace_driver);
-}
-
-module_init(mac_mace_init_module);
-module_exit(mac_mace_cleanup_module);
+module_platform_driver(mac_mace_driver);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 3177b0c9bd2d..db92f1858060 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -1335,12 +1335,6 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
return ret;
}
- /* Clear out any old resources being used by the filter before
- * we start constructing the new filter.
- */
- if (f->valid)
- clear_filter(adapter, f);
-
if (is_t6(adapter->params.chip) && fs->type &&
ipv6_addr_type((const struct in6_addr *)fs->val.lip) !=
IPV6_ADDR_ANY) {
diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c
index 977d4c2c759d..3f8fe8fd79cc 100644
--- a/drivers/net/ethernet/cirrus/mac89x0.c
+++ b/drivers/net/ethernet/cirrus/mac89x0.c
@@ -56,21 +56,11 @@
local_irq_{dis,en}able()
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
static const char version[] =
"cs89x0.c:v1.02 11/26/96 Russell Nelson <nelson@crynwr.com>\n";
-/* ======================= configure the driver here ======================= */
-
-/* use 0 for production, 1 for verification, >2 for debug */
-#ifndef NET_DEBUG
-#define NET_DEBUG 0
-#endif
-
-/* ======================= end of configuration ======================= */
-
-
-/* Always include 'config.h' first in case the user wants to turn on
- or override something. */
#include <linux/module.h>
/*
@@ -93,6 +83,7 @@ static const char version[] =
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/netdevice.h>
+#include <linux/platform_device.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/delay.h>
@@ -105,24 +96,22 @@ static const char version[] =
#include "cs89x0.h"
-static unsigned int net_debug = NET_DEBUG;
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "debug message level");
/* Information that need to be kept for each board. */
struct net_local {
+ int msg_enable;
int chip_type; /* one of: CS8900, CS8920, CS8920M */
char chip_revision; /* revision letter of the chip ('A'...) */
int send_cmd; /* the propercommand used to send a packet. */
int rx_mode;
int curr_rx_cfg;
int send_underrun; /* keep track of how many underruns in a row we get */
- struct sk_buff *skb;
};
/* Index to functions, as function prototypes. */
-
-#if 0
-extern void reset_chip(struct net_device *dev);
-#endif
static int net_open(struct net_device *dev);
static int net_send_packet(struct sk_buff *skb, struct net_device *dev);
static irqreturn_t net_interrupt(int irq, void *dev_id);
@@ -132,10 +121,6 @@ static int net_close(struct net_device *dev);
static struct net_device_stats *net_get_stats(struct net_device *dev);
static int set_mac_address(struct net_device *dev, void *addr);
-
-/* Example routines you must write ;->. */
-#define tx_done(dev) 1
-
/* For reading/writing registers ISA-style */
static inline int
readreg_io(struct net_device *dev, int portno)
@@ -176,12 +161,10 @@ static const struct net_device_ops mac89x0_netdev_ops = {
/* Probe for the CS8900 card in slot E. We won't bother looking
anywhere else until we have a really good reason to do so. */
-struct net_device * __init mac89x0_probe(int unit)
+static int mac89x0_device_probe(struct platform_device *pdev)
{
struct net_device *dev;
- static int once_is_enough;
struct net_local *lp;
- static unsigned version_printed;
int i, slot;
unsigned rev_type = 0;
unsigned long ioaddr;
@@ -189,21 +172,9 @@ struct net_device * __init mac89x0_probe(int unit)
int err = -ENODEV;
struct nubus_rsrc *fres;
- if (!MACH_IS_MAC)
- return ERR_PTR(-ENODEV);
-
dev = alloc_etherdev(sizeof(struct net_local));
if (!dev)
- return ERR_PTR(-ENOMEM);
-
- if (unit >= 0) {
- sprintf(dev->name, "eth%d", unit);
- netdev_boot_setup_check(dev);
- }
-
- if (once_is_enough)
- goto out;
- once_is_enough = 1;
+ return -ENOMEM;
/* We might have to parameterize this later */
slot = 0xE;
@@ -230,9 +201,13 @@ struct net_device * __init mac89x0_probe(int unit)
if (sig != swab16(CHIP_EISA_ID_SIG))
goto out;
+ SET_NETDEV_DEV(dev, &pdev->dev);
+
/* Initialize the net_device structure. */
lp = netdev_priv(dev);
+ lp->msg_enable = netif_msg_init(debug, 0);
+
/* Fill in the 'dev' fields. */
dev->base_addr = ioaddr;
dev->mem_start = (unsigned long)
@@ -255,19 +230,16 @@ struct net_device * __init mac89x0_probe(int unit)
if (lp->chip_type != CS8900 && lp->chip_revision >= 'C')
lp->send_cmd = TX_NOW;
- if (net_debug && version_printed++ == 0)
- printk(version);
+ netif_dbg(lp, drv, dev, "%s", version);
- printk(KERN_INFO "%s: cs89%c0%s rev %c found at %#8lx",
- dev->name,
- lp->chip_type==CS8900?'0':'2',
- lp->chip_type==CS8920M?"M":"",
- lp->chip_revision,
- dev->base_addr);
+ pr_info("cs89%c0%s rev %c found at %#8lx\n",
+ lp->chip_type == CS8900 ? '0' : '2',
+ lp->chip_type == CS8920M ? "M" : "",
+ lp->chip_revision, dev->base_addr);
/* Try to read the MAC address */
if ((readreg(dev, PP_SelfST) & (EEPROM_PRESENT | EEPROM_OK)) == 0) {
- printk("\nmac89x0: No EEPROM, giving up now.\n");
+ pr_info("No EEPROM, giving up now.\n");
goto out1;
} else {
for (i = 0; i < ETH_ALEN; i += 2) {
@@ -282,39 +254,23 @@ struct net_device * __init mac89x0_probe(int unit)
/* print the IRQ and ethernet address. */
- printk(" IRQ %d ADDR %pM\n", dev->irq, dev->dev_addr);
+ pr_info("MAC %pM, IRQ %d\n", dev->dev_addr, dev->irq);
dev->netdev_ops = &mac89x0_netdev_ops;
err = register_netdev(dev);
if (err)
goto out1;
- return NULL;
+
+ platform_set_drvdata(pdev, dev);
+ return 0;
out1:
nubus_writew(0, dev->base_addr + ADD_PORT);
out:
free_netdev(dev);
- return ERR_PTR(err);
+ return err;
}
-#if 0
-/* This is useful for something, but I don't know what yet. */
-void __init reset_chip(struct net_device *dev)
-{
- int reset_start_time;
-
- writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET);
-
- /* wait 30 ms */
- msleep_interruptible(30);
-
- /* Wait until the chip is reset */
- reset_start_time = jiffies;
- while( (readreg(dev, PP_SelfST) & INIT_DONE) == 0 && jiffies - reset_start_time < 2)
- ;
-}
-#endif
-
/* Open/initialize the board. This is called (in the current kernel)
sometime after booting when the 'ifconfig' program is run.
@@ -374,11 +330,9 @@ net_send_packet(struct sk_buff *skb, struct net_device *dev)
struct net_local *lp = netdev_priv(dev);
unsigned long flags;
- if (net_debug > 3)
- printk("%s: sent %d byte packet of type %x\n",
- dev->name, skb->len,
- (skb->data[ETH_ALEN+ETH_ALEN] << 8)
- | skb->data[ETH_ALEN+ETH_ALEN+1]);
+ netif_dbg(lp, tx_queued, dev, "sent %d byte packet of type %x\n",
+ skb->len, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+ skb->data[ETH_ALEN + ETH_ALEN + 1]);
/* keep the upload from being interrupted, since we
ask the chip to start transmitting before the
@@ -416,11 +370,6 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
struct net_local *lp;
int ioaddr, status;
- if (dev == NULL) {
- printk ("net_interrupt(): irq %d for unknown device.\n", irq);
- return IRQ_NONE;
- }
-
ioaddr = dev->base_addr;
lp = netdev_priv(dev);
@@ -432,7 +381,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
faster than you can read them off, you're screwed. Hasta la
vista, baby! */
while ((status = swab16(nubus_readw(dev->base_addr + ISQ_PORT)))) {
- if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status);
+ netif_dbg(lp, intr, dev, "status=%04x\n", status);
switch(status & ISQ_EVENT_MASK) {
case ISQ_RECEIVER_EVENT:
/* Got a packet(s). */
@@ -462,7 +411,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
netif_wake_queue(dev);
}
if (status & TX_UNDERRUN) {
- if (net_debug > 0) printk("%s: transmit underrun\n", dev->name);
+ netif_dbg(lp, tx_err, dev, "transmit underrun\n");
lp->send_underrun++;
if (lp->send_underrun == 3) lp->send_cmd = TX_AFTER_381;
else if (lp->send_underrun == 6) lp->send_cmd = TX_AFTER_ALL;
@@ -483,6 +432,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
static void
net_rx(struct net_device *dev)
{
+ struct net_local *lp = netdev_priv(dev);
struct sk_buff *skb;
int status, length;
@@ -506,7 +456,6 @@ net_rx(struct net_device *dev)
/* Malloc up new buffer. */
skb = alloc_skb(length, GFP_ATOMIC);
if (skb == NULL) {
- printk("%s: Memory squeeze, dropping packet.\n", dev->name);
dev->stats.rx_dropped++;
return;
}
@@ -515,10 +464,9 @@ net_rx(struct net_device *dev)
skb_copy_to_linear_data(skb, (void *)(dev->mem_start + PP_RxFrame),
length);
- if (net_debug > 3)printk("%s: received %d byte packet of type %x\n",
- dev->name, length,
- (skb->data[ETH_ALEN+ETH_ALEN] << 8)
- | skb->data[ETH_ALEN+ETH_ALEN+1]);
+ netif_dbg(lp, rx_status, dev, "received %d byte packet of type %x\n",
+ length, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+ skb->data[ETH_ALEN + ETH_ALEN + 1]);
skb->protocol=eth_type_trans(skb,dev);
netif_rx(skb);
@@ -594,7 +542,7 @@ static int set_mac_address(struct net_device *dev, void *addr)
return -EADDRNOTAVAIL;
memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
- printk("%s: Setting MAC address to %pM\n", dev->name, dev->dev_addr);
+ netdev_info(dev, "Setting MAC address to %pM\n", dev->dev_addr);
/* set the Ethernet address */
for (i=0; i < ETH_ALEN/2; i++)
@@ -603,32 +551,24 @@ static int set_mac_address(struct net_device *dev, void *addr)
return 0;
}
-#ifdef MODULE
-
-static struct net_device *dev_cs89x0;
-static int debug;
-
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "CS89[02]0 debug level (0-5)");
MODULE_LICENSE("GPL");
-int __init
-init_module(void)
+static int mac89x0_device_remove(struct platform_device *pdev)
{
- net_debug = debug;
- dev_cs89x0 = mac89x0_probe(-1);
- if (IS_ERR(dev_cs89x0)) {
- printk(KERN_WARNING "mac89x0.c: No card found\n");
- return PTR_ERR(dev_cs89x0);
- }
+ struct net_device *dev = platform_get_drvdata(pdev);
+
+ unregister_netdev(dev);
+ nubus_writew(0, dev->base_addr + ADD_PORT);
+ free_netdev(dev);
return 0;
}
-void
-cleanup_module(void)
-{
- unregister_netdev(dev_cs89x0);
- nubus_writew(0, dev_cs89x0->base_addr + ADD_PORT);
- free_netdev(dev_cs89x0);
-}
-#endif /* MODULE */
+static struct platform_driver mac89x0_platform_driver = {
+ .probe = mac89x0_device_probe,
+ .remove = mac89x0_device_remove,
+ .driver = {
+ .name = "mac89x0",
+ },
+};
+
+module_platform_driver(mac89x0_platform_driver);
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 1a49297224ed..ff92ab1daeb8 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -19,7 +19,7 @@
#include "be.h"
#include "be_cmds.h"
-char *be_misconfig_evt_port_state[] = {
+const char * const be_misconfig_evt_port_state[] = {
"Physical Link is functional",
"Optics faulted/incorrectly installed/not installed - Reseat optics. If issue not resolved, replace.",
"Optics of two types installed – Remove one optic or install matching pair of optics.",
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 09da2d82c2f0..e8b43cf44b6f 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -201,7 +201,7 @@ enum {
phy_state == BE_PHY_UNQUALIFIED || \
phy_state == BE_PHY_UNCERTIFIED)
-extern char *be_misconfig_evt_port_state[];
+extern const char * const be_misconfig_evt_port_state[];
/* async event indicating misconfigured port */
struct be_async_event_misconfig_port {
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index a998c36c5e61..159dc2df878d 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -454,6 +454,16 @@ static void dpaa_set_rx_mode(struct net_device *net_dev)
err);
}
+ if (!!(net_dev->flags & IFF_ALLMULTI) != priv->mac_dev->allmulti) {
+ priv->mac_dev->allmulti = !priv->mac_dev->allmulti;
+ err = priv->mac_dev->set_allmulti(priv->mac_dev->fman_mac,
+ priv->mac_dev->allmulti);
+ if (err < 0)
+ netif_err(priv, drv, net_dev,
+ "mac_dev->set_allmulti() = %d\n",
+ err);
+ }
+
err = priv->mac_dev->set_multi(net_dev, priv->mac_dev);
if (err < 0)
netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n",
@@ -1916,8 +1926,10 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
goto csum_failed;
}
+ /* SGT[0] is used by the linear part */
sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom);
- qm_sg_entry_set_len(&sgt[0], skb_headlen(skb));
+ frag_len = skb_headlen(skb);
+ qm_sg_entry_set_len(&sgt[0], frag_len);
sgt[0].bpid = FSL_DPAA_BPID_INV;
sgt[0].offset = 0;
addr = dma_map_single(dev, skb->data,
@@ -1930,9 +1942,9 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
qm_sg_entry_set64(&sgt[0], addr);
/* populate the rest of SGT entries */
- frag = &skb_shinfo(skb)->frags[0];
- frag_len = frag->size;
- for (i = 1; i <= nr_frags; i++, frag++) {
+ for (i = 0; i < nr_frags; i++) {
+ frag = &skb_shinfo(skb)->frags[i];
+ frag_len = frag->size;
WARN_ON(!skb_frag_page(frag));
addr = skb_frag_dma_map(dev, frag, 0,
frag_len, dma_dir);
@@ -1942,15 +1954,16 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
goto sg_map_failed;
}
- qm_sg_entry_set_len(&sgt[i], frag_len);
- sgt[i].bpid = FSL_DPAA_BPID_INV;
- sgt[i].offset = 0;
+ qm_sg_entry_set_len(&sgt[i + 1], frag_len);
+ sgt[i + 1].bpid = FSL_DPAA_BPID_INV;
+ sgt[i + 1].offset = 0;
/* keep the offset in the address */
- qm_sg_entry_set64(&sgt[i], addr);
- frag_len = frag->size;
+ qm_sg_entry_set64(&sgt[i + 1], addr);
}
- qm_sg_entry_set_f(&sgt[i - 1], frag_len);
+
+ /* Set the final bit in the last used entry of the SGT */
+ qm_sg_entry_set_f(&sgt[nr_frags], frag_len);
qm_fd_set_sg(fd, priv->tx_headroom, skb->len);
@@ -2052,19 +2065,23 @@ static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
/* MAX_SKB_FRAGS is equal or larger than our dpaa_SGT_MAX_ENTRIES;
* make sure we don't feed FMan with more fragments than it supports.
*/
- if (nonlinear &&
- likely(skb_shinfo(skb)->nr_frags < DPAA_SGT_MAX_ENTRIES)) {
- /* Just create a S/G fd based on the skb */
- err = skb_to_sg_fd(priv, skb, &fd);
- percpu_priv->tx_frag_skbuffs++;
- } else {
+ if (unlikely(nonlinear &&
+ (skb_shinfo(skb)->nr_frags >= DPAA_SGT_MAX_ENTRIES))) {
/* If the egress skb contains more fragments than we support
* we have no choice but to linearize it ourselves.
*/
- if (unlikely(nonlinear) && __skb_linearize(skb))
+ if (__skb_linearize(skb))
goto enomem;
- /* Finally, create a contig FD from this skb */
+ nonlinear = skb_is_nonlinear(skb);
+ }
+
+ if (nonlinear) {
+ /* Just create a S/G fd based on the skb */
+ err = skb_to_sg_fd(priv, skb, &fd);
+ percpu_priv->tx_frag_skbuffs++;
+ } else {
+ /* Create a contig FD from this skb */
err = skb_to_contig_fd(priv, skb, &fd, &offset);
}
if (unlikely(err < 0))
@@ -2201,14 +2218,8 @@ static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal,
if (dpaa_eth_napi_schedule(percpu_priv, portal))
return qman_cb_dqrr_stop;
- if (dpaa_eth_refill_bpools(priv))
- /* Unable to refill the buffer pool due to insufficient
- * system memory. Just release the frame back into the pool,
- * otherwise we'll soon end up with an empty buffer pool.
- */
- dpaa_fd_release(net_dev, &dq->fd);
- else
- dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+ dpaa_eth_refill_bpools(priv);
+ dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
return qman_cb_dqrr_consume;
}
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
index ea43b4974149..9a581faaa742 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
@@ -1117,6 +1117,25 @@ int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
return 0;
}
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable)
+{
+ u32 tmp;
+ struct dtsec_regs __iomem *regs = dtsec->regs;
+
+ if (!is_init_done(dtsec->dtsec_drv_param))
+ return -EINVAL;
+
+ tmp = ioread32be(&regs->rctrl);
+ if (enable)
+ tmp |= RCTRL_MPROM;
+ else
+ tmp &= ~RCTRL_MPROM;
+
+ iowrite32be(tmp, &regs->rctrl);
+
+ return 0;
+}
+
int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.h b/drivers/net/ethernet/freescale/fman/fman_dtsec.h
index c4467c072058..1a689adf5a22 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.h
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.h
@@ -55,5 +55,6 @@ int dtsec_set_exception(struct fman_mac *dtsec,
int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
int dtsec_get_version(struct fman_mac *dtsec, u32 *mac_version);
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable);
#endif /* __DTSEC_H */
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c
index c0296880feba..446a97b792e3 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.c
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
@@ -350,6 +350,7 @@ struct fman_mac {
struct fman_rev_info fm_rev_info;
bool basex_if;
struct phy_device *pcsphy;
+ bool allmulti_enabled;
};
static void add_addr_in_paddr(struct memac_regs __iomem *regs, u8 *adr,
@@ -940,6 +941,29 @@ int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
return 0;
}
+int memac_set_allmulti(struct fman_mac *memac, bool enable)
+{
+ u32 entry;
+ struct memac_regs __iomem *regs = memac->regs;
+
+ if (!is_init_done(memac->memac_drv_param))
+ return -EINVAL;
+
+ if (enable) {
+ for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry | HASH_CTRL_MCAST_EN,
+ &regs->hashtable_ctrl);
+ } else {
+ for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry & ~HASH_CTRL_MCAST_EN,
+ &regs->hashtable_ctrl);
+ }
+
+ memac->allmulti_enabled = enable;
+
+ return 0;
+}
+
int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
{
struct memac_regs __iomem *regs = memac->regs;
@@ -963,8 +987,12 @@ int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
break;
}
}
- if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
- iowrite32be(hash & ~HASH_CTRL_MCAST_EN, &regs->hashtable_ctrl);
+
+ if (!memac->allmulti_enabled) {
+ if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
+ iowrite32be(hash & ~HASH_CTRL_MCAST_EN,
+ &regs->hashtable_ctrl);
+ }
return 0;
}
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.h b/drivers/net/ethernet/freescale/fman/fman_memac.h
index c4a66469a907..b5a50338ed9a 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.h
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.h
@@ -57,5 +57,6 @@ int memac_set_exception(struct fman_mac *memac,
enum fman_mac_exceptions exception, bool enable);
int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
+int memac_set_allmulti(struct fman_mac *memac, bool enable);
#endif /* __MEMAC_H */
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.c b/drivers/net/ethernet/freescale/fman/fman_tgec.c
index 4b0f3a50b293..284735d4ebe9 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.c
@@ -217,6 +217,7 @@ struct fman_mac {
struct tgec_cfg *cfg;
void *fm;
struct fman_rev_info fm_rev_info;
+ bool allmulti_enabled;
};
static void set_mac_address(struct tgec_regs __iomem *regs, u8 *adr)
@@ -564,6 +565,29 @@ int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
return 0;
}
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable)
+{
+ u32 entry;
+ struct tgec_regs __iomem *regs = tgec->regs;
+
+ if (!is_init_done(tgec->cfg))
+ return -EINVAL;
+
+ if (enable) {
+ for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry | TGEC_HASH_MCAST_EN,
+ &regs->hashtable_ctrl);
+ } else {
+ for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+ iowrite32be(entry & ~TGEC_HASH_MCAST_EN,
+ &regs->hashtable_ctrl);
+ }
+
+ tgec->allmulti_enabled = enable;
+
+ return 0;
+}
+
int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
{
struct tgec_regs __iomem *regs = tgec->regs;
@@ -591,9 +615,12 @@ int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
break;
}
}
- if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
- iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
- &regs->hashtable_ctrl);
+
+ if (!tgec->allmulti_enabled) {
+ if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
+ iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
+ &regs->hashtable_ctrl);
+ }
return 0;
}
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.h b/drivers/net/ethernet/freescale/fman/fman_tgec.h
index 514bba9f47ce..cbbd3b422a98 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.h
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.h
@@ -51,5 +51,6 @@ int tgec_set_exception(struct fman_mac *tgec,
int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
int tgec_get_version(struct fman_mac *tgec, u32 *mac_version);
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable);
#endif /* __TGEC_H */
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 88c0a0636b44..4829dcd9e077 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -470,6 +470,7 @@ static void setup_dtsec(struct mac_device *mac_dev)
mac_dev->set_tx_pause = dtsec_set_tx_pause_frames;
mac_dev->set_rx_pause = dtsec_accept_rx_pause_frames;
mac_dev->set_exception = dtsec_set_exception;
+ mac_dev->set_allmulti = dtsec_set_allmulti;
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
@@ -488,6 +489,7 @@ static void setup_tgec(struct mac_device *mac_dev)
mac_dev->set_tx_pause = tgec_set_tx_pause_frames;
mac_dev->set_rx_pause = tgec_accept_rx_pause_frames;
mac_dev->set_exception = tgec_set_exception;
+ mac_dev->set_allmulti = tgec_set_allmulti;
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
@@ -506,6 +508,7 @@ static void setup_memac(struct mac_device *mac_dev)
mac_dev->set_tx_pause = memac_set_tx_pause_frames;
mac_dev->set_rx_pause = memac_accept_rx_pause_frames;
mac_dev->set_exception = memac_set_exception;
+ mac_dev->set_allmulti = memac_set_allmulti;
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h
index eefb3357e304..b520cec120ee 100644
--- a/drivers/net/ethernet/freescale/fman/mac.h
+++ b/drivers/net/ethernet/freescale/fman/mac.h
@@ -59,6 +59,7 @@ struct mac_device {
bool rx_pause_active;
bool tx_pause_active;
bool promisc;
+ bool allmulti;
int (*init)(struct mac_device *mac_dev);
int (*start)(struct mac_device *mac_dev);
@@ -66,6 +67,7 @@ struct mac_device {
void (*adjust_link)(struct mac_device *mac_dev);
int (*set_promisc)(struct fman_mac *mac_dev, bool enable);
int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr);
+ int (*set_allmulti)(struct fman_mac *mac_dev, bool enable);
int (*set_multi)(struct net_device *net_dev,
struct mac_device *mac_dev);
int (*set_rx_pause)(struct fman_mac *mac_dev, bool en);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5a86a916492c..765407179fdd 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -111,7 +111,7 @@ static int ibmvnic_poll(struct napi_struct *napi, int data);
static void send_map_query(struct ibmvnic_adapter *adapter);
static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
static void send_request_unmap(struct ibmvnic_adapter *, u8);
-static void send_login(struct ibmvnic_adapter *adapter);
+static int send_login(struct ibmvnic_adapter *adapter);
static void send_cap_queries(struct ibmvnic_adapter *adapter);
static int init_sub_crqs(struct ibmvnic_adapter *);
static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
@@ -809,8 +809,11 @@ static int ibmvnic_login(struct net_device *netdev)
}
reinit_completion(&adapter->init_done);
- send_login(adapter);
- if (!wait_for_completion_timeout(&adapter->init_done,
+ rc = send_login(adapter);
+ if (rc) {
+ dev_err(dev, "Unable to attempt device login\n");
+ return rc;
+ } else if (!wait_for_completion_timeout(&adapter->init_done,
timeout)) {
dev_err(dev, "Login timeout\n");
return -1;
@@ -845,8 +848,6 @@ static void release_resources(struct ibmvnic_adapter *adapter)
release_tx_pools(adapter);
release_rx_pools(adapter);
- release_stats_token(adapter);
- release_stats_buffers(adapter);
release_error_buffers(adapter);
release_napi(adapter);
release_login_rsp_buffer(adapter);
@@ -974,14 +975,6 @@ static int init_resources(struct ibmvnic_adapter *adapter)
if (rc)
return rc;
- rc = init_stats_buffers(adapter);
- if (rc)
- return rc;
-
- rc = init_stats_token(adapter);
- if (rc)
- return rc;
-
adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL);
if (!adapter->vpd)
return -ENOMEM;
@@ -1091,6 +1084,7 @@ static int ibmvnic_open(struct net_device *netdev)
static void clean_rx_pools(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_rx_pool *rx_pool;
+ struct ibmvnic_rx_buff *rx_buff;
u64 rx_entries;
int rx_scrqs;
int i, j;
@@ -1104,14 +1098,15 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter)
/* Free any remaining skbs in the rx buffer pools */
for (i = 0; i < rx_scrqs; i++) {
rx_pool = &adapter->rx_pool[i];
- if (!rx_pool)
+ if (!rx_pool || !rx_pool->rx_buff)
continue;
netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i);
for (j = 0; j < rx_entries; j++) {
- if (rx_pool->rx_buff[j].skb) {
- dev_kfree_skb_any(rx_pool->rx_buff[j].skb);
- rx_pool->rx_buff[j].skb = NULL;
+ rx_buff = &rx_pool->rx_buff[j];
+ if (rx_buff && rx_buff->skb) {
+ dev_kfree_skb_any(rx_buff->skb);
+ rx_buff->skb = NULL;
}
}
}
@@ -1120,6 +1115,7 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter)
static void clean_tx_pools(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_tx_pool *tx_pool;
+ struct ibmvnic_tx_buff *tx_buff;
u64 tx_entries;
int tx_scrqs;
int i, j;
@@ -1133,14 +1129,15 @@ static void clean_tx_pools(struct ibmvnic_adapter *adapter)
/* Free any remaining skbs in the tx buffer pools */
for (i = 0; i < tx_scrqs; i++) {
tx_pool = &adapter->tx_pool[i];
- if (!tx_pool)
+ if (!tx_pool && !tx_pool->tx_buff)
continue;
netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i);
for (j = 0; j < tx_entries; j++) {
- if (tx_pool->tx_buff[j].skb) {
- dev_kfree_skb_any(tx_pool->tx_buff[j].skb);
- tx_pool->tx_buff[j].skb = NULL;
+ tx_buff = &tx_pool->tx_buff[j];
+ if (tx_buff && tx_buff->skb) {
+ dev_kfree_skb_any(tx_buff->skb);
+ tx_buff->skb = NULL;
}
}
}
@@ -1482,6 +1479,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
if ((*hdrs >> 7) & 1) {
build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
tx_crq.v1.n_crq_elem = num_entries;
+ tx_buff->num_entries = num_entries;
tx_buff->indir_arr[0] = tx_crq;
tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr,
sizeof(tx_buff->indir_arr),
@@ -1500,6 +1498,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
(u64)tx_buff->indir_dma,
(u64)num_entries);
} else {
+ tx_buff->num_entries = num_entries;
lpar_rc = send_subcrq(adapter, handle_array[queue_num],
&tx_crq);
}
@@ -1532,11 +1531,10 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
if (atomic_add_return(num_entries, &tx_scrq->used)
>= adapter->req_tx_entries_per_subcrq) {
- netdev_info(netdev, "Stopping queue %d\n", queue_num);
+ netdev_dbg(netdev, "Stopping queue %d\n", queue_num);
netif_stop_subqueue(netdev, queue_num);
}
- tx_buff->num_entries = num_entries;
tx_packets++;
tx_bytes += skb->len;
txq->trans_start = jiffies;
@@ -2546,8 +2544,8 @@ restart_loop:
__netif_subqueue_stopped(adapter->netdev,
scrq->pool_index)) {
netif_wake_subqueue(adapter->netdev, scrq->pool_index);
- netdev_info(adapter->netdev, "Started queue %d\n",
- scrq->pool_index);
+ netdev_dbg(adapter->netdev, "Started queue %d\n",
+ scrq->pool_index);
}
}
@@ -3079,7 +3077,7 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter,
strncpy(&vlcd->name, adapter->netdev->name, len);
}
-static void send_login(struct ibmvnic_adapter *adapter)
+static int send_login(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_login_rsp_buffer *login_rsp_buffer;
struct ibmvnic_login_buffer *login_buffer;
@@ -3095,6 +3093,12 @@ static void send_login(struct ibmvnic_adapter *adapter)
struct vnic_login_client_data *vlcd;
int i;
+ if (!adapter->tx_scrq || !adapter->rx_scrq) {
+ netdev_err(adapter->netdev,
+ "RX or TX queues are not allocated, device login failed\n");
+ return -1;
+ }
+
release_login_rsp_buffer(adapter);
client_data_len = vnic_client_data_len(adapter);
@@ -3192,7 +3196,7 @@ static void send_login(struct ibmvnic_adapter *adapter)
crq.login.len = cpu_to_be32(buffer_size);
ibmvnic_send_crq(adapter, &crq);
- return;
+ return 0;
buf_rsp_map_failed:
kfree(login_rsp_buffer);
@@ -3201,7 +3205,7 @@ buf_rsp_alloc_failed:
buf_map_failed:
kfree(login_buffer);
buf_alloc_failed:
- return;
+ return -1;
}
static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
@@ -4430,6 +4434,14 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
release_crq_queue(adapter);
}
+ rc = init_stats_buffers(adapter);
+ if (rc)
+ return rc;
+
+ rc = init_stats_token(adapter);
+ if (rc)
+ return rc;
+
return rc;
}
@@ -4537,6 +4549,9 @@ static int ibmvnic_remove(struct vio_dev *dev)
release_sub_crqs(adapter, 1);
release_crq_queue(adapter);
+ release_stats_token(adapter);
+ release_stats_buffers(adapter);
+
adapter->state = VNIC_REMOVED;
mutex_unlock(&adapter->reset_lock);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
index 736a9f087bc9..c58a5377a287 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -262,6 +262,7 @@ s32 fm10k_stop_hw_generic(struct fm10k_hw *hw)
* fm10k_read_hw_stats_32b - Reads value of 32-bit registers
* @hw: pointer to the hardware structure
* @addr: address of register containing a 32-bit value
+ * @stat: pointer to structure holding hw stat information
*
* Function reads the content of the register and returns the delta
* between the base and the current value.
@@ -281,6 +282,7 @@ u32 fm10k_read_hw_stats_32b(struct fm10k_hw *hw, u32 addr,
* fm10k_read_hw_stats_48b - Reads value of 48-bit registers
* @hw: pointer to the hardware structure
* @addr: address of register containing the lower 32-bit value
+ * @stat: pointer to structure holding hw stat information
*
* Function reads the content of 2 registers, combined to represent a 48-bit
* statistical value. Extra processing is required to handle overflowing.
@@ -461,7 +463,6 @@ void fm10k_update_hw_stats_q(struct fm10k_hw *hw, struct fm10k_hw_stats_q *q,
/**
* fm10k_unbind_hw_stats_q - Unbind the queue counters from their queues
- * @hw: pointer to the hardware structure
* @q: pointer to the ring of hardware statistics queue
* @idx: index pointing to the start of the ring iteration
* @count: number of queues to iterate over
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 8e12aae065d8..2c93d719438f 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -28,13 +28,13 @@
#include "fm10k.h"
-#define DRV_VERSION "0.22.1-k"
+#define DRV_VERSION "0.23.4-k"
#define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver"
const char fm10k_driver_version[] = DRV_VERSION;
char fm10k_driver_name[] = "fm10k";
static const char fm10k_driver_string[] = DRV_SUMMARY;
static const char fm10k_copyright[] =
- "Copyright(c) 2013 - 2017 Intel Corporation.";
+ "Copyright(c) 2013 - 2018 Intel Corporation.";
MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
MODULE_DESCRIPTION(DRV_SUMMARY);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index a38ae5c54da3..75c99aed3c41 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -486,7 +486,7 @@ static void fm10k_insert_tunnel_port(struct list_head *ports,
/**
* fm10k_udp_tunnel_add
- * @netdev: network interface device structure
+ * @dev: network interface device structure
* @ti: Tunnel endpoint information
*
* This function is called when a new UDP tunnel port has been added.
@@ -518,8 +518,8 @@ static void fm10k_udp_tunnel_add(struct net_device *dev,
/**
* fm10k_udp_tunnel_del
- * @netdev: network interface device structure
- * @ti: Tunnel endpoint information
+ * @dev: network interface device structure
+ * @ti: Tunnel end point information
*
* This function is called when a new UDP tunnel port is deleted. The freed
* port will be removed from the list, then we reprogram the offloaded port
@@ -803,7 +803,7 @@ int fm10k_queue_vlan_request(struct fm10k_intfc *interface,
* @glort: the target glort for this update
* @addr: the address to update
* @vid: the vid to update
- * @sync: whether to add or remove
+ * @set: whether to add or remove
*
* This function queues up a MAC request for sending to the switch manager.
* A separate thread monitors the queue and sends updates to the switch
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index a434fecfdfeb..50f53e403ef5 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -29,7 +29,7 @@ static const struct fm10k_info *fm10k_info_tbl[] = {
[fm10k_device_vf] = &fm10k_vf_info,
};
-/**
+/*
* fm10k_pci_tbl - PCI Device ID Table
*
* Wildcard entries (PCI_ANY_ID) should come last
@@ -211,7 +211,7 @@ static void fm10k_start_service_event(struct fm10k_intfc *interface)
/**
* fm10k_service_timer - Timer Call-back
- * @data: pointer to interface cast into an unsigned long
+ * @t: pointer to timer data
**/
static void fm10k_service_timer(struct timer_list *t)
{
@@ -649,7 +649,7 @@ void fm10k_update_stats(struct fm10k_intfc *interface)
/**
* fm10k_watchdog_flush_tx - flush queues on host not ready
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
**/
static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
{
@@ -679,7 +679,7 @@ static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
/**
* fm10k_watchdog_subtask - check and bring link up
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
**/
static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
{
@@ -703,7 +703,7 @@ static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
/**
* fm10k_check_hang_subtask - check for hung queues and dropped interrupts
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
*
* This function serves two purposes. First it strobes the interrupt lines
* in order to make certain interrupts are occurring. Secondly it sets the
@@ -1995,6 +1995,7 @@ skip_tx_dma_drain:
/**
* fm10k_sw_init - Initialize general software structures
* @interface: host interface private structure to initialize
+ * @ent: PCI device ID entry
*
* fm10k_sw_init initializes the interface private data structure.
* Fields are initialized based on PCI device information and
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index d6406fc31ffb..bee192fe2ffb 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -1180,7 +1180,7 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results,
/**
* fm10k_iov_select_vid - Select correct default VLAN ID
- * @hw: Pointer to hardware structure
+ * @vf_info: pointer to VF information structure
* @vid: VLAN ID to correct
*
* Will report an error if the VLAN ID is out of range. For VID = 0, it will
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
index f8e87bf086b9..9d0d31da426b 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -120,6 +120,7 @@ static s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string)
* @msg: Pointer to message block
* @attr_id: Attribute ID
* @mac_addr: MAC address to be stored
+ * @vlan: VLAN to be stored
*
* This function will reorder a MAC address to be CPU endian and store it
* in the attribute buffer. It will return success if provided with a
@@ -155,8 +156,8 @@ s32 fm10k_tlv_attr_put_mac_vlan(u32 *msg, u16 attr_id,
/**
* fm10k_tlv_attr_get_mac_vlan - Get MAC/VLAN stored in attribute
* @attr: Pointer to attribute
- * @attr_id: Attribute ID
* @mac_addr: location of buffer to store MAC address
+ * @vlan: location of buffer to store VLAN
*
* This function pulls the MAC address back out of the attribute and will
* place it in the array pointed by by mac_addr. It will return success
@@ -549,7 +550,7 @@ static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results,
* @hw: Pointer to hardware structure
* @msg: Pointer to message
* @mbx: Pointer to mailbox information structure
- * @func: Function array containing list of message handling functions
+ * @data: Pointer to message handler data structure
*
* This function should be the first function called upon receiving a
* message. The handler will identify the message type and call the correct
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index a852775d3059..0dfc52772c45 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -1914,6 +1914,43 @@ enum i40e_aq_phy_type {
I40E_PHY_TYPE_DEFAULT = 0xFF,
};
+#define I40E_PHY_TYPES_BITMASK (BIT_ULL(I40E_PHY_TYPE_SGMII) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) | \
+ BIT_ULL(I40E_PHY_TYPE_XAUI) | \
+ BIT_ULL(I40E_PHY_TYPE_XFI) | \
+ BIT_ULL(I40E_PHY_TYPE_SFI) | \
+ BIT_ULL(I40E_PHY_TYPE_XLAUI) | \
+ BIT_ULL(I40E_PHY_TYPE_XLPPI) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) | \
+ BIT_ULL(I40E_PHY_TYPE_UNRECOGNIZED) | \
+ BIT_ULL(I40E_PHY_TYPE_UNSUPPORTED) | \
+ BIT_ULL(I40E_PHY_TYPE_100BASE_TX) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_T) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_T) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) | \
+ BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) | \
+ BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) | \
+ BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) | \
+ BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_KR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_CR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_SR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_LR) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC) | \
+ BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC))
+
#define I40E_LINK_SPEED_100MB_SHIFT 0x1
#define I40E_LINK_SPEED_1000MB_SHIFT 0x2
#define I40E_LINK_SPEED_10GB_SHIFT 0x3
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 0dcbbda164c4..89807e32a898 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -230,6 +230,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
+ I40E_PRIV_FLAG("link-down-on-close",
+ I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED, 0),
I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
I40E_PRIV_FLAG("disable-source-pruning",
I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index f6d37456f3b7..be9a1467a1a1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6547,6 +6547,75 @@ int i40e_up(struct i40e_vsi *vsi)
}
/**
+ * i40e_force_link_state - Force the link status
+ * @pf: board private structure
+ * @is_up: whether the link state should be forced up or down
+ **/
+static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
+{
+ struct i40e_aq_get_phy_abilities_resp abilities;
+ struct i40e_aq_set_phy_config config = {0};
+ struct i40e_hw *hw = &pf->hw;
+ i40e_status err;
+ u64 mask;
+
+ /* Get the current phy config */
+ err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+ NULL);
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "failed to get phy cap., ret = %s last_status = %s\n",
+ i40e_stat_str(hw, err),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return err;
+ }
+
+ /* If link needs to go up, but was not forced to go down,
+ * no need for a flap
+ */
+ if (is_up && abilities.phy_type != 0)
+ return I40E_SUCCESS;
+
+ /* To force link we need to set bits for all supported PHY types,
+ * but there are now more than 32, so we need to split the bitmap
+ * across two fields.
+ */
+ mask = I40E_PHY_TYPES_BITMASK;
+ config.phy_type = is_up ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0;
+ config.phy_type_ext = is_up ? (u8)((mask >> 32) & 0xff) : 0;
+ /* Copy the old settings, except of phy_type */
+ config.abilities = abilities.abilities;
+ config.link_speed = abilities.link_speed;
+ config.eee_capability = abilities.eee_capability;
+ config.eeer = abilities.eeer_val;
+ config.low_power_ctrl = abilities.d3_lpan;
+ err = i40e_aq_set_phy_config(hw, &config, NULL);
+
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "set phy config ret = %s last_status = %s\n",
+ i40e_stat_str(&pf->hw, err),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return err;
+ }
+
+ /* Update the link info */
+ err = i40e_update_link_info(hw);
+ if (err) {
+ /* Wait a little bit (on 40G cards it sometimes takes a really
+ * long time for link to come back from the atomic reset)
+ * and try once more
+ */
+ msleep(1000);
+ i40e_update_link_info(hw);
+ }
+
+ i40e_aq_set_link_restart_an(hw, true, NULL);
+
+ return I40E_SUCCESS;
+}
+
+/**
* i40e_down - Shutdown the connection processing
* @vsi: the VSI being stopped
**/
@@ -6563,6 +6632,9 @@ void i40e_down(struct i40e_vsi *vsi)
}
i40e_vsi_disable_irq(vsi);
i40e_vsi_stop_rings(vsi);
+ if (vsi->type == I40E_VSI_MAIN &&
+ vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED)
+ i40e_force_link_state(vsi->back, false);
i40e_napi_disable_all(vsi);
for (i = 0; i < vsi->num_queue_pairs; i++) {
@@ -7524,6 +7596,9 @@ int i40e_open(struct net_device *netdev)
netif_carrier_off(netdev);
+ if (i40e_force_link_state(pf, true))
+ return -EAGAIN;
+
err = i40e_vsi_open(vsi);
if (err)
return err;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 1ec9b1d8023d..97cfe944b568 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -708,16 +708,22 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
/**
* i40e_get_tx_pending - how many tx descriptors not processed
* @tx_ring: the ring of descriptors
+ * @in_sw: use SW variables
*
* Since there is no access to the ring head register
* in XL710, we need to use our local copies
**/
-u32 i40e_get_tx_pending(struct i40e_ring *ring)
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
{
u32 head, tail;
- head = i40e_get_head(ring);
- tail = readl(ring->tail);
+ if (!in_sw) {
+ head = i40e_get_head(ring);
+ tail = readl(ring->tail);
+ } else {
+ head = ring->next_to_clean;
+ tail = ring->next_to_use;
+ }
if (head != tail)
return (head < tail) ?
@@ -774,7 +780,7 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
*/
smp_rmb();
tx_ring->tx_stats.prev_pkt_ctr =
- i40e_get_tx_pending(tx_ring) ? packets : -1;
+ i40e_get_tx_pending(tx_ring, true) ? packets : -1;
}
}
}
@@ -898,7 +904,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
* them to be written back in case we stay in NAPI.
* In this mode on X722 we do not enable Interrupt.
*/
- unsigned int j = i40e_get_tx_pending(tx_ring);
+ unsigned int j = i40e_get_tx_pending(tx_ring, false);
if (budget &&
((j / WB_STRIDE) == 0) && (j > 0) &&
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index f75a8fe68fcf..3c80ea784389 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -505,7 +505,7 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring);
void i40e_free_rx_resources(struct i40e_ring *rx_ring);
int i40e_napi_poll(struct napi_struct *napi, int budget);
void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
-u32 i40e_get_tx_pending(struct i40e_ring *ring);
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 5cca083da93c..e23975c67417 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -3062,7 +3062,7 @@ static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid)
for (i = 0; i < vf->num_tc ; i++) {
vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id);
- if (vsi->seid == seid)
+ if (vsi && vsi->seid == seid)
return vsi;
}
return NULL;
@@ -3146,8 +3146,8 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg)
dev_info(&pf->pdev->dev,
"VF %d: Invalid input, can't apply cloud filter\n",
vf->vf_id);
- aq_ret = I40E_ERR_PARAM;
- goto err;
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
}
memset(&cfilter, 0, sizeof(cfilter));
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index eb8f3e327f6b..e088d23eb083 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -196,7 +196,7 @@ void i40evf_detect_recover_hung(struct i40e_vsi *vsi)
*/
smp_rmb();
tx_ring->tx_stats.prev_pkt_ctr =
- i40evf_get_tx_pending(tx_ring, false) ? packets : -1;
+ i40evf_get_tx_pending(tx_ring, true) ? packets : -1;
}
}
}
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 4955ce3ab6a2..dae121877935 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -815,13 +815,11 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
if (!macaddr)
return NULL;
- spin_lock_bh(&adapter->mac_vlan_list_lock);
-
f = i40evf_find_filter(adapter, macaddr);
if (!f) {
f = kzalloc(sizeof(*f), GFP_ATOMIC);
if (!f)
- goto clearout;
+ return f;
ether_addr_copy(f->macaddr, macaddr);
@@ -832,8 +830,6 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
f->remove = false;
}
-clearout:
- spin_unlock_bh(&adapter->mac_vlan_list_lock);
return f;
}
@@ -868,9 +864,10 @@ static int i40evf_set_mac(struct net_device *netdev, void *p)
adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
}
+ f = i40evf_add_filter(adapter, addr->sa_data);
+
spin_unlock_bh(&adapter->mac_vlan_list_lock);
- f = i40evf_add_filter(adapter, addr->sa_data);
if (f) {
ether_addr_copy(hw->mac.addr, addr->sa_data);
ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
@@ -2493,6 +2490,7 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
u16 addr_type = 0;
u16 n_proto = 0;
int i = 0;
+ struct virtchnl_filter *vf = &filter->f;
if (f->dissector->used_keys &
~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
@@ -2540,7 +2538,7 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
return -EINVAL;
if (n_proto == ETH_P_IPV6) {
/* specify flow type as TCP IPv6 */
- filter->f.flow_type = VIRTCHNL_TCP_V6_FLOW;
+ vf->flow_type = VIRTCHNL_TCP_V6_FLOW;
}
if (key->ip_proto != IPPROTO_TCP) {
@@ -2585,9 +2583,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
is_multicast_ether_addr(key->dst)) {
/* set the mask if a valid dst_mac address */
for (i = 0; i < ETH_ALEN; i++)
- filter->f.mask.tcp_spec.dst_mac[i] |=
- 0xff;
- ether_addr_copy(filter->f.data.tcp_spec.dst_mac,
+ vf->mask.tcp_spec.dst_mac[i] |= 0xff;
+ ether_addr_copy(vf->data.tcp_spec.dst_mac,
key->dst);
}
@@ -2596,9 +2593,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
is_multicast_ether_addr(key->src)) {
/* set the mask if a valid dst_mac address */
for (i = 0; i < ETH_ALEN; i++)
- filter->f.mask.tcp_spec.src_mac[i] |=
- 0xff;
- ether_addr_copy(filter->f.data.tcp_spec.src_mac,
+ vf->mask.tcp_spec.src_mac[i] |= 0xff;
+ ether_addr_copy(vf->data.tcp_spec.src_mac,
key->src);
}
}
@@ -2622,8 +2618,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
return I40E_ERR_CONFIG;
}
}
- filter->f.mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
- filter->f.data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
+ vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
+ vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
}
if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
@@ -2670,14 +2666,12 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
return I40E_ERR_CONFIG;
}
if (key->dst) {
- filter->f.mask.tcp_spec.dst_ip[0] |=
- cpu_to_be32(0xffffffff);
- filter->f.data.tcp_spec.dst_ip[0] = key->dst;
+ vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff);
+ vf->data.tcp_spec.dst_ip[0] = key->dst;
}
if (key->src) {
- filter->f.mask.tcp_spec.src_ip[0] |=
- cpu_to_be32(0xffffffff);
- filter->f.data.tcp_spec.src_ip[0] = key->src;
+ vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff);
+ vf->data.tcp_spec.src_ip[0] = key->src;
}
}
@@ -2710,22 +2704,14 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
field_flags |= I40EVF_CLOUD_FIELD_IIP;
- if (key->dst.s6_addr) {
- for (i = 0; i < 4; i++)
- filter->f.mask.tcp_spec.dst_ip[i] |=
- cpu_to_be32(0xffffffff);
- memcpy(&filter->f.data.tcp_spec.dst_ip,
- &key->dst.s6_addr32,
- sizeof(filter->f.data.tcp_spec.dst_ip));
- }
- if (key->src.s6_addr) {
- for (i = 0; i < 4; i++)
- filter->f.mask.tcp_spec.src_ip[i] |=
- cpu_to_be32(0xffffffff);
- memcpy(&filter->f.data.tcp_spec.src_ip,
- &key->src.s6_addr32,
- sizeof(filter->f.data.tcp_spec.src_ip));
- }
+ for (i = 0; i < 4; i++)
+ vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff);
+ memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32,
+ sizeof(vf->data.tcp_spec.dst_ip));
+ for (i = 0; i < 4; i++)
+ vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff);
+ memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32,
+ sizeof(vf->data.tcp_spec.src_ip));
}
if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
struct flow_dissector_key_ports *key =
@@ -2757,16 +2743,16 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
}
}
if (key->dst) {
- filter->f.mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
- filter->f.data.tcp_spec.dst_port = key->dst;
+ vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
+ vf->data.tcp_spec.dst_port = key->dst;
}
if (key->src) {
- filter->f.mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
- filter->f.data.tcp_spec.src_port = key->dst;
+ vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
+ vf->data.tcp_spec.src_port = key->dst;
}
}
- filter->f.field_flags = field_flags;
+ vf->field_flags = field_flags;
return 0;
}
@@ -3040,7 +3026,12 @@ static int i40evf_open(struct net_device *netdev)
if (err)
goto err_req_irq;
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+
i40evf_add_filter(adapter, adapter->hw.mac.addr);
+
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
i40evf_configure(adapter);
i40evf_up_complete(adapter);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 6134b61e0938..3c76c817ca1a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -1048,24 +1048,28 @@ void i40evf_disable_channels(struct i40evf_adapter *adapter)
* Print the cloud filter
**/
static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter,
- struct virtchnl_filter f)
+ struct virtchnl_filter *f)
{
- switch (f.flow_type) {
+ switch (f->flow_type) {
case VIRTCHNL_TCP_V4_FLOW:
dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n",
- &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac,
- ntohs(f.data.tcp_spec.vlan_id),
- &f.data.tcp_spec.dst_ip[0], &f.data.tcp_spec.src_ip[0],
- ntohs(f.data.tcp_spec.dst_port),
- ntohs(f.data.tcp_spec.src_port));
+ &f->data.tcp_spec.dst_mac,
+ &f->data.tcp_spec.src_mac,
+ ntohs(f->data.tcp_spec.vlan_id),
+ &f->data.tcp_spec.dst_ip[0],
+ &f->data.tcp_spec.src_ip[0],
+ ntohs(f->data.tcp_spec.dst_port),
+ ntohs(f->data.tcp_spec.src_port));
break;
case VIRTCHNL_TCP_V6_FLOW:
dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n",
- &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac,
- ntohs(f.data.tcp_spec.vlan_id),
- &f.data.tcp_spec.dst_ip, &f.data.tcp_spec.src_ip,
- ntohs(f.data.tcp_spec.dst_port),
- ntohs(f.data.tcp_spec.src_port));
+ &f->data.tcp_spec.dst_mac,
+ &f->data.tcp_spec.src_mac,
+ ntohs(f->data.tcp_spec.vlan_id),
+ &f->data.tcp_spec.dst_ip,
+ &f->data.tcp_spec.src_ip,
+ ntohs(f->data.tcp_spec.dst_port),
+ ntohs(f->data.tcp_spec.src_port));
break;
}
}
@@ -1303,7 +1307,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
i40evf_stat_str(&adapter->hw,
v_retval));
i40evf_print_cloud_filter(adapter,
- cf->f);
+ &cf->f);
list_del(&cf->list);
kfree(cf);
adapter->num_cloud_filters--;
@@ -1322,7 +1326,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
i40evf_stat_str(&adapter->hw,
v_retval));
i40evf_print_cloud_filter(adapter,
- cf->f);
+ &cf->f);
}
}
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 221f15803480..89edb9fae6f0 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -3059,6 +3059,8 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
for (i = 0; i < reta_entries; i++)
adapter->rss_indir_tbl[i] = indir[i];
+
+ ixgbe_store_reta(adapter);
}
/* Fill out the rss hash key */
@@ -3067,8 +3069,6 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
ixgbe_store_key(adapter);
}
- ixgbe_store_reta(adapter);
-
return 0;
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index 4242f0213e46..ed4cbe94c355 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -58,7 +58,6 @@ static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter)
return false;
/* start at VMDq register offset for SR-IOV enabled setups */
- pool = 0;
reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
for (i = 0, pool = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
/* If we are greater than indices move to next pool */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 0da5aa2c8aba..b032091022a8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7703,7 +7703,8 @@ static void ixgbe_service_task(struct work_struct *work)
if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) {
ixgbe_ptp_overflow_check(adapter);
- ixgbe_ptp_rx_hang(adapter);
+ if (adapter->flags & IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER)
+ ixgbe_ptp_rx_hang(adapter);
ixgbe_ptp_tx_hang(adapter);
}
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 4400e49090b4..e7623fed42da 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -94,6 +94,13 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
#define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN)
+static const char ixgbevf_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IXGBEVF_PRIV_FLAGS_LEGACY_RX BIT(0)
+ "legacy-rx",
+};
+
+#define IXGBEVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbevf_priv_flags_strings)
+
static int ixgbevf_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *cmd)
{
@@ -241,6 +248,8 @@ static void ixgbevf_get_drvinfo(struct net_device *netdev,
sizeof(drvinfo->version));
strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
sizeof(drvinfo->bus_info));
+
+ drvinfo->n_priv_flags = IXGBEVF_PRIV_FLAGS_STR_LEN;
}
static void ixgbevf_get_ringparam(struct net_device *netdev,
@@ -392,6 +401,8 @@ static int ixgbevf_get_sset_count(struct net_device *netdev, int stringset)
return IXGBEVF_TEST_LEN;
case ETH_SS_STATS:
return IXGBEVF_STATS_LEN;
+ case ETH_SS_PRIV_FLAGS:
+ return IXGBEVF_PRIV_FLAGS_STR_LEN;
default:
return -EINVAL;
}
@@ -496,6 +507,10 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset,
p += ETH_GSTRING_LEN;
}
break;
+ case ETH_SS_PRIV_FLAGS:
+ memcpy(data, ixgbevf_priv_flags_strings,
+ IXGBEVF_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+ break;
}
}
@@ -888,6 +903,37 @@ static int ixgbevf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
return err;
}
+static u32 ixgbevf_get_priv_flags(struct net_device *netdev)
+{
+ struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+ u32 priv_flags = 0;
+
+ if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+ priv_flags |= IXGBEVF_PRIV_FLAGS_LEGACY_RX;
+
+ return priv_flags;
+}
+
+static int ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+ struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+ unsigned int flags = adapter->flags;
+
+ flags &= ~IXGBEVF_FLAGS_LEGACY_RX;
+ if (priv_flags & IXGBEVF_PRIV_FLAGS_LEGACY_RX)
+ flags |= IXGBEVF_FLAGS_LEGACY_RX;
+
+ if (flags != adapter->flags) {
+ adapter->flags = flags;
+
+ /* reset interface to repopulate queues */
+ if (netif_running(netdev))
+ ixgbevf_reinit_locked(adapter);
+ }
+
+ return 0;
+}
+
static const struct ethtool_ops ixgbevf_ethtool_ops = {
.get_drvinfo = ixgbevf_get_drvinfo,
.get_regs_len = ixgbevf_get_regs_len,
@@ -909,6 +955,8 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = {
.get_rxfh_key_size = ixgbevf_get_rxfh_key_size,
.get_rxfh = ixgbevf_get_rxfh,
.get_link_ksettings = ixgbevf_get_link_ksettings,
+ .get_priv_flags = ixgbevf_get_priv_flags,
+ .set_priv_flags = ixgbevf_set_priv_flags,
};
void ixgbevf_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index f6952425c87d..f65ca156af2d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -89,19 +89,15 @@ struct ixgbevf_rx_queue_stats {
};
enum ixgbevf_ring_state_t {
+ __IXGBEVF_RX_3K_BUFFER,
+ __IXGBEVF_RX_BUILD_SKB_ENABLED,
__IXGBEVF_TX_DETECT_HANG,
__IXGBEVF_HANG_CHECK_ARMED,
};
-#define check_for_tx_hang(ring) \
- test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define set_check_for_tx_hang(ring) \
- set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define clear_check_for_tx_hang(ring) \
- clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-
struct ixgbevf_ring {
struct ixgbevf_ring *next;
+ struct ixgbevf_q_vector *q_vector; /* backpointer to q_vector */
struct net_device *netdev;
struct device *dev;
void *desc; /* descriptor ring memory */
@@ -133,7 +129,7 @@ struct ixgbevf_ring {
*/
u16 reg_idx;
int queue_index; /* needed for multiqueue queue management */
-};
+} ____cacheline_internodealigned_in_smp;
/* How many Rx Buffers do we bundle into one write to the hardware ? */
#define IXGBEVF_RX_BUFFER_WRITE 16 /* Must be power of 2 */
@@ -156,12 +152,20 @@ struct ixgbevf_ring {
/* Supported Rx Buffer Sizes */
#define IXGBEVF_RXBUFFER_256 256 /* Used for packet split */
#define IXGBEVF_RXBUFFER_2048 2048
+#define IXGBEVF_RXBUFFER_3072 3072
#define IXGBEVF_RX_HDR_SIZE IXGBEVF_RXBUFFER_256
-#define IXGBEVF_RX_BUFSZ IXGBEVF_RXBUFFER_2048
#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+#define IXGBEVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#if (PAGE_SIZE < 8192)
+#define IXGBEVF_MAX_FRAME_BUILD_SKB \
+ (SKB_WITH_OVERHEAD(IXGBEVF_RXBUFFER_2048) - IXGBEVF_SKB_PAD)
+#else
+#define IXGBEVF_MAX_FRAME_BUILD_SKB IXGBEVF_RXBUFFER_2048
+#endif
+
#define IXGBE_TX_FLAGS_CSUM BIT(0)
#define IXGBE_TX_FLAGS_VLAN BIT(1)
#define IXGBE_TX_FLAGS_TSO BIT(2)
@@ -170,6 +174,50 @@ struct ixgbevf_ring {
#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000
#define IXGBE_TX_FLAGS_VLAN_SHIFT 16
+#define ring_uses_large_buffer(ring) \
+ test_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define set_ring_uses_large_buffer(ring) \
+ set_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define clear_ring_uses_large_buffer(ring) \
+ clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+
+#define ring_uses_build_skb(ring) \
+ test_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define set_ring_build_skb_enabled(ring) \
+ set_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define clear_ring_build_skb_enabled(ring) \
+ clear_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+
+static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+ if (ring_uses_large_buffer(ring))
+ return IXGBEVF_RXBUFFER_3072;
+
+ if (ring_uses_build_skb(ring))
+ return IXGBEVF_MAX_FRAME_BUILD_SKB;
+#endif
+ return IXGBEVF_RXBUFFER_2048;
+}
+
+static inline unsigned int ixgbevf_rx_pg_order(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+ if (ring_uses_large_buffer(ring))
+ return 1;
+#endif
+ return 0;
+}
+
+#define ixgbevf_rx_pg_size(_ring) (PAGE_SIZE << ixgbevf_rx_pg_order(_ring))
+
+#define check_for_tx_hang(ring) \
+ test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define set_check_for_tx_hang(ring) \
+ set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define clear_check_for_tx_hang(ring) \
+ clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+
struct ixgbevf_ring_container {
struct ixgbevf_ring *ring; /* pointer to linked list of rings */
unsigned int total_bytes; /* total bytes processed this int */
@@ -194,7 +242,11 @@ struct ixgbevf_q_vector {
u16 itr; /* Interrupt throttle rate written to EITR */
struct napi_struct napi;
struct ixgbevf_ring_container rx, tx;
+ struct rcu_head rcu; /* to avoid race with update stats on free */
char name[IFNAMSIZ + 9];
+
+ /* for dynamic allocation of rings associated with this q_vector */
+ struct ixgbevf_ring ring[0] ____cacheline_internodealigned_in_smp;
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int state;
#define IXGBEVF_QV_STATE_IDLE 0
@@ -331,6 +383,8 @@ struct ixgbevf_adapter {
u32 *rss_key;
u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE];
+ u32 flags;
+#define IXGBEVF_FLAGS_LEGACY_RX BIT(1)
};
enum ixbgevf_state_t {
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 9b3d43d28106..f37307131eb6 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -130,6 +130,9 @@ static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter)
static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter);
static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector);
static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter);
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer);
+static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *old_buff);
static void ixgbevf_remove_adapter(struct ixgbe_hw *hw)
{
@@ -527,6 +530,49 @@ static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring,
skb->protocol = eth_type_trans(skb, rx_ring->netdev);
}
+static
+struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring,
+ const unsigned int size)
+{
+ struct ixgbevf_rx_buffer *rx_buffer;
+
+ rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+ prefetchw(rx_buffer->page);
+
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ size,
+ DMA_FROM_DEVICE);
+
+ rx_buffer->pagecnt_bias--;
+
+ return rx_buffer;
+}
+
+static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *rx_buffer)
+{
+ if (ixgbevf_can_reuse_rx_page(rx_buffer)) {
+ /* hand second half of page back to the ring */
+ ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+ } else {
+ /* We are not reusing the buffer so unmap it and free
+ * any references we are holding to it
+ */
+ dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+ ixgbevf_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE,
+ IXGBEVF_RX_DMA_ATTR);
+ __page_frag_cache_drain(rx_buffer->page,
+ rx_buffer->pagecnt_bias);
+ }
+
+ /* clear contents of rx_buffer */
+ rx_buffer->page = NULL;
+}
+
/**
* ixgbevf_is_non_eop - process handling of non-EOP buffers
* @rx_ring: Rx ring being processed
@@ -554,32 +600,38 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring,
return true;
}
+static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring)
+{
+ return ring_uses_build_skb(rx_ring) ? IXGBEVF_SKB_PAD : 0;
+}
+
static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
struct ixgbevf_rx_buffer *bi)
{
struct page *page = bi->page;
- dma_addr_t dma = bi->dma;
+ dma_addr_t dma;
/* since we are recycling buffers we should seldom need to alloc */
if (likely(page))
return true;
/* alloc new page for storage */
- page = dev_alloc_page();
+ page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring));
if (unlikely(!page)) {
rx_ring->rx_stats.alloc_rx_page_failed++;
return false;
}
/* map page for use */
- dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+ dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+ ixgbevf_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR);
/* if mapping failed free memory back to system since
* there isn't much point in holding memory we can't use
*/
if (dma_mapping_error(rx_ring->dev, dma)) {
- __free_page(page);
+ __free_pages(page, ixgbevf_rx_pg_order(rx_ring));
rx_ring->rx_stats.alloc_rx_page_failed++;
return false;
@@ -587,7 +639,7 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
bi->dma = dma;
bi->page = page;
- bi->page_offset = 0;
+ bi->page_offset = ixgbevf_rx_offset(rx_ring);
bi->pagecnt_bias = 1;
rx_ring->rx_stats.alloc_rx_page++;
@@ -621,7 +673,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
/* sync the buffer for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
bi->page_offset,
- IXGBEVF_RX_BUFSZ,
+ ixgbevf_rx_bufsz(rx_ring),
DMA_FROM_DEVICE);
/* Refresh the desc even if pkt_addr didn't change
@@ -734,11 +786,10 @@ static inline bool ixgbevf_page_is_reserved(struct page *page)
return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
}
-static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
- struct page *page,
- const unsigned int truesize)
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer)
{
- unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
+ unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+ struct page *page = rx_buffer->page;
/* avoid re-using remote pages */
if (unlikely(ixgbevf_page_is_reserved(page)))
@@ -746,17 +797,13 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
- if (unlikely(page_ref_count(page) != pagecnt_bias))
+ if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
return false;
-
- /* flip page offset to other buffer */
- rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ;
-
#else
- /* move offset up to the next cache line */
- rx_buffer->page_offset += truesize;
+#define IXGBEVF_LAST_OFFSET \
+ (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048)
- if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ))
+ if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET)
return false;
#endif
@@ -765,7 +812,7 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
* the pagecnt_bias and page count so that we fully restock the
* number of references the driver holds.
*/
- if (unlikely(pagecnt_bias == 1)) {
+ if (unlikely(!pagecnt_bias)) {
page_ref_add(page, USHRT_MAX);
rx_buffer->pagecnt_bias = USHRT_MAX;
}
@@ -777,127 +824,81 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
* ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff
* @rx_ring: rx descriptor ring to transact packets on
* @rx_buffer: buffer containing page to add
- * @rx_desc: descriptor containing length of buffer written by hardware
* @skb: sk_buff to place the data into
+ * @size: size of buffer to be added
*
* This function will add the data contained in rx_buffer->page to the skb.
- * This is done either through a direct copy if the data in the buffer is
- * less than the skb header size, otherwise it will just attach the page as
- * a frag to the skb.
- *
- * The function will then update the page offset if necessary and return
- * true if the buffer can be reused by the adapter.
**/
-static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
+static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
struct ixgbevf_rx_buffer *rx_buffer,
- u16 size,
- union ixgbe_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ unsigned int size)
{
- struct page *page = rx_buffer->page;
- unsigned char *va = page_address(page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192)
- unsigned int truesize = IXGBEVF_RX_BUFSZ;
+ unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
#else
- unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+ unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+ SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
+ SKB_DATA_ALIGN(size);
+#endif
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+ rx_buffer->page_offset, size, truesize);
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
#endif
- unsigned int pull_len;
-
- if (unlikely(skb_is_nonlinear(skb)))
- goto add_tail_frag;
-
- if (likely(size <= IXGBEVF_RX_HDR_SIZE)) {
- memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
-
- /* page is not reserved, we can reuse buffer as is */
- if (likely(!ixgbevf_page_is_reserved(page)))
- return true;
-
- /* this page cannot be reused so discard it */
- return false;
- }
-
- /* we need the header to contain the greater of either ETH_HLEN or
- * 60 bytes if the skb->len is less than 60 for skb_pad.
- */
- pull_len = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
-
- /* align pull length to size of long to optimize memcpy performance */
- memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
-
- /* update all of the pointers */
- va += pull_len;
- size -= pull_len;
-
-add_tail_frag:
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
- (unsigned long)va & ~PAGE_MASK, size, truesize);
-
- return ixgbevf_can_reuse_rx_page(rx_buffer, page, truesize);
}
-static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
- union ixgbe_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
+static
+struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *rx_buffer,
+ union ixgbe_adv_rx_desc *rx_desc,
+ unsigned int size)
{
- struct ixgbevf_rx_buffer *rx_buffer;
- struct page *page;
- u16 size = le16_to_cpu(rx_desc->wb.upper.length);
-
- rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
- page = rx_buffer->page;
- prefetchw(page);
-
- /* we are reusing so sync this buffer for CPU use */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_buffer->dma,
- rx_buffer->page_offset,
- size,
- DMA_FROM_DEVICE);
-
- if (likely(!skb)) {
- void *page_addr = page_address(page) +
- rx_buffer->page_offset;
+ void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+ unsigned int truesize = SKB_DATA_ALIGN(size);
+#endif
+ unsigned int headlen;
+ struct sk_buff *skb;
- /* prefetch first cache line of first page */
- prefetch(page_addr);
+ /* prefetch first cache line of first page */
+ prefetch(va);
#if L1_CACHE_BYTES < 128
- prefetch(page_addr + L1_CACHE_BYTES);
+ prefetch(va + L1_CACHE_BYTES);
#endif
- /* allocate a skb to store the frags */
- skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
- IXGBEVF_RX_HDR_SIZE);
- if (unlikely(!skb)) {
- rx_ring->rx_stats.alloc_rx_buff_failed++;
- return NULL;
- }
+ /* allocate a skb to store the frags */
+ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE);
+ if (unlikely(!skb))
+ return NULL;
- /* we will be copying header into skb->data in
- * pskb_may_pull so it is in our interest to prefetch
- * it now to avoid a possible cache miss
- */
- prefetchw(skb->data);
- }
+ /* Determine available headroom for copy */
+ headlen = size;
+ if (headlen > IXGBEVF_RX_HDR_SIZE)
+ headlen = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
- /* pull page into skb */
- if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
- /* hand second half of page back to the ring */
- ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+ /* align pull length to size of long to optimize memcpy performance */
+ memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+ /* update all of the pointers */
+ size -= headlen;
+ if (size) {
+ skb_add_rx_frag(skb, 0, rx_buffer->page,
+ (va + headlen) - page_address(rx_buffer->page),
+ size, truesize);
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
+#endif
} else {
- /* We are not reusing the buffer so unmap it and free
- * any references we are holding to it
- */
- dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
- PAGE_SIZE, DMA_FROM_DEVICE,
- IXGBEVF_RX_DMA_ATTR);
- __page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
+ rx_buffer->pagecnt_bias++;
}
- /* clear contents of buffer_info */
- rx_buffer->dma = 0;
- rx_buffer->page = NULL;
-
return skb;
}
@@ -909,6 +910,44 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask);
}
+static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *rx_buffer,
+ union ixgbe_adv_rx_desc *rx_desc,
+ unsigned int size)
+{
+ void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+ unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+ SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size);
+#endif
+ struct sk_buff *skb;
+
+ /* prefetch first cache line of first page */
+ prefetch(va);
+#if L1_CACHE_BYTES < 128
+ prefetch(va + L1_CACHE_BYTES);
+#endif
+
+ /* build an skb to around the page buffer */
+ skb = build_skb(va - IXGBEVF_SKB_PAD, truesize);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* update pointers within the skb to store the data */
+ skb_reserve(skb, IXGBEVF_SKB_PAD);
+ __skb_put(skb, size);
+
+ /* update buffer offset */
+#if (PAGE_SIZE < 8192)
+ rx_buffer->page_offset ^= truesize;
+#else
+ rx_buffer->page_offset += truesize;
+#endif
+
+ return skb;
+}
static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
struct ixgbevf_ring *rx_ring,
int budget)
@@ -919,6 +958,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
while (likely(total_rx_packets < budget)) {
union ixgbe_adv_rx_desc *rx_desc;
+ struct ixgbevf_rx_buffer *rx_buffer;
+ unsigned int size;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) {
@@ -927,8 +968,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
}
rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
-
- if (!rx_desc->wb.upper.length)
+ size = le16_to_cpu(rx_desc->wb.upper.length);
+ if (!size)
break;
/* This memory barrier is needed to keep us from reading
@@ -937,15 +978,26 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
*/
rmb();
+ rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size);
+
/* retrieve a buffer from the ring */
- skb = ixgbevf_fetch_rx_buffer(rx_ring, rx_desc, skb);
+ if (skb)
+ ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size);
+ else if (ring_uses_build_skb(rx_ring))
+ skb = ixgbevf_build_skb(rx_ring, rx_buffer,
+ rx_desc, size);
+ else
+ skb = ixgbevf_construct_skb(rx_ring, rx_buffer,
+ rx_desc, size);
/* exit if we failed to retrieve a buffer */
if (!skb) {
rx_ring->rx_stats.alloc_rx_buff_failed++;
+ rx_buffer->pagecnt_bias++;
break;
}
+ ixgbevf_put_rx_buffer(rx_ring, rx_buffer);
cleaned_count++;
/* fetch next buffer in frame if non-eop */
@@ -1260,85 +1312,6 @@ static irqreturn_t ixgbevf_msix_clean_rings(int irq, void *data)
return IRQ_HANDLED;
}
-static inline void map_vector_to_rxq(struct ixgbevf_adapter *a, int v_idx,
- int r_idx)
-{
- struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
- a->rx_ring[r_idx]->next = q_vector->rx.ring;
- q_vector->rx.ring = a->rx_ring[r_idx];
- q_vector->rx.count++;
-}
-
-static inline void map_vector_to_txq(struct ixgbevf_adapter *a, int v_idx,
- int t_idx)
-{
- struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
- a->tx_ring[t_idx]->next = q_vector->tx.ring;
- q_vector->tx.ring = a->tx_ring[t_idx];
- q_vector->tx.count++;
-}
-
-/**
- * ixgbevf_map_rings_to_vectors - Maps descriptor rings to vectors
- * @adapter: board private structure to initialize
- *
- * This function maps descriptor rings to the queue-specific vectors
- * we were allotted through the MSI-X enabling code. Ideally, we'd have
- * one vector per ring/queue, but on a constrained vector budget, we
- * group the rings as "efficiently" as possible. You would add new
- * mapping configurations in here.
- **/
-static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter)
-{
- int q_vectors;
- int v_start = 0;
- int rxr_idx = 0, txr_idx = 0;
- int rxr_remaining = adapter->num_rx_queues;
- int txr_remaining = adapter->num_tx_queues;
- int i, j;
- int rqpv, tqpv;
-
- q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
- /* The ideal configuration...
- * We have enough vectors to map one per queue.
- */
- if (q_vectors == adapter->num_rx_queues + adapter->num_tx_queues) {
- for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++)
- map_vector_to_rxq(adapter, v_start, rxr_idx);
-
- for (; txr_idx < txr_remaining; v_start++, txr_idx++)
- map_vector_to_txq(adapter, v_start, txr_idx);
- return 0;
- }
-
- /* If we don't have enough vectors for a 1-to-1
- * mapping, we'll have to group them so there are
- * multiple queues per vector.
- */
- /* Re-adjusting *qpv takes care of the remainder. */
- for (i = v_start; i < q_vectors; i++) {
- rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i);
- for (j = 0; j < rqpv; j++) {
- map_vector_to_rxq(adapter, i, rxr_idx);
- rxr_idx++;
- rxr_remaining--;
- }
- }
- for (i = v_start; i < q_vectors; i++) {
- tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i);
- for (j = 0; j < tqpv; j++) {
- map_vector_to_txq(adapter, i, txr_idx);
- txr_idx++;
- txr_remaining--;
- }
- }
-
- return 0;
-}
-
/**
* ixgbevf_request_msix_irqs - Initialize MSI-X interrupts
* @adapter: board private structure
@@ -1411,20 +1384,6 @@ free_queue_irqs:
return err;
}
-static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter)
-{
- int i, q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
- for (i = 0; i < q_vectors; i++) {
- struct ixgbevf_q_vector *q_vector = adapter->q_vector[i];
-
- q_vector->rx.ring = NULL;
- q_vector->tx.ring = NULL;
- q_vector->rx.count = 0;
- q_vector->tx.count = 0;
- }
-}
-
/**
* ixgbevf_request_irq - initialize interrupts
* @adapter: board private structure
@@ -1464,8 +1423,6 @@ static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter)
free_irq(adapter->msix_entries[i].vector,
adapter->q_vector[i]);
}
-
- ixgbevf_reset_q_vectors(adapter);
}
/**
@@ -1587,7 +1544,8 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter)
#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
-static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
+static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter,
+ struct ixgbevf_ring *ring, int index)
{
struct ixgbe_hw *hw = &adapter->hw;
u32 srrctl;
@@ -1595,7 +1553,10 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
srrctl = IXGBE_SRRCTL_DROP_EN;
srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
- srrctl |= IXGBEVF_RX_BUFSZ >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+ if (ring_uses_large_buffer(ring))
+ srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+ else
+ srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl);
@@ -1767,10 +1728,21 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
ring->next_to_use = 0;
ring->next_to_alloc = 0;
- ixgbevf_configure_srrctl(adapter, reg_idx);
+ ixgbevf_configure_srrctl(adapter, ring, reg_idx);
+
+ /* RXDCTL.RLPML does not work on 82599 */
+ if (adapter->hw.mac.type != ixgbe_mac_82599_vf) {
+ rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
+ IXGBE_RXDCTL_RLPML_EN);
- /* allow any size packet since we can handle overflow */
- rxdctl &= ~IXGBE_RXDCTL_RLPML_EN;
+#if (PAGE_SIZE < 8192)
+ /* Limit the maximum frame size so we don't overrun the skb */
+ if (ring_uses_build_skb(ring) &&
+ !ring_uses_large_buffer(ring))
+ rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB |
+ IXGBE_RXDCTL_RLPML_EN;
+#endif
+ }
rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME;
IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl);
@@ -1779,6 +1751,29 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring));
}
+static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter,
+ struct ixgbevf_ring *rx_ring)
+{
+ struct net_device *netdev = adapter->netdev;
+ unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+ /* set build_skb and buffer size flags */
+ clear_ring_build_skb_enabled(rx_ring);
+ clear_ring_uses_large_buffer(rx_ring);
+
+ if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+ return;
+
+ set_ring_build_skb_enabled(rx_ring);
+
+#if (PAGE_SIZE < 8192)
+ if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB)
+ return;
+
+ set_ring_uses_large_buffer(rx_ring);
+#endif
+}
+
/**
* ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset
* @adapter: board private structure
@@ -1806,8 +1801,12 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
/* Setup the HW Rx Head and Tail Descriptor Pointers and
* the Base and Length of the Rx Descriptor Ring
*/
- for (i = 0; i < adapter->num_rx_queues; i++)
- ixgbevf_configure_rx_ring(adapter, adapter->rx_ring[i]);
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct ixgbevf_ring *rx_ring = adapter->rx_ring[i];
+
+ ixgbevf_set_rx_buffer_len(adapter, rx_ring);
+ ixgbevf_configure_rx_ring(adapter, rx_ring);
+ }
}
static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev,
@@ -2136,13 +2135,13 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
dma_sync_single_range_for_cpu(rx_ring->dev,
rx_buffer->dma,
rx_buffer->page_offset,
- IXGBEVF_RX_BUFSZ,
+ ixgbevf_rx_bufsz(rx_ring),
DMA_FROM_DEVICE);
/* free resources associated with mapping */
dma_unmap_page_attrs(rx_ring->dev,
rx_buffer->dma,
- PAGE_SIZE,
+ ixgbevf_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
IXGBEVF_RX_DMA_ATTR);
@@ -2405,105 +2404,171 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
}
/**
- * ixgbevf_alloc_queues - Allocate memory for all rings
+ * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * @adapter: board private structure to initialize
+ *
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
+ **/
+static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
+{
+ int vector, v_budget;
+
+ /* It's easy to be greedy for MSI-X vectors, but it really
+ * doesn't do us much good if we have a lot more vectors
+ * than CPU's. So let's be conservative and only ask for
+ * (roughly) the same number of vectors as there are CPU's.
+ * The default is to use pairs of vectors.
+ */
+ v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
+ v_budget = min_t(int, v_budget, num_online_cpus());
+ v_budget += NON_Q_VECTORS;
+
+ adapter->msix_entries = kcalloc(v_budget,
+ sizeof(struct msix_entry), GFP_KERNEL);
+ if (!adapter->msix_entries)
+ return -ENOMEM;
+
+ for (vector = 0; vector < v_budget; vector++)
+ adapter->msix_entries[vector].entry = vector;
+
+ /* A failure in MSI-X entry allocation isn't fatal, but the VF driver
+ * does not support any other modes, so we will simply fail here. Note
+ * that we clean up the msix_entries pointer else-where.
+ */
+ return ixgbevf_acquire_msix_vectors(adapter, v_budget);
+}
+
+static void ixgbevf_add_ring(struct ixgbevf_ring *ring,
+ struct ixgbevf_ring_container *head)
+{
+ ring->next = head->ring;
+ head->ring = ring;
+ head->count++;
+}
+
+/**
+ * ixgbevf_alloc_q_vector - Allocate memory for a single interrupt vector
* @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: number of Tx rings for q vector
+ * @txr_idx: index of first Tx ring to assign
+ * @rxr_count: number of Rx rings for q vector
+ * @rxr_idx: index of first Rx ring to assign
*
- * We allocate one ring per queue at run-time since we don't know the
- * number of queues at compile-time. The polling_netdev array is
- * intended for Multiqueue, but should work fine with a single queue.
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
**/
-static int ixgbevf_alloc_queues(struct ixgbevf_adapter *adapter)
+static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx,
+ int txr_count, int txr_idx,
+ int rxr_count, int rxr_idx)
{
+ struct ixgbevf_q_vector *q_vector;
struct ixgbevf_ring *ring;
- int rx = 0, tx = 0;
+ int ring_count, size;
+
+ ring_count = txr_count + rxr_count;
+ size = sizeof(*q_vector) + (sizeof(*ring) * ring_count);
+
+ /* allocate q_vector and rings */
+ q_vector = kzalloc(size, GFP_KERNEL);
+ if (!q_vector)
+ return -ENOMEM;
+
+ /* initialize NAPI */
+ netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll, 64);
+
+ /* tie q_vector and adapter together */
+ adapter->q_vector[v_idx] = q_vector;
+ q_vector->adapter = adapter;
+ q_vector->v_idx = v_idx;
- for (; tx < adapter->num_tx_queues; tx++) {
- ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (!ring)
- goto err_allocation;
+ /* initialize pointer to rings */
+ ring = q_vector->ring;
+ while (txr_count) {
+ /* assign generic ring traits */
ring->dev = &adapter->pdev->dev;
ring->netdev = adapter->netdev;
+
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Tx values */
+ ixgbevf_add_ring(ring, &q_vector->tx);
+
+ /* apply Tx specific ring traits */
ring->count = adapter->tx_ring_count;
- ring->queue_index = tx;
- ring->reg_idx = tx;
+ ring->queue_index = txr_idx;
+ ring->reg_idx = txr_idx;
- adapter->tx_ring[tx] = ring;
- }
+ /* assign ring to adapter */
+ adapter->tx_ring[txr_idx] = ring;
+
+ /* update count and index */
+ txr_count--;
+ txr_idx++;
- for (; rx < adapter->num_rx_queues; rx++) {
- ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (!ring)
- goto err_allocation;
+ /* push pointer to next ring */
+ ring++;
+ }
+ while (rxr_count) {
+ /* assign generic ring traits */
ring->dev = &adapter->pdev->dev;
ring->netdev = adapter->netdev;
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Rx values */
+ ixgbevf_add_ring(ring, &q_vector->rx);
+
+ /* apply Rx specific ring traits */
ring->count = adapter->rx_ring_count;
- ring->queue_index = rx;
- ring->reg_idx = rx;
+ ring->queue_index = rxr_idx;
+ ring->reg_idx = rxr_idx;
- adapter->rx_ring[rx] = ring;
- }
+ /* assign ring to adapter */
+ adapter->rx_ring[rxr_idx] = ring;
- return 0;
+ /* update count and index */
+ rxr_count--;
+ rxr_idx++;
-err_allocation:
- while (tx) {
- kfree(adapter->tx_ring[--tx]);
- adapter->tx_ring[tx] = NULL;
+ /* push pointer to next ring */
+ ring++;
}
- while (rx) {
- kfree(adapter->rx_ring[--rx]);
- adapter->rx_ring[rx] = NULL;
- }
- return -ENOMEM;
+ return 0;
}
/**
- * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * ixgbevf_free_q_vector - Free memory allocated for specific interrupt vector
* @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
*
- * Attempt to configure the interrupts using the best available
- * capabilities of the hardware and the kernel.
+ * This function frees the memory allocated to the q_vector. In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
**/
-static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
+static void ixgbevf_free_q_vector(struct ixgbevf_adapter *adapter, int v_idx)
{
- struct net_device *netdev = adapter->netdev;
- int err;
- int vector, v_budget;
-
- /* It's easy to be greedy for MSI-X vectors, but it really
- * doesn't do us much good if we have a lot more vectors
- * than CPU's. So let's be conservative and only ask for
- * (roughly) the same number of vectors as there are CPU's.
- * The default is to use pairs of vectors.
- */
- v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
- v_budget = min_t(int, v_budget, num_online_cpus());
- v_budget += NON_Q_VECTORS;
-
- /* A failure in MSI-X entry allocation isn't fatal, but it does
- * mean we disable MSI-X capabilities of the adapter.
- */
- adapter->msix_entries = kcalloc(v_budget,
- sizeof(struct msix_entry), GFP_KERNEL);
- if (!adapter->msix_entries)
- return -ENOMEM;
+ struct ixgbevf_q_vector *q_vector = adapter->q_vector[v_idx];
+ struct ixgbevf_ring *ring;
- for (vector = 0; vector < v_budget; vector++)
- adapter->msix_entries[vector].entry = vector;
+ ixgbevf_for_each_ring(ring, q_vector->tx)
+ adapter->tx_ring[ring->queue_index] = NULL;
- err = ixgbevf_acquire_msix_vectors(adapter, v_budget);
- if (err)
- return err;
+ ixgbevf_for_each_ring(ring, q_vector->rx)
+ adapter->rx_ring[ring->queue_index] = NULL;
- err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
- if (err)
- return err;
+ adapter->q_vector[v_idx] = NULL;
+ netif_napi_del(&q_vector->napi);
- return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+ /* ixgbevf_get_stats() might access the rings on this vector,
+ * we must wait a grace period before freeing it.
+ */
+ kfree_rcu(q_vector, rcu);
}
/**
@@ -2515,35 +2580,53 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
**/
static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter)
{
- int q_idx, num_q_vectors;
- struct ixgbevf_q_vector *q_vector;
+ int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+ int rxr_remaining = adapter->num_rx_queues;
+ int txr_remaining = adapter->num_tx_queues;
+ int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+ int err;
+
+ if (q_vectors >= (rxr_remaining + txr_remaining)) {
+ for (; rxr_remaining; v_idx++, q_vectors--) {
+ int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
+
+ err = ixgbevf_alloc_q_vector(adapter, v_idx,
+ 0, 0, rqpv, rxr_idx);
+ if (err)
+ goto err_out;
- num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+ /* update counts and index */
+ rxr_remaining -= rqpv;
+ rxr_idx += rqpv;
+ }
+ }
+
+ for (; q_vectors; v_idx++, q_vectors--) {
+ int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
+ int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors);
- for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
- q_vector = kzalloc(sizeof(struct ixgbevf_q_vector), GFP_KERNEL);
- if (!q_vector)
+ err = ixgbevf_alloc_q_vector(adapter, v_idx,
+ tqpv, txr_idx,
+ rqpv, rxr_idx);
+
+ if (err)
goto err_out;
- q_vector->adapter = adapter;
- q_vector->v_idx = q_idx;
- netif_napi_add(adapter->netdev, &q_vector->napi,
- ixgbevf_poll, 64);
- adapter->q_vector[q_idx] = q_vector;
+
+ /* update counts and index */
+ rxr_remaining -= rqpv;
+ rxr_idx += rqpv;
+ txr_remaining -= tqpv;
+ txr_idx += tqpv;
}
return 0;
err_out:
- while (q_idx) {
- q_idx--;
- q_vector = adapter->q_vector[q_idx];
-#ifdef CONFIG_NET_RX_BUSY_POLL
- napi_hash_del(&q_vector->napi);
-#endif
- netif_napi_del(&q_vector->napi);
- kfree(q_vector);
- adapter->q_vector[q_idx] = NULL;
+ while (v_idx) {
+ v_idx--;
+ ixgbevf_free_q_vector(adapter, v_idx);
}
+
return -ENOMEM;
}
@@ -2557,17 +2640,11 @@ err_out:
**/
static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter)
{
- int q_idx, num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
- for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
- struct ixgbevf_q_vector *q_vector = adapter->q_vector[q_idx];
+ int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
- adapter->q_vector[q_idx] = NULL;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- napi_hash_del(&q_vector->napi);
-#endif
- netif_napi_del(&q_vector->napi);
- kfree(q_vector);
+ while (q_vectors) {
+ q_vectors--;
+ ixgbevf_free_q_vector(adapter, q_vectors);
}
}
@@ -2611,12 +2688,6 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter)
goto err_alloc_q_vectors;
}
- err = ixgbevf_alloc_queues(adapter);
- if (err) {
- pr_err("Unable to allocate memory for queues\n");
- goto err_alloc_queues;
- }
-
hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n",
(adapter->num_rx_queues > 1) ? "Enabled" :
"Disabled", adapter->num_rx_queues, adapter->num_tx_queues);
@@ -2624,8 +2695,6 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter)
set_bit(__IXGBEVF_DOWN, &adapter->state);
return 0;
-err_alloc_queues:
- ixgbevf_free_q_vectors(adapter);
err_alloc_q_vectors:
ixgbevf_reset_interrupt_capability(adapter);
err_set_interrupt:
@@ -2641,17 +2710,6 @@ err_set_interrupt:
**/
static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter)
{
- int i;
-
- for (i = 0; i < adapter->num_tx_queues; i++) {
- kfree(adapter->tx_ring[i]);
- adapter->tx_ring[i] = NULL;
- }
- for (i = 0; i < adapter->num_rx_queues; i++) {
- kfree(adapter->rx_ring[i]);
- adapter->rx_ring[i] = NULL;
- }
-
adapter->num_tx_queues = 0;
adapter->num_rx_queues = 0;
@@ -3088,9 +3146,14 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter)
if (!err)
continue;
hw_dbg(&adapter->hw, "Allocation for Tx Queue %u failed\n", i);
- break;
+ goto err_setup_tx;
}
+ return 0;
+err_setup_tx:
+ /* rewind the index freeing the rings as we go */
+ while (i--)
+ ixgbevf_free_tx_resources(adapter->tx_ring[i]);
return err;
}
@@ -3148,8 +3211,14 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter)
if (!err)
continue;
hw_dbg(&adapter->hw, "Allocation for Rx Queue %u failed\n", i);
- break;
+ goto err_setup_rx;
}
+
+ return 0;
+err_setup_rx:
+ /* rewind the index freeing the rings as we go */
+ while (i--)
+ ixgbevf_free_rx_resources(adapter->rx_ring[i]);
return err;
}
@@ -3244,28 +3313,31 @@ int ixgbevf_open(struct net_device *netdev)
ixgbevf_configure(adapter);
- /* Map the Tx/Rx rings to the vectors we were allotted.
- * if request_irq will be called in this function map_rings
- * must be called *before* up_complete
- */
- ixgbevf_map_rings_to_vectors(adapter);
-
err = ixgbevf_request_irq(adapter);
if (err)
goto err_req_irq;
+ /* Notify the stack of the actual queue counts. */
+ err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
+ if (err)
+ goto err_set_queues;
+
+ err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+ if (err)
+ goto err_set_queues;
+
ixgbevf_up_complete(adapter);
return 0;
+err_set_queues:
+ ixgbevf_free_irq(adapter);
err_req_irq:
- ixgbevf_down(adapter);
-err_setup_rx:
ixgbevf_free_all_rx_resources(adapter);
-err_setup_tx:
+err_setup_rx:
ixgbevf_free_all_tx_resources(adapter);
+err_setup_tx:
ixgbevf_reset(adapter);
-
err_setup_reset:
return err;
@@ -3707,11 +3779,10 @@ static int ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size)
return __ixgbevf_maybe_stop_tx(tx_ring, size);
}
-static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
+ struct ixgbevf_ring *tx_ring)
{
- struct ixgbevf_adapter *adapter = netdev_priv(netdev);
struct ixgbevf_tx_buffer *first;
- struct ixgbevf_ring *tx_ring;
int tso;
u32 tx_flags = 0;
u16 count = TXD_USE_COUNT(skb_headlen(skb));
@@ -3726,8 +3797,6 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
return NETDEV_TX_OK;
}
- tx_ring = adapter->tx_ring[skb->queue_mapping];
-
/* need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
* + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD,
* + 2 desc gap to keep tail from touching head,
@@ -3780,6 +3849,29 @@ out_drop:
return NETDEV_TX_OK;
}
+static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+ struct ixgbevf_ring *tx_ring;
+
+ if (skb->len <= 0) {
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+ /* The minimum packet size for olinfo paylen is 17 so pad the skb
+ * in order to meet this minimum size requirement.
+ */
+ if (skb->len < 17) {
+ if (skb_padto(skb, 17))
+ return NETDEV_TX_OK;
+ skb->len = 17;
+ }
+
+ tx_ring = adapter->tx_ring[skb->queue_mapping];
+ return ixgbevf_xmit_frame_ring(skb, tx_ring);
+}
+
/**
* ixgbevf_set_mac - Change the Ethernet Address of the NIC
* @netdev: network interface device structure
@@ -3839,6 +3931,9 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
/* must set new MTU before calling down or up */
netdev->mtu = new_mtu;
+ if (netif_running(netdev))
+ ixgbevf_reinit_locked(adapter);
+
return 0;
}
@@ -3917,17 +4012,11 @@ static int ixgbevf_resume(struct pci_dev *pdev)
rtnl_lock();
err = ixgbevf_init_interrupt_scheme(adapter);
+ if (!err && netif_running(netdev))
+ err = ixgbevf_open(netdev);
rtnl_unlock();
- if (err) {
- dev_err(&pdev->dev, "Cannot initialize interrupts\n");
+ if (err)
return err;
- }
-
- if (netif_running(netdev)) {
- err = ixgbevf_open(netdev);
- if (err)
- return err;
- }
netif_device_attach(netdev);
@@ -3953,6 +4042,7 @@ static void ixgbevf_get_stats(struct net_device *netdev,
stats->multicast = adapter->stats.vfmprc - adapter->stats.base_vfmprc;
+ rcu_read_lock();
for (i = 0; i < adapter->num_rx_queues; i++) {
ring = adapter->rx_ring[i];
do {
@@ -3974,6 +4064,7 @@ static void ixgbevf_get_stats(struct net_device *netdev,
stats->tx_bytes += bytes;
stats->tx_packets += packets;
}
+ rcu_read_unlock();
}
#define IXGBEVF_MAX_MAC_HDR_LEN 127
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 5a1668cdb461..9418f6eed086 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -65,6 +65,10 @@
#define MVPP2_RXQ_PACKET_OFFSET_MASK 0x70000000
#define MVPP2_RXQ_DISABLE_MASK BIT(31)
+/* Top Registers */
+#define MVPP2_MH_REG(port) (0x5040 + 4 * (port))
+#define MVPP2_DSA_EXTENDED BIT(5)
+
/* Parser Registers */
#define MVPP2_PRS_INIT_LOOKUP_REG 0x1000
#define MVPP2_PRS_PORT_LU_MAX 0xf
@@ -473,6 +477,7 @@
#define MVPP2_ETH_TYPE_LEN 2
#define MVPP2_PPPOE_HDR_SIZE 8
#define MVPP2_VLAN_TAG_LEN 4
+#define MVPP2_VLAN_TAG_EDSA_LEN 8
/* Lbtd 802.3 type */
#define MVPP2_IP_LBDT_TYPE 0xfffa
@@ -609,35 +614,64 @@ enum mvpp2_tag_type {
#define MVPP2_PRS_TCAM_LU_BYTE 20
#define MVPP2_PRS_TCAM_EN_OFFS(offs) ((offs) + 2)
#define MVPP2_PRS_TCAM_INV_WORD 5
+
+#define MVPP2_PRS_VID_TCAM_BYTE 2
+
+/* There is a TCAM range reserved for VLAN filtering entries, range size is 33
+ * 10 VLAN ID filter entries per port
+ * 1 default VLAN filter entry per port
+ * It is assumed that there are 3 ports for filter, not including loopback port
+ */
+#define MVPP2_PRS_VLAN_FILT_MAX 11
+#define MVPP2_PRS_VLAN_FILT_RANGE_SIZE 33
+
+#define MVPP2_PRS_VLAN_FILT_MAX_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 2)
+#define MVPP2_PRS_VLAN_FILT_DFLT_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 1)
+
/* Tcam entries ID */
#define MVPP2_PE_DROP_ALL 0
#define MVPP2_PE_FIRST_FREE_TID 1
-#define MVPP2_PE_LAST_FREE_TID (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+
+/* VLAN filtering range */
+#define MVPP2_PE_VID_FILT_RANGE_END (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+#define MVPP2_PE_VID_FILT_RANGE_START (MVPP2_PE_VID_FILT_RANGE_END - \
+ MVPP2_PRS_VLAN_FILT_RANGE_SIZE + 1)
+#define MVPP2_PE_LAST_FREE_TID (MVPP2_PE_VID_FILT_RANGE_START - 1)
#define MVPP2_PE_IP6_EXT_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 30)
#define MVPP2_PE_MAC_MC_IP6 (MVPP2_PRS_TCAM_SRAM_SIZE - 29)
#define MVPP2_PE_IP6_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 28)
#define MVPP2_PE_IP4_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 27)
#define MVPP2_PE_LAST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 26)
-#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
-#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
-#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
-#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
-#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
-#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
-#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
-#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
-#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
-#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
-#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
-#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
-#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
-#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
+#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 21)
+#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 20)
+#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 19)
+#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18)
+#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17)
+#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16)
+#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15)
+#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14)
+#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 13)
+#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 12)
+#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 11)
+#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 10)
+#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 9)
+#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 8)
+#define MVPP2_PE_VID_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 7)
+#define MVPP2_PE_VID_EDSA_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 6)
#define MVPP2_PE_VLAN_DBL (MVPP2_PRS_TCAM_SRAM_SIZE - 5)
#define MVPP2_PE_VLAN_NONE (MVPP2_PRS_TCAM_SRAM_SIZE - 4)
#define MVPP2_PE_MAC_MC_ALL (MVPP2_PRS_TCAM_SRAM_SIZE - 3)
#define MVPP2_PE_MAC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 2)
#define MVPP2_PE_MAC_NON_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 1)
+#define MVPP2_PRS_VID_PORT_FIRST(port) (MVPP2_PE_VID_FILT_RANGE_START + \
+ ((port) * MVPP2_PRS_VLAN_FILT_MAX))
+#define MVPP2_PRS_VID_PORT_LAST(port) (MVPP2_PRS_VID_PORT_FIRST(port) \
+ + MVPP2_PRS_VLAN_FILT_MAX_ENTRY)
+/* Index of default vid filter for given port */
+#define MVPP2_PRS_VID_PORT_DFLT(port) (MVPP2_PRS_VID_PORT_FIRST(port) \
+ + MVPP2_PRS_VLAN_FILT_DFLT_ENTRY)
+
/* Sram structure
* The fields are represented by MVPP2_PRS_TCAM_DATA_REG(3)->(0).
*/
@@ -725,6 +759,7 @@ enum mvpp2_tag_type {
#define MVPP2_PRS_IPV6_EXT_AH_L4_AI_BIT BIT(4)
#define MVPP2_PRS_SINGLE_VLAN_AI 0
#define MVPP2_PRS_DBL_VLAN_AI_BIT BIT(7)
+#define MVPP2_PRS_EDSA_VID_AI_BIT BIT(0)
/* DSA/EDSA type */
#define MVPP2_PRS_TAGGED true
@@ -747,6 +782,7 @@ enum mvpp2_prs_lookup {
MVPP2_PRS_LU_MAC,
MVPP2_PRS_LU_DSA,
MVPP2_PRS_LU_VLAN,
+ MVPP2_PRS_LU_VID,
MVPP2_PRS_LU_L2,
MVPP2_PRS_LU_PPPOE,
MVPP2_PRS_LU_IP4,
@@ -1662,6 +1698,14 @@ static void mvpp2_prs_match_etype(struct mvpp2_prs_entry *pe, int offset,
mvpp2_prs_tcam_data_byte_set(pe, offset + 1, ethertype & 0xff, 0xff);
}
+/* Set vid in tcam sw entry */
+static void mvpp2_prs_match_vid(struct mvpp2_prs_entry *pe, int offset,
+ unsigned short vid)
+{
+ mvpp2_prs_tcam_data_byte_set(pe, offset + 0, (vid & 0xf00) >> 8, 0xf);
+ mvpp2_prs_tcam_data_byte_set(pe, offset + 1, vid & 0xff, 0xff);
+}
+
/* Set bits in sram sw entry */
static void mvpp2_prs_sram_bits_set(struct mvpp2_prs_entry *pe, int bit_num,
int val)
@@ -2029,24 +2073,30 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add,
mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_DSA);
pe.index = tid;
- /* Shift 4 bytes if DSA tag or 8 bytes in case of EDSA tag*/
- mvpp2_prs_sram_shift_set(&pe, shift,
- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-
/* Update shadow table */
mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_DSA);
if (tagged) {
/* Set tagged bit in DSA tag */
mvpp2_prs_tcam_data_byte_set(&pe, 0,
- MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
- MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
- /* Clear all ai bits for next iteration */
- mvpp2_prs_sram_ai_update(&pe, 0,
- MVPP2_PRS_SRAM_AI_MASK);
- /* If packet is tagged continue check vlans */
- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN);
+ MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
+ MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
+
+ /* Set ai bits for next iteration */
+ if (extend)
+ mvpp2_prs_sram_ai_update(&pe, 1,
+ MVPP2_PRS_SRAM_AI_MASK);
+ else
+ mvpp2_prs_sram_ai_update(&pe, 0,
+ MVPP2_PRS_SRAM_AI_MASK);
+
+ /* If packet is tagged continue check vid filtering */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
} else {
+ /* Shift 4 bytes for DSA tag or 8 bytes for EDSA tag*/
+ mvpp2_prs_sram_shift_set(&pe, shift,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
/* Set result info bits to 'no vlans' */
mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_NONE,
MVPP2_PRS_RI_VLAN_MASK);
@@ -2231,10 +2281,9 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai,
mvpp2_prs_match_etype(pe, 0, tpid);
- mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_L2);
- /* Shift 4 bytes - skip 1 vlan tag */
- mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
- MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+ /* VLAN tag detected, proceed with VID filtering */
+ mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VID);
+
/* Clear all ai bits for next iteration */
mvpp2_prs_sram_ai_update(pe, 0, MVPP2_PRS_SRAM_AI_MASK);
@@ -2375,8 +2424,8 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1,
mvpp2_prs_match_etype(pe, 4, tpid2);
mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VLAN);
- /* Shift 8 bytes - skip 2 vlan tags */
- mvpp2_prs_sram_shift_set(pe, 2 * MVPP2_VLAN_TAG_LEN,
+ /* Shift 4 bytes - skip outer vlan tag */
+ mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_DOUBLE,
MVPP2_PRS_RI_VLAN_MASK);
@@ -2755,6 +2804,62 @@ static void mvpp2_prs_dsa_init(struct mvpp2 *priv)
mvpp2_prs_hw_write(priv, &pe);
}
+/* Initialize parser entries for VID filtering */
+static void mvpp2_prs_vid_init(struct mvpp2 *priv)
+{
+ struct mvpp2_prs_entry pe;
+
+ memset(&pe, 0, sizeof(pe));
+
+ /* Set default vid entry */
+ pe.index = MVPP2_PE_VID_FLTR_DEFAULT;
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_EDSA_VID_AI_BIT);
+
+ /* Skip VLAN header - Set offset to 4 bytes */
+ mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_LEN,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Unmask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+
+ /* Set default vid entry for extended DSA*/
+ memset(&pe, 0, sizeof(pe));
+
+ /* Set default vid entry */
+ pe.index = MVPP2_PE_VID_EDSA_FLTR_DEFAULT;
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_EDSA_VID_AI_BIT,
+ MVPP2_PRS_EDSA_VID_AI_BIT);
+
+ /* Skip VLAN header - Set offset to 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_EDSA_LEN,
+ MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Unmask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+ /* Update shadow table and hw entry */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+}
+
/* Match basic ethertypes */
static int mvpp2_prs_etype_init(struct mvpp2 *priv)
{
@@ -3023,7 +3128,8 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv)
mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN);
pe.index = MVPP2_PE_VLAN_DBL;
- mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
+
/* Clear ai for next iterations */
mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_DOUBLE,
@@ -3386,6 +3492,192 @@ static int mvpp2_prs_ip6_init(struct mvpp2 *priv)
return 0;
}
+/* Find tcam entry with matched pair <vid,port> */
+static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid,
+ u16 mask)
+{
+ unsigned char byte[2], enable[2];
+ struct mvpp2_prs_entry pe;
+ u16 rvid, rmask;
+ int tid;
+
+ /* Go through the all entries with MVPP2_PRS_LU_VID */
+ for (tid = MVPP2_PE_VID_FILT_RANGE_START;
+ tid <= MVPP2_PE_VID_FILT_RANGE_END; tid++) {
+ if (!priv->prs_shadow[tid].valid ||
+ priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID)
+ continue;
+
+ pe.index = tid;
+
+ mvpp2_prs_hw_read(priv, &pe);
+ mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]);
+ mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]);
+
+ rvid = ((byte[0] & 0xf) << 8) + byte[1];
+ rmask = ((enable[0] & 0xf) << 8) + enable[1];
+
+ if (rvid != vid || rmask != mask)
+ continue;
+
+ return tid;
+ }
+
+ return 0;
+}
+
+/* Write parser entry for VID filtering */
+static int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid)
+{
+ unsigned int vid_start = MVPP2_PE_VID_FILT_RANGE_START +
+ port->id * MVPP2_PRS_VLAN_FILT_MAX;
+ unsigned int mask = 0xfff, reg_val, shift;
+ struct mvpp2 *priv = port->priv;
+ struct mvpp2_prs_entry pe;
+ int tid;
+
+ /* Scan TCAM and see if entry with this <vid,port> already exist */
+ tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, mask);
+
+ reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+ if (reg_val & MVPP2_DSA_EXTENDED)
+ shift = MVPP2_VLAN_TAG_EDSA_LEN;
+ else
+ shift = MVPP2_VLAN_TAG_LEN;
+
+ /* No such entry */
+ if (!tid) {
+ memset(&pe, 0, sizeof(pe));
+
+ /* Go through all entries from first to last in vlan range */
+ tid = mvpp2_prs_tcam_first_free(priv, vid_start,
+ vid_start +
+ MVPP2_PRS_VLAN_FILT_MAX_ENTRY);
+
+ /* There isn't room for a new VID filter */
+ if (tid < 0)
+ return tid;
+
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+ pe.index = tid;
+
+ /* Mask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, 0);
+ } else {
+ mvpp2_prs_hw_read(priv, &pe);
+ }
+
+ /* Enable the current port */
+ mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+ /* Continue - set next lookup */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Skip VLAN header - Set offset to 4 or 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Set match on VID */
+ mvpp2_prs_match_vid(&pe, MVPP2_PRS_VID_TCAM_BYTE, vid);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ /* Update shadow table */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+
+ return 0;
+}
+
+/* Write parser entry for VID filtering */
+static void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid)
+{
+ struct mvpp2 *priv = port->priv;
+ int tid;
+
+ /* Scan TCAM and see if entry with this <vid,port> already exist */
+ tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, 0xfff);
+
+ /* No such entry */
+ if (tid)
+ return;
+
+ mvpp2_prs_hw_inv(priv, tid);
+ priv->prs_shadow[tid].valid = false;
+}
+
+/* Remove all existing VID filters on this port */
+static void mvpp2_prs_vid_remove_all(struct mvpp2_port *port)
+{
+ struct mvpp2 *priv = port->priv;
+ int tid;
+
+ for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id);
+ tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) {
+ if (priv->prs_shadow[tid].valid)
+ mvpp2_prs_vid_entry_remove(port, tid);
+ }
+}
+
+/* Remove VID filering entry for this port */
+static void mvpp2_prs_vid_disable_filtering(struct mvpp2_port *port)
+{
+ unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+ struct mvpp2 *priv = port->priv;
+
+ /* Invalidate the guard entry */
+ mvpp2_prs_hw_inv(priv, tid);
+
+ priv->prs_shadow[tid].valid = false;
+}
+
+/* Add guard entry that drops packets when no VID is matched on this port */
+static void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port)
+{
+ unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+ struct mvpp2 *priv = port->priv;
+ unsigned int reg_val, shift;
+ struct mvpp2_prs_entry pe;
+
+ if (priv->prs_shadow[tid].valid)
+ return;
+
+ memset(&pe, 0, sizeof(pe));
+
+ pe.index = tid;
+
+ reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+ if (reg_val & MVPP2_DSA_EXTENDED)
+ shift = MVPP2_VLAN_TAG_EDSA_LEN;
+ else
+ shift = MVPP2_VLAN_TAG_LEN;
+
+ mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+ /* Mask all ports */
+ mvpp2_prs_tcam_port_map_set(&pe, 0);
+
+ /* Update port mask */
+ mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+ /* Continue - set next lookup */
+ mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+ /* Skip VLAN header - Set offset to 4 or 8 bytes */
+ mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+ /* Drop VLAN packets that don't belong to any VIDs on this port */
+ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK,
+ MVPP2_PRS_RI_DROP_MASK);
+
+ /* Clear all ai bits for next iteration */
+ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+ /* Update shadow table */
+ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+ mvpp2_prs_hw_write(priv, &pe);
+}
+
/* Parser default initialization */
static int mvpp2_prs_default_init(struct platform_device *pdev,
struct mvpp2 *priv)
@@ -3429,6 +3721,8 @@ static int mvpp2_prs_default_init(struct platform_device *pdev,
mvpp2_prs_dsa_init(priv);
+ mvpp2_prs_vid_init(priv);
+
err = mvpp2_prs_etype_init(priv);
if (err)
return err;
@@ -7153,6 +7447,12 @@ retry:
}
}
}
+
+ /* Disable VLAN filtering in promiscuous mode */
+ if (dev->flags & IFF_PROMISC)
+ mvpp2_prs_vid_disable_filtering(port);
+ else
+ mvpp2_prs_vid_enable_filtering(port);
}
static int mvpp2_set_mac_address(struct net_device *dev, void *p)
@@ -7292,6 +7592,48 @@ static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return ret;
}
+static int mvpp2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ int ret;
+
+ ret = mvpp2_prs_vid_entry_add(port, vid);
+ if (ret)
+ netdev_err(dev, "rx-vlan-filter offloading cannot accept more than %d VIDs per port\n",
+ MVPP2_PRS_VLAN_FILT_MAX - 1);
+ return ret;
+}
+
+static int mvpp2_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+
+ mvpp2_prs_vid_entry_remove(port, vid);
+ return 0;
+}
+
+static int mvpp2_set_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ netdev_features_t changed = dev->features ^ features;
+ struct mvpp2_port *port = netdev_priv(dev);
+
+ if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+ mvpp2_prs_vid_enable_filtering(port);
+ } else {
+ /* Invalidate all registered VID filters for this
+ * port
+ */
+ mvpp2_prs_vid_remove_all(port);
+
+ mvpp2_prs_vid_disable_filtering(port);
+ }
+ }
+
+ return 0;
+}
+
/* Ethtool methods */
/* Set interrupt coalescing for ethtools */
@@ -7433,6 +7775,9 @@ static const struct net_device_ops mvpp2_netdev_ops = {
.ndo_change_mtu = mvpp2_change_mtu,
.ndo_get_stats64 = mvpp2_get_stats64,
.ndo_do_ioctl = mvpp2_ioctl,
+ .ndo_vlan_rx_add_vid = mvpp2_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = mvpp2_vlan_rx_kill_vid,
+ .ndo_set_features = mvpp2_set_features,
};
static const struct ethtool_ops mvpp2_eth_tool_ops = {
@@ -7945,7 +8290,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
dev->features = features | NETIF_F_RXCSUM;
- dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
+ dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
+ NETIF_F_HW_VLAN_CTAG_FILTER;
dev->vlan_features |= features;
dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index ebc1f566a4d9..9a7a2f05ab35 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -199,6 +199,10 @@ static const char main_strings[][ETH_GSTRING_LEN] = {
"rx_xdp_drop",
"rx_xdp_tx",
"rx_xdp_tx_full",
+
+ /* phy statistics */
+ "rx_packets_phy", "rx_bytes_phy",
+ "tx_packets_phy", "tx_bytes_phy",
};
static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= {
@@ -411,6 +415,10 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
if (bitmap_iterator_test(&it))
data[index++] = ((unsigned long *)&priv->xdp_stats)[i];
+ for (i = 0; i < NUM_PHY_STATS; i++, bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ data[index++] = ((unsigned long *)&priv->phy_stats)[i];
+
for (i = 0; i < priv->tx_ring_num[TX]; i++) {
data[index++] = priv->tx_ring[TX][i]->packets;
data[index++] = priv->tx_ring[TX][i]->bytes;
@@ -490,6 +498,12 @@ static void mlx4_en_get_strings(struct net_device *dev,
strcpy(data + (index++) * ETH_GSTRING_LEN,
main_strings[strings]);
+ for (i = 0; i < NUM_PHY_STATS; i++, strings++,
+ bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ strcpy(data + (index++) * ETH_GSTRING_LEN,
+ main_strings[strings]);
+
for (i = 0; i < priv->tx_ring_num[TX]; i++) {
sprintf(data + (index++) * ETH_GSTRING_LEN,
"tx%d_packets", i);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 8fc51bc29003..e0adac4a9a19 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -3256,6 +3256,10 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
bitmap_set(stats_bitmap->bitmap, last_i, NUM_XDP_STATS);
last_i += NUM_XDP_STATS;
+
+ if (!mlx4_is_slave(dev))
+ bitmap_set(stats_bitmap->bitmap, last_i, NUM_PHY_STATS);
+ last_i += NUM_PHY_STATS;
}
int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
@@ -3630,10 +3634,6 @@ int mlx4_en_reset_config(struct net_device *dev,
mlx4_en_stop_port(dev, 1);
}
- en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n",
- ts_config.rx_filter,
- !!(features & NETIF_F_HW_VLAN_CTAG_RX));
-
mlx4_en_safe_replace_resources(priv, tmp);
if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index 1fa4849a6f56..0158b88bea5b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -275,19 +275,31 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
priv->port_stats.xmit_more += READ_ONCE(ring->xmit_more);
}
- if (mlx4_is_master(mdev->dev)) {
- stats->rx_packets = en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
- &mlx4_en_stats->RTOT_prio_1,
- NUM_PRIORITIES);
- stats->tx_packets = en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
- &mlx4_en_stats->TTOT_prio_1,
- NUM_PRIORITIES);
- stats->rx_bytes = en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
- &mlx4_en_stats->ROCT_prio_1,
- NUM_PRIORITIES);
- stats->tx_bytes = en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
- &mlx4_en_stats->TOCT_prio_1,
- NUM_PRIORITIES);
+ if (!mlx4_is_slave(mdev->dev)) {
+ struct mlx4_en_phy_stats *p_stats = &priv->phy_stats;
+
+ p_stats->rx_packets_phy =
+ en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
+ &mlx4_en_stats->RTOT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->tx_packets_phy =
+ en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
+ &mlx4_en_stats->TTOT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->rx_bytes_phy =
+ en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
+ &mlx4_en_stats->ROCT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->tx_bytes_phy =
+ en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
+ &mlx4_en_stats->TOCT_prio_1,
+ NUM_PRIORITIES);
+ if (mlx4_is_master(mdev->dev)) {
+ stats->rx_packets = p_stats->rx_packets_phy;
+ stats->tx_packets = p_stats->tx_packets_phy;
+ stats->rx_bytes = p_stats->rx_bytes_phy;
+ stats->tx_bytes = p_stats->tx_bytes_phy;
+ }
}
/* net device stats */
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index b4d144e67514..c2c6bd7578fd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -649,6 +649,12 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
return get_fixed_ipv4_csum(hw_checksum, skb, hdr);
}
+#if IS_ENABLED(CONFIG_IPV6)
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6)
+#else
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4)
+#endif
+
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -662,12 +668,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
int polled = 0;
int index;
- if (unlikely(!priv->port_up))
+ if (unlikely(!priv->port_up || budget <= 0))
return 0;
- if (unlikely(budget <= 0))
- return polled;
-
ring = priv->rx_ring[cq_ring];
/* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
@@ -838,12 +841,7 @@ xdp_drop_no_cnt:
ring->csum_ok++;
} else {
if (!(priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP &&
- (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
-#if IS_ENABLED(CONFIG_IPV6)
- MLX4_CQE_STATUS_IPV6))))
-#else
- 0))))
-#endif
+ (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IP_ANY))))
goto csum_none;
if (check_csum(cqe, skb, va, dev->features))
goto csum_none;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index f470ae37d937..f7c81133594f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -608,6 +608,7 @@ struct mlx4_en_priv {
struct mlx4_en_flow_stats_tx tx_flowstats;
struct mlx4_en_port_stats port_stats;
struct mlx4_en_xdp_stats xdp_stats;
+ struct mlx4_en_phy_stats phy_stats;
struct mlx4_en_stats_bitmap stats_bitmap;
struct list_head mc_list;
struct list_head curr_list;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
index aab28eb27a30..86b6051da8ec 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
@@ -63,6 +63,14 @@ struct mlx4_en_xdp_stats {
#define NUM_XDP_STATS 3
};
+struct mlx4_en_phy_stats {
+ unsigned long rx_packets_phy;
+ unsigned long rx_bytes_phy;
+ unsigned long tx_packets_phy;
+ unsigned long tx_bytes_phy;
+#define NUM_PHY_STATS 4
+};
+
#define NUM_MAIN_STATS 21
#define MLX4_NUM_PRIORITIES 8
@@ -116,7 +124,7 @@ enum {
#define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \
NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \
- NUM_XDP_STATS)
+ NUM_XDP_STATS + NUM_PHY_STATS)
#define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \
sizeof(((struct net_device_stats *)0)->n))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 17b723218b0c..b994b80d5714 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -337,6 +337,14 @@ void mlx5_unregister_interface(struct mlx5_interface *intf)
}
EXPORT_SYMBOL(mlx5_unregister_interface);
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
+{
+ mutex_lock(&mlx5_intf_mutex);
+ mlx5_remove_dev_by_protocol(mdev, protocol);
+ mlx5_add_dev_by_protocol(mdev, protocol);
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
{
struct mlx5_priv *priv = &mdev->priv;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 363d8dcb7f17..ea4b255380a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1156,6 +1156,15 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
kfree(ppriv); /* mlx5e_rep_priv */
}
+static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+
+ return rpriv->netdev;
+}
+
static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
@@ -1168,6 +1177,7 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
rep_if.load = mlx5e_vport_rep_load;
rep_if.unload = mlx5e_vport_rep_unload;
+ rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
}
}
@@ -1195,6 +1205,7 @@ void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
rep_if.load = mlx5e_nic_rep_load;
rep_if.unload = mlx5e_nic_rep_unload;
+ rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
rep_if.priv = rpriv;
INIT_LIST_HEAD(&rpriv->vport_sqs_list);
mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index c2b1d7d351fc..77b7272eaaa8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1619,10 +1619,14 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
esw->mode = mode;
- if (mode == SRIOV_LEGACY)
+ if (mode == SRIOV_LEGACY) {
err = esw_create_legacy_fdb_table(esw, nvfs + 1);
- else
+ } else {
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
err = esw_offloads_init(esw, nvfs + 1);
+ }
+
if (err)
goto abort;
@@ -1644,12 +1648,17 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
abort:
esw->mode = SRIOV_NONE;
+
+ if (mode == SRIOV_OFFLOADS)
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
return err;
}
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
{
struct esw_mc_addr *mc_promisc;
+ int old_mode;
int nvports;
int i;
@@ -1675,7 +1684,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
else if (esw->mode == SRIOV_OFFLOADS)
esw_offloads_cleanup(esw, nvports);
+ old_mode = esw->mode;
esw->mode = SRIOV_NONE;
+
+ if (old_mode == SRIOV_OFFLOADS)
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
}
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
@@ -2175,3 +2188,9 @@ free_out:
kvfree(out);
return err;
}
+
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return esw->mode;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 2fa037066b2f..98d2177d0806 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -37,19 +37,9 @@
#include <linux/if_link.h>
#include <net/devlink.h>
#include <linux/mlx5/device.h>
+#include <linux/mlx5/eswitch.h>
#include "lib/mpfs.h"
-enum {
- SRIOV_NONE,
- SRIOV_LEGACY,
- SRIOV_OFFLOADS
-};
-
-enum {
- REP_ETH,
- NUM_REP_TYPES,
-};
-
#ifdef CONFIG_MLX5_ESWITCH
#define MLX5_MAX_UC_PER_VPORT(dev) \
@@ -139,29 +129,13 @@ struct mlx5_eswitch_fdb {
struct mlx5_flow_table *fdb;
struct mlx5_flow_group *send_to_vport_grp;
struct mlx5_flow_group *miss_grp;
- struct mlx5_flow_handle *miss_rule;
+ struct mlx5_flow_handle *miss_rule_uni;
+ struct mlx5_flow_handle *miss_rule_multi;
int vlan_push_pop_refcount;
} offloads;
};
};
-struct mlx5_eswitch_rep;
-struct mlx5_eswitch_rep_if {
- int (*load)(struct mlx5_core_dev *dev,
- struct mlx5_eswitch_rep *rep);
- void (*unload)(struct mlx5_eswitch_rep *rep);
- void *priv;
- bool valid;
-};
-
-struct mlx5_eswitch_rep {
- struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
- u16 vport;
- u8 hw_id[ETH_ALEN];
- u16 vlan;
- u32 vlan_refcount;
-};
-
struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
@@ -231,9 +205,6 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
int vport,
struct ifla_vf_stats *vf_stats);
-struct mlx5_flow_handle *
-mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport,
- u32 sqn);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
struct mlx5_flow_spec;
@@ -278,13 +249,6 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
-void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
- int vport_index,
- struct mlx5_eswitch_rep_if *rep_if,
- u8 rep_type);
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
- int vport_index,
- u8 rep_type);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 99f583a15cc3..0a8303c1b52f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -338,6 +338,7 @@ out:
kvfree(spec);
return flow_rule;
}
+EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
{
@@ -350,7 +351,11 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
struct mlx5_flow_destination dest = {};
struct mlx5_flow_handle *flow_rule = NULL;
struct mlx5_flow_spec *spec;
+ void *headers_c;
+ void *headers_v;
int err = 0;
+ u8 *dmac_c;
+ u8 *dmac_v;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec) {
@@ -358,6 +363,13 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
goto out;
}
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
+ outer_headers.dmac_47_16);
+ dmac_c[0] = 0x01;
+
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest.vport_num = 0;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
@@ -366,11 +378,28 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
&flow_act, &dest, 1);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
- esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err);
+ esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err);
goto out;
}
- esw->fdb_table.offloads.miss_rule = flow_rule;
+ esw->fdb_table.offloads.miss_rule_uni = flow_rule;
+
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
+ outer_headers.dmac_47_16);
+ dmac_v[0] = 0x01;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+ goto out;
+ }
+
+ esw->fdb_table.offloads.miss_rule_multi = flow_rule;
+
out:
kvfree(spec);
return err;
@@ -426,6 +455,7 @@ static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
}
#define MAX_PF_SQ 256
+#define MAX_SQ_NVPORTS 32
static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
{
@@ -438,6 +468,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
struct mlx5_flow_group *g;
void *match_criteria;
u32 *flow_group_in;
+ u8 *dmac;
esw_debug(esw->dev, "Create offloads FDB Tables\n");
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
@@ -455,7 +486,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
if (err)
goto fast_fdb_err;
- table_size = nvports + MAX_PF_SQ + 1;
+ table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
ft_attr.max_fte = table_size;
ft_attr.prio = FDB_SLOW_PATH;
@@ -478,7 +509,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
- ix = nvports + MAX_PF_SQ;
+ ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ;
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
@@ -492,10 +523,16 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
/* create miss group */
memset(flow_group_in, 0, inlen);
- MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers.dmac_47_16);
+ dmac[0] = 0x01;
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
- MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
g = mlx5_create_flow_group(fdb, flow_group_in);
if (IS_ERR(g)) {
@@ -531,7 +568,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
return;
esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
- mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
@@ -789,14 +827,9 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
{
int err;
- /* disable PF RoCE so missed packets don't go through RoCE steering */
- mlx5_dev_list_lock();
- mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_dev_list_unlock();
-
err = esw_create_offloads_fdb_tables(esw, nvports);
if (err)
- goto create_fdb_err;
+ return err;
err = esw_create_offloads_table(esw);
if (err)
@@ -821,12 +854,6 @@ create_fg_err:
create_ft_err:
esw_destroy_offloads_fdb_tables(esw);
-create_fdb_err:
- /* enable back PF RoCE */
- mlx5_dev_list_lock();
- mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_dev_list_unlock();
-
return err;
}
@@ -844,9 +871,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw)
}
/* enable back PF RoCE */
- mlx5_dev_list_lock();
- mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
- mlx5_dev_list_unlock();
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
return err;
}
@@ -1160,10 +1185,12 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
rep_if->load = __rep_if->load;
rep_if->unload = __rep_if->unload;
+ rep_if->get_proto_dev = __rep_if->get_proto_dev;
rep_if->priv = __rep_if->priv;
rep_if->valid = true;
}
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
int vport_index, u8 rep_type)
@@ -1178,6 +1205,7 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
rep->rep_if[rep_type].valid = false;
}
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
{
@@ -1188,3 +1216,35 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
rep = &offloads->vport_reps[UPLINK_REP_INDEX];
return rep->rep_if[rep_type].priv;
}
+
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+ int vport,
+ u8 rep_type)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+ struct mlx5_eswitch_rep *rep;
+
+ if (vport == FDB_UPLINK_VPORT)
+ vport = UPLINK_REP_INDEX;
+
+ rep = &offloads->vport_reps[vport];
+
+ if (rep->rep_if[rep_type].valid &&
+ rep->rep_if[rep_type].get_proto_dev)
+ return rep->rep_if[rep_type].get_proto_dev(rep);
+ return NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
+
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
+{
+ return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
+}
+EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
+
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+ int vport)
+{
+ return &esw->offloads.vport_reps[vport];
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 23e17ac0cba5..4e25f2b2e0bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -43,12 +43,6 @@
#define DRIVER_NAME "mlx5_core"
#define DRIVER_VERSION "5.0-0"
-#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev))
-#define MLX5_VPORT_MANAGER(mdev) \
- (MLX5_CAP_GEN(mdev, vport_group_manager) && \
- (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
- mlx5_core_is_pf(mdev))
-
extern uint mlx5_core_debug_mask;
#define mlx5_core_dbg(__dev, format, ...) \
@@ -207,4 +201,5 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
int mlx5_lag_allow(struct mlx5_core_dev *dev);
int mlx5_lag_forbid(struct mlx5_core_dev *dev);
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index d56eea310509..93d97b4676eb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -78,6 +78,10 @@ config MLXSW_SPECTRUM
depends on IPV6 || IPV6=n
select PARMAN
select MLXFW
+ depends on NET_IPGRE
+ depends on !(MLXSW_CORE=y && NET_IPGRE=m)
+ depends on IPV6_GRE
+ depends on !(MLXSW_CORE=y && IPV6_GRE=m)
default m
---help---
This driver supports Mellanox Technologies Spectrum Ethernet
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index b698fb481b2e..ba338428ffd1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -1,6 +1,6 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
@@ -838,7 +838,6 @@ struct mlxsw_afa_mirror {
struct mlxsw_afa_resource resource;
int span_id;
u8 local_in_port;
- u8 local_out_port;
bool ingress;
};
@@ -848,7 +847,7 @@ mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block,
{
block->afa->ops->mirror_del(block->afa->ops_priv,
mirror->local_in_port,
- mirror->local_out_port,
+ mirror->span_id,
mirror->ingress);
kfree(mirror);
}
@@ -864,9 +863,8 @@ mlxsw_afa_mirror_destructor(struct mlxsw_afa_block *block,
}
static struct mlxsw_afa_mirror *
-mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port,
- bool ingress)
+mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, u8 local_in_port,
+ const struct net_device *out_dev, bool ingress)
{
struct mlxsw_afa_mirror *mirror;
int err;
@@ -876,13 +874,12 @@ mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
return ERR_PTR(-ENOMEM);
err = block->afa->ops->mirror_add(block->afa->ops_priv,
- local_in_port, local_out_port,
+ local_in_port, out_dev,
ingress, &mirror->span_id);
if (err)
goto err_mirror_add;
mirror->ingress = ingress;
- mirror->local_out_port = local_out_port;
mirror->local_in_port = local_in_port;
mirror->resource.destructor = mlxsw_afa_mirror_destructor;
mlxsw_afa_resource_add(block, &mirror->resource);
@@ -909,13 +906,13 @@ mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block,
}
int
-mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port, bool ingress)
+mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, u8 local_in_port,
+ const struct net_device *out_dev, bool ingress)
{
struct mlxsw_afa_mirror *mirror;
int err;
- mirror = mlxsw_afa_mirror_create(block, local_in_port, local_out_port,
+ mirror = mlxsw_afa_mirror_create(block, local_in_port, out_dev,
ingress);
if (IS_ERR(mirror))
return PTR_ERR(mirror);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
index 43132293475c..6dd601703c99 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
@@ -36,6 +36,7 @@
#define _MLXSW_CORE_ACL_FLEX_ACTIONS_H
#include <linux/types.h>
+#include <linux/netdevice.h>
struct mlxsw_afa;
struct mlxsw_afa_block;
@@ -48,9 +49,10 @@ struct mlxsw_afa_ops {
void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index);
int (*counter_index_get)(void *priv, unsigned int *p_counter_index);
void (*counter_index_put)(void *priv, unsigned int counter_index);
- int (*mirror_add)(void *priv, u8 locol_in_port, u8 local_out_port,
+ int (*mirror_add)(void *priv, u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress, int *p_span_id);
- void (*mirror_del)(void *priv, u8 locol_in_port, u8 local_out_port,
+ void (*mirror_del)(void *priv, u8 local_in_port, int span_id,
bool ingress);
};
@@ -70,7 +72,8 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id);
int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
u16 trap_id);
int mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
- u8 local_in_port, u8 local_out_port,
+ u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress);
int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
u8 local_port, bool in_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 0e08be41c8e0..cb5f77f09f8e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -1,11 +1,11 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/reg.h
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015-2016 Ido Schimmel <idosch@mellanox.com>
* Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
* Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -6772,8 +6772,104 @@ MLXSW_ITEM32(reg, mpat, qos, 0x04, 26, 1);
*/
MLXSW_ITEM32(reg, mpat, be, 0x04, 25, 1);
+enum mlxsw_reg_mpat_span_type {
+ /* Local SPAN Ethernet.
+ * The original packet is not encapsulated.
+ */
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH = 0x0,
+
+ /* Encapsulated Remote SPAN Ethernet L3 GRE.
+ * The packet is encapsulated with GRE header.
+ */
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3 = 0x3,
+};
+
+/* reg_mpat_span_type
+ * SPAN type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, span_type, 0x04, 0, 4);
+
+/* Remote SPAN - Ethernet VLAN
+ * - - - - - - - - - - - - - -
+ */
+
+/* reg_mpat_eth_rspan_vid
+ * Encapsulation header VLAN ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_vid, 0x18, 0, 12);
+
+/* Encapsulated Remote SPAN - Ethernet L2
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_version {
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER = 15,
+};
+
+/* reg_mpat_eth_rspan_version
+ * RSPAN mirror header version.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_version, 0x10, 18, 4);
+
+/* reg_mpat_eth_rspan_mac
+ * Destination MAC address.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_mac, 0x12, 6);
+
+/* reg_mpat_eth_rspan_tp
+ * Tag Packet. Indicates whether the mirroring header should be VLAN tagged.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_tp, 0x18, 16, 1);
+
+/* Encapsulated Remote SPAN - Ethernet L3
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_protocol {
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6,
+};
+
+/* reg_mpat_eth_rspan_protocol
+ * SPAN encapsulation protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_protocol, 0x18, 24, 4);
+
+/* reg_mpat_eth_rspan_ttl
+ * Encapsulation header Time-to-Live/HopLimit.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_ttl, 0x1C, 4, 8);
+
+/* reg_mpat_eth_rspan_smac
+ * Source MAC address
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_smac, 0x22, 6);
+
+/* reg_mpat_eth_rspan_dip*
+ * Destination IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_dip4, 0x4C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_dip6, 0x40, 16);
+
+/* reg_mpat_eth_rspan_sip*
+ * Source IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_sip4, 0x5C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_sip6, 0x50, 16);
+
static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
- u16 system_port, bool e)
+ u16 system_port, bool e,
+ enum mlxsw_reg_mpat_span_type span_type)
{
MLXSW_REG_ZERO(mpat, payload);
mlxsw_reg_mpat_pa_id_set(payload, pa_id);
@@ -6781,6 +6877,49 @@ static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
mlxsw_reg_mpat_e_set(payload, e);
mlxsw_reg_mpat_qos_set(payload, 1);
mlxsw_reg_mpat_be_set(payload, 1);
+ mlxsw_reg_mpat_span_type_set(payload, span_type);
+}
+
+static inline void mlxsw_reg_mpat_eth_rspan_pack(char *payload, u16 vid)
+{
+ mlxsw_reg_mpat_eth_rspan_vid_set(payload, vid);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l2_pack(char *payload,
+ enum mlxsw_reg_mpat_eth_rspan_version version,
+ const char *mac,
+ bool tp)
+{
+ mlxsw_reg_mpat_eth_rspan_version_set(payload, version);
+ mlxsw_reg_mpat_eth_rspan_mac_memcpy_to(payload, mac);
+ mlxsw_reg_mpat_eth_rspan_tp_set(payload, tp);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(char *payload, u8 ttl,
+ const char *smac,
+ u32 sip, u32 dip)
+{
+ mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+ mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+ mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4);
+ mlxsw_reg_mpat_eth_rspan_sip4_set(payload, sip);
+ mlxsw_reg_mpat_eth_rspan_dip4_set(payload, dip);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(char *payload, u8 ttl,
+ const char *smac,
+ struct in6_addr sip, struct in6_addr dip)
+{
+ mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+ mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+ mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6);
+ mlxsw_reg_mpat_eth_rspan_sip6_memcpy_to(payload, (void *)&sip);
+ mlxsw_reg_mpat_eth_rspan_dip6_memcpy_to(payload, (void *)&dip);
}
/* MPAR - Monitoring Port Analyzer Register
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index bfde93910f82..7c6204f701ae 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1,6 +1,6 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum.c
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com>
* Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
@@ -1040,6 +1040,16 @@ mlxsw_sp_port_get_hw_xstats(struct net_device *dev,
xstats->tail_drop[i] =
mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl);
}
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_PRIO_CNT,
+ i, ppcnt_pl);
+ if (err)
+ continue;
+
+ xstats->tx_packets[i] = mlxsw_reg_ppcnt_tx_frames_get(ppcnt_pl);
+ xstats->tx_bytes[i] = mlxsw_reg_ppcnt_tx_octets_get(ppcnt_pl);
+ }
}
static void update_stats_cache(struct work_struct *work)
@@ -1258,7 +1268,6 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
bool ingress)
{
enum mlxsw_sp_span_type span_type;
- struct mlxsw_sp_port *to_port;
struct net_device *to_dev;
to_dev = tcf_mirred_dev(a);
@@ -1267,17 +1276,10 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
return -EINVAL;
}
- if (!mlxsw_sp_port_dev_check(to_dev)) {
- netdev_err(mlxsw_sp_port->dev, "Cannot mirror to a non-spectrum port");
- return -EOPNOTSUPP;
- }
- to_port = netdev_priv(to_dev);
-
- mirror->to_local_port = to_port->local_port;
mirror->ingress = ingress;
span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
- return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type,
- true);
+ return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_dev, span_type,
+ true, &mirror->span_id);
}
static void
@@ -1288,7 +1290,7 @@ mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
span_type = mirror->ingress ?
MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
- mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->to_local_port,
+ mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->span_id,
span_type, true);
}
@@ -3675,14 +3677,24 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_afa_init;
}
+ err = mlxsw_sp_span_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
+ goto err_span_init;
+ }
+
+ /* Initialize router after SPAN is initialized, so that the FIB and
+ * neighbor event handlers can issue SPAN respin.
+ */
err = mlxsw_sp_router_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
goto err_router_init;
}
- /* Initialize netdevice notifier after router is initialized, so that
- * the event handler can use router structures.
+ /* Initialize netdevice notifier after router and SPAN is initialized,
+ * so that the event handler can use router structures and call SPAN
+ * respin.
*/
mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
@@ -3691,12 +3703,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_netdev_notifier;
}
- err = mlxsw_sp_span_init(mlxsw_sp);
- if (err) {
- dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
- goto err_span_init;
- }
-
err = mlxsw_sp_acl_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n");
@@ -3722,12 +3728,12 @@ err_ports_create:
err_dpipe_init:
mlxsw_sp_acl_fini(mlxsw_sp);
err_acl_init:
- mlxsw_sp_span_fini(mlxsw_sp);
-err_span_init:
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
err_netdev_notifier:
mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
+ mlxsw_sp_span_fini(mlxsw_sp);
+err_span_init:
mlxsw_sp_afa_fini(mlxsw_sp);
err_afa_init:
mlxsw_sp_counter_pool_fini(mlxsw_sp);
@@ -3753,9 +3759,9 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_ports_remove(mlxsw_sp);
mlxsw_sp_dpipe_fini(mlxsw_sp);
mlxsw_sp_acl_fini(mlxsw_sp);
- mlxsw_sp_span_fini(mlxsw_sp);
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
mlxsw_sp_router_fini(mlxsw_sp);
+ mlxsw_sp_span_fini(mlxsw_sp);
mlxsw_sp_afa_fini(mlxsw_sp);
mlxsw_sp_counter_pool_fini(mlxsw_sp);
mlxsw_sp_switchdev_fini(mlxsw_sp);
@@ -4639,10 +4645,18 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct mlxsw_sp_span_entry *span_entry;
struct mlxsw_sp *mlxsw_sp;
int err = 0;
mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
+ if (event == NETDEV_UNREGISTER) {
+ span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev);
+ if (span_entry)
+ mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry);
+ }
+ mlxsw_sp_span_respin(mlxsw_sp);
+
if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
event, ptr);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 675e03a892ed..d5e711d8ad71 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -124,7 +124,7 @@ enum mlxsw_sp_port_mall_action_type {
};
struct mlxsw_sp_port_mall_mirror_tc_entry {
- u8 to_local_port;
+ int span_id;
bool ingress;
};
@@ -210,6 +210,8 @@ struct mlxsw_sp_port_xstats {
u64 wred_drop[TC_MAX_QUEUE];
u64 tail_drop[TC_MAX_QUEUE];
u64 backlog[TC_MAX_QUEUE];
+ u64 tx_bytes[IEEE_8021QAZ_MAX_TCS];
+ u64 tx_packets[IEEE_8021QAZ_MAX_TCS];
};
struct mlxsw_sp_port {
@@ -247,6 +249,7 @@ struct mlxsw_sp_port {
struct mlxsw_sp_port_sample *sample;
struct list_head vlans_list;
struct mlxsw_sp_qdisc *root_qdisc;
+ struct mlxsw_sp_qdisc *tclass_qdiscs;
unsigned acl_rule_count;
struct mlxsw_sp_acl_block *ing_acl_block;
struct mlxsw_sp_acl_block *eg_acl_block;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 0897a5435cc2..21ed27ae51e3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -572,7 +572,6 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
struct net_device *out_dev)
{
struct mlxsw_sp_acl_block_binding *binding;
- struct mlxsw_sp_port *out_port;
struct mlxsw_sp_port *in_port;
if (!list_is_singular(&block->binding_list))
@@ -581,16 +580,10 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
binding = list_first_entry(&block->binding_list,
struct mlxsw_sp_acl_block_binding, list);
in_port = binding->mlxsw_sp_port;
- if (!mlxsw_sp_port_dev_check(out_dev))
- return -EINVAL;
-
- out_port = netdev_priv(out_dev);
- if (out_port->mlxsw_sp != mlxsw_sp)
- return -EINVAL;
return mlxsw_afa_block_append_mirror(rulei->act_block,
in_port->local_port,
- out_port->local_port,
+ out_dev,
binding->ingress);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
index f7e61cecc42b..510ce48d87f7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
@@ -1,6 +1,6 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
*
@@ -126,40 +126,23 @@ mlxsw_sp_act_counter_index_put(void *priv, unsigned int counter_index)
}
static int
-mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port, u8 local_out_port,
+mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port,
+ const struct net_device *out_dev,
bool ingress, int *p_span_id)
{
- struct mlxsw_sp_port *in_port, *out_port;
- struct mlxsw_sp_span_entry *span_entry;
+ struct mlxsw_sp_port *in_port;
struct mlxsw_sp *mlxsw_sp = priv;
enum mlxsw_sp_span_type type;
- int err;
type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
- out_port = mlxsw_sp->ports[local_out_port];
in_port = mlxsw_sp->ports[local_in_port];
- err = mlxsw_sp_span_mirror_add(in_port, out_port, type, false);
- if (err)
- return err;
-
- span_entry = mlxsw_sp_span_entry_find(mlxsw_sp, local_out_port);
- if (!span_entry) {
- err = -ENOENT;
- goto err_span_entry_find;
- }
-
- *p_span_id = span_entry->id;
- return 0;
-
-err_span_entry_find:
- mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
- return err;
+ return mlxsw_sp_span_mirror_add(in_port, out_dev, type,
+ false, p_span_id);
}
static void
-mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
- bool ingress)
+mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, int span_id, bool ingress)
{
struct mlxsw_sp *mlxsw_sp = priv;
struct mlxsw_sp_port *in_port;
@@ -168,7 +151,7 @@ mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
in_port = mlxsw_sp->ports[local_in_port];
- mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
+ mlxsw_sp_span_mirror_del(in_port, span_id, type, false);
}
static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index a1c4b1e63f8d..98d896c14b87 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -1,7 +1,7 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -33,6 +33,7 @@
*/
#include <net/ip_tunnels.h>
+#include <net/ip6_tunnel.h>
#include "spectrum_ipip.h"
@@ -44,6 +45,14 @@ mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev)
return tun->parms;
}
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev)
+{
+ struct ip6_tnl *tun = netdev_priv(ol_dev);
+
+ return tun->parms;
+}
+
static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms)
{
return !!(parms.i_flags & TUNNEL_KEY);
@@ -73,23 +82,37 @@ mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms)
}
static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms6_saddr(struct __ip6_tnl_parm parms)
+{
+ return (union mlxsw_sp_l3addr) { .addr6 = parms.laddr };
+}
+
+static union mlxsw_sp_l3addr
mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms)
{
return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr };
}
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms6_daddr(struct __ip6_tnl_parm parms)
+{
+ return (union mlxsw_sp_l3addr) { .addr6 = parms.raddr };
+}
+
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev)
{
struct ip_tunnel_parm parms4;
+ struct __ip6_tnl_parm parms6;
switch (proto) {
case MLXSW_SP_L3_PROTO_IPV4:
parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
return mlxsw_sp_ipip_parms4_saddr(parms4);
case MLXSW_SP_L3_PROTO_IPV6:
- break;
+ parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+ return mlxsw_sp_ipip_parms6_saddr(parms6);
}
WARN_ON(1);
@@ -109,19 +132,28 @@ mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev)
{
struct ip_tunnel_parm parms4;
+ struct __ip6_tnl_parm parms6;
switch (proto) {
case MLXSW_SP_L3_PROTO_IPV4:
parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
return mlxsw_sp_ipip_parms4_daddr(parms4);
case MLXSW_SP_L3_PROTO_IPV6:
- break;
+ parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+ return mlxsw_sp_ipip_parms6_daddr(parms6);
}
WARN_ON(1);
return (union mlxsw_sp_l3addr) {0};
}
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
+{
+ union mlxsw_sp_l3addr naddr = {0};
+
+ return !memcmp(&addr, &naddr, sizeof(naddr));
+}
+
static int
mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_ipip_entry *ipip_entry)
@@ -215,15 +247,14 @@ static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto,
{
union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev);
- union mlxsw_sp_l3addr naddr = {0};
/* Tunnels with unset local or remote address are valid in Linux and
* used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access
* (NBMA) tunnels. In principle these can be offloaded, but the driver
* currently doesn't support this. So punt.
*/
- return memcmp(&saddr, &naddr, sizeof(naddr)) &&
- memcmp(&daddr, &naddr, sizeof(naddr));
+ return !mlxsw_sp_l3addr_is_zero(saddr) &&
+ !mlxsw_sp_l3addr_is_zero(daddr);
}
static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index a4ff5737eccc..6909d867bb59 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -1,7 +1,7 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -41,11 +41,15 @@
struct ip_tunnel_parm
mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev);
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev);
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev);
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr);
+
enum mlxsw_sp_ipip_type {
MLXSW_SP_IPIP_TYPE_GRE4,
MLXSW_SP_IPIP_TYPE_MAX,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index d27fa57ad3c3..059eb3214328 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -270,6 +270,8 @@ static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
case MLXSW_SP_KVDL_PART_LARGE_CHUNKS:
resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS;
break;
+ default:
+ return -EINVAL;
}
err = devlink_resource_size_get(devlink, resource_id, &resource_size);
@@ -278,7 +280,7 @@ static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
resource_size = info->end_index - info->start_index + 1;
}
- nr_entries = resource_size / info->alloc_size;
+ nr_entries = div_u64(resource_size, info->alloc_size);
usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
if (!part)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index d20b143de3b4..978a3c70653a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -126,8 +126,8 @@ mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route)
switch (mr_route->mr_table->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
- ivif = mr_route->mfc4->mfc_parent;
- return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255;
+ ivif = mr_route->mfc4->_c.mfc_parent;
+ return mr_route->mfc4->_c.mfc_un.res.ttls[ivif] != 255;
case MLXSW_SP_L3_PROTO_IPV6:
/* fall through */
default:
@@ -364,7 +364,7 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
mr_route->mfc4 = mfc;
mr_route->mr_table = mr_table;
for (i = 0; i < MAXVIFS; i++) {
- if (mfc->mfc_un.res.ttls[i] != 255) {
+ if (mfc->_c.mfc_un.res.ttls[i] != 255) {
err = mlxsw_sp_mr_route_evif_link(mr_route,
&mr_table->vifs[i]);
if (err)
@@ -374,7 +374,8 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
mr_route->min_mtu = mr_table->vifs[i].dev->mtu;
}
}
- mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]);
+ mlxsw_sp_mr_route_ivif_link(mr_route,
+ &mr_table->vifs[mfc->_c.mfc_parent]);
mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
return mr_route;
@@ -418,9 +419,9 @@ static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route,
switch (mr_route->mr_table->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
if (offload)
- mr_route->mfc4->mfc_flags |= MFC_OFFLOAD;
+ mr_route->mfc4->_c.mfc_flags |= MFC_OFFLOAD;
else
- mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD;
+ mr_route->mfc4->_c.mfc_flags &= ~MFC_OFFLOAD;
break;
case MLXSW_SP_L3_PROTO_IPV6:
/* fall through */
@@ -943,10 +944,10 @@ static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp,
switch (mr_route->mr_table->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
- if (mr_route->mfc4->mfc_un.res.pkt != packets)
- mr_route->mfc4->mfc_un.res.lastuse = jiffies;
- mr_route->mfc4->mfc_un.res.pkt = packets;
- mr_route->mfc4->mfc_un.res.bytes = bytes;
+ if (mr_route->mfc4->_c.mfc_un.res.pkt != packets)
+ mr_route->mfc4->_c.mfc_un.res.lastuse = jiffies;
+ mr_route->mfc4->_c.mfc_un.res.pkt = packets;
+ mr_route->mfc4->_c.mfc_un.res.bytes = bytes;
break;
case MLXSW_SP_L3_PROTO_IPV6:
/* fall through */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 0b7670459051..91262b0573e3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -42,6 +42,8 @@
#include "reg.h"
#define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1)
+#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \
+ MLXSW_SP_PRIO_BAND_TO_TCLASS((child - 1))
enum mlxsw_sp_qdisc_type {
MLXSW_SP_QDISC_NO_QDISC,
@@ -76,6 +78,7 @@ struct mlxsw_sp_qdisc_ops {
struct mlxsw_sp_qdisc {
u32 handle;
u8 tclass_num;
+ u8 prio_bitmap;
union {
struct red_stats red;
} xstats_base;
@@ -99,6 +102,44 @@ mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle,
mlxsw_sp_qdisc->handle == handle;
}
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
+ bool root_only)
+{
+ int tclass, child_index;
+
+ if (parent == TC_H_ROOT)
+ return mlxsw_sp_port->root_qdisc;
+
+ if (root_only || !mlxsw_sp_port->root_qdisc ||
+ !mlxsw_sp_port->root_qdisc->ops ||
+ TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle ||
+ TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS)
+ return NULL;
+
+ child_index = TC_H_MIN(parent);
+ tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index);
+ return &mlxsw_sp_port->tclass_qdiscs[tclass];
+}
+
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle)
+{
+ int i;
+
+ if (mlxsw_sp_port->root_qdisc->handle == handle)
+ return mlxsw_sp_port->root_qdisc;
+
+ if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC)
+ return NULL;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle)
+ return &mlxsw_sp_port->tclass_qdiscs[i];
+
+ return NULL;
+}
+
static int
mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
@@ -185,6 +226,23 @@ mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
return -EOPNOTSUPP;
}
+static void
+mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats,
+ u8 prio_bitmap, u64 *tx_packets,
+ u64 *tx_bytes)
+{
+ int i;
+
+ *tx_packets = 0;
+ *tx_bytes = 0;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (prio_bitmap & BIT(i)) {
+ *tx_packets += xstats->tx_packets[i];
+ *tx_bytes += xstats->tx_bytes[i];
+ }
+ }
+}
+
static int
mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
int tclass_num, u32 min, u32 max,
@@ -230,17 +288,16 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
- struct rtnl_link_stats64 *stats;
struct red_stats *red_base;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
- stats = &mlxsw_sp_port->periodic_hw_stats.stats;
stats_base = &mlxsw_sp_qdisc->stats_base;
red_base = &mlxsw_sp_qdisc->xstats_base.red;
- stats_base->tx_packets = stats->tx_packets;
- stats_base->tx_bytes = stats->tx_bytes;
-
+ mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+ mlxsw_sp_qdisc->prio_bitmap,
+ &stats_base->tx_packets,
+ &stats_base->tx_bytes);
red_base->prob_mark = xstats->ecn;
red_base->prob_drop = xstats->wred_drop[tclass_num];
red_base->pdrop = xstats->tail_drop[tclass_num];
@@ -255,6 +312,12 @@ static int
mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
+ struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc;
+
+ if (root_qdisc != mlxsw_sp_qdisc)
+ root_qdisc->stats_base.backlog -=
+ mlxsw_sp_qdisc->stats_base.backlog;
+
return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port,
mlxsw_sp_qdisc->tclass_num);
}
@@ -319,6 +382,7 @@ mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
mlxsw_sp_qdisc->stats_base.backlog);
p->qstats->backlog -= backlog;
+ mlxsw_sp_qdisc->stats_base.backlog = 0;
}
static int
@@ -357,14 +421,16 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
- struct rtnl_link_stats64 *stats;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
- stats = &mlxsw_sp_port->periodic_hw_stats.stats;
stats_base = &mlxsw_sp_qdisc->stats_base;
- tx_bytes = stats->tx_bytes - stats_base->tx_bytes;
- tx_packets = stats->tx_packets - stats_base->tx_packets;
+ mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+ mlxsw_sp_qdisc->prio_bitmap,
+ &tx_packets, &tx_bytes);
+ tx_bytes = tx_bytes - stats_base->tx_bytes;
+ tx_packets = tx_packets - stats_base->tx_packets;
+
overlimits = xstats->wred_drop[tclass_num] + xstats->ecn -
stats_base->overlimits;
drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] -
@@ -406,11 +472,10 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
- if (p->parent != TC_H_ROOT)
+ mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
+ if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
- mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
-
if (p->command == TC_RED_REPLACE)
return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
mlxsw_sp_qdisc,
@@ -441,9 +506,13 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
{
int i;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
MLXSW_SP_PORT_DEFAULT_TCLASS);
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+ &mlxsw_sp_port->tclass_qdiscs[i]);
+ mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0;
+ }
return 0;
}
@@ -467,16 +536,41 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
void *params)
{
struct tc_prio_qopt_offload_params *p = params;
- int tclass, i;
+ struct mlxsw_sp_qdisc *child_qdisc;
+ int tclass, i, band, backlog;
+ u8 old_priomap;
int err;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
- tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]);
- err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass);
- if (err)
- return err;
+ for (band = 0; band < p->bands; band++) {
+ tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+ child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+ old_priomap = child_qdisc->prio_bitmap;
+ child_qdisc->prio_bitmap = 0;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (p->priomap[i] == band) {
+ child_qdisc->prio_bitmap |= BIT(i);
+ if (BIT(i) & old_priomap)
+ continue;
+ err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port,
+ i, tclass);
+ if (err)
+ return err;
+ }
+ }
+ if (old_priomap != child_qdisc->prio_bitmap &&
+ child_qdisc->ops && child_qdisc->ops->clean_stats) {
+ backlog = child_qdisc->stats_base.backlog;
+ child_qdisc->ops->clean_stats(mlxsw_sp_port,
+ child_qdisc);
+ child_qdisc->stats_base.backlog = backlog;
+ }
+ }
+ for (; band < IEEE_8021QAZ_MAX_TCS; band++) {
+ tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+ child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+ child_qdisc->prio_bitmap = 0;
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc);
}
-
return 0;
}
@@ -513,6 +607,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
drops += xstats->tail_drop[i];
+ drops += xstats->wred_drop[i];
backlog += xstats->backlog[i];
}
drops = drops - stats_base->drops;
@@ -548,8 +643,10 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
stats_base->tx_bytes = stats->tx_bytes;
stats_base->drops = 0;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
stats_base->drops += xstats->tail_drop[i];
+ stats_base->drops += xstats->wred_drop[i];
+ }
mlxsw_sp_qdisc->stats_base.backlog = 0;
}
@@ -564,15 +661,48 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
};
+/* Grafting is not supported in mlxsw. It will result in un-offloading of the
+ * grafted qdisc as well as the qdisc in the qdisc new location.
+ * (However, if the graft is to the location where the qdisc is already at, it
+ * will be ignored completely and won't cause un-offloading).
+ */
+static int
+mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ struct tc_prio_qopt_offload_graft_params *p)
+{
+ int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band);
+ struct mlxsw_sp_qdisc *old_qdisc;
+
+ /* Check if the grafted qdisc is already in its "new" location. If so -
+ * nothing needs to be done.
+ */
+ if (p->band < IEEE_8021QAZ_MAX_TCS &&
+ mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle)
+ return 0;
+
+ /* See if the grafted qdisc is already offloaded on any tclass. If so,
+ * unoffload it.
+ */
+ old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port,
+ p->child_handle);
+ if (old_qdisc)
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
+
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+ &mlxsw_sp_port->tclass_qdiscs[tclass_num]);
+ return -EOPNOTSUPP;
+}
+
int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p)
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
- if (p->parent != TC_H_ROOT)
+ mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
+ if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
- mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
if (p->command == TC_PRIO_REPLACE)
return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
mlxsw_sp_qdisc,
@@ -589,6 +719,9 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
case TC_PRIO_STATS:
return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
&p->stats);
+ case TC_PRIO_GRAFT:
+ return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
+ &p->graft_params);
default:
return -EOPNOTSUPP;
}
@@ -596,17 +729,36 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
- mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc),
- GFP_KERNEL);
- if (!mlxsw_sp_port->root_qdisc)
- return -ENOMEM;
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+ int i;
+ mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL);
+ if (!mlxsw_sp_qdisc)
+ goto err_root_qdisc_init;
+
+ mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc;
+ mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff;
mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
+ mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc) * IEEE_8021QAZ_MAX_TCS,
+ GFP_KERNEL);
+ if (!mlxsw_sp_qdisc)
+ goto err_tclass_qdiscs_init;
+
+ mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i;
+
return 0;
+
+err_tclass_qdiscs_init:
+ kfree(mlxsw_sp_port->root_qdisc);
+err_root_qdisc_init:
+ return -ENOMEM;
}
void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
{
+ kfree(mlxsw_sp_port->tclass_qdiscs);
kfree(mlxsw_sp_port->root_qdisc);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 05146970c19c..69f16c605b9d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -70,6 +70,7 @@
#include "spectrum_mr.h"
#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"
+#include "spectrum_span.h"
struct mlxsw_sp_fib;
struct mlxsw_sp_vr;
@@ -2330,6 +2331,8 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
read_unlock_bh(&n->lock);
rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
entry_connected = nud_state & NUD_VALID && !dead;
neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
if (!entry_connected && !neigh_entry)
@@ -5589,6 +5592,8 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
/* Protect internal structures from changes */
rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_APPEND: /* fall through */
@@ -5631,6 +5636,8 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
int err;
rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_ADD:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index c3bec37d71ed..f537e1de11d9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -1,6 +1,7 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.c
* Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,9 +33,14 @@
*/
#include <linux/list.h>
+#include <net/arp.h>
+#include <net/gre.h>
+#include <net/ndisc.h>
+#include <net/ip6_tunnel.h>
#include "spectrum.h"
#include "spectrum_span.h"
+#include "spectrum_ipip.h"
int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
{
@@ -51,8 +57,12 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
if (!mlxsw_sp->span.entries)
return -ENOMEM;
- for (i = 0; i < mlxsw_sp->span.entries_count; i++)
- INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list);
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ INIT_LIST_HEAD(&curr->bound_ports_list);
+ curr->id = i;
+ }
return 0;
}
@@ -69,80 +79,460 @@ void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
kfree(mlxsw_sp->span.entries);
}
-static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
+static int
+mlxsw_sp_span_entry_phys_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
{
- struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
- struct mlxsw_sp_span_entry *span_entry;
+ sparmsp->dest_port = netdev_priv(to_dev);
+ return 0;
+}
+
+static int
+mlxsw_sp_span_entry_phys_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analayzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure_common(struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_reg_mpat_span_type span_type)
+{
+ struct mlxsw_sp_port *dest_port = span_entry->parms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false, span_type);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_phys_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = {
+ .can_handle = mlxsw_sp_port_dev_check,
+ .parms = mlxsw_sp_span_entry_phys_parms,
+ .configure = mlxsw_sp_span_entry_phys_configure,
+ .deconfigure = mlxsw_sp_span_entry_phys_deconfigure,
+};
+
+static struct net_device *
+mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
+ __be32 *saddrp, __be32 *daddrp)
+{
+ struct ip_tunnel *tun = netdev_priv(to_dev);
+ struct net_device *dev = NULL;
+ struct ip_tunnel_parm parms;
+ struct rtable *rt = NULL;
+ struct flowi4 fl4;
+
+ /* We assume "dev" stays valid after rt is put. */
+ ASSERT_RTNL();
+
+ parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
+ ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
+ 0, 0, parms.link, tun->fwmark);
+
+ rt = ip_route_output_key(tun->net, &fl4);
+ if (IS_ERR(rt))
+ return NULL;
+
+ if (rt->rt_type != RTN_UNICAST)
+ goto out;
+
+ dev = rt->dst.dev;
+ *saddrp = fl4.saddr;
+ *daddrp = rt->rt_gateway;
+
+out:
+ ip_rt_put(rt);
+ return dev;
+}
+
+static int mlxsw_sp_span_dmac(struct neigh_table *tbl,
+ const void *pkey,
+ struct net_device *l3edev,
+ unsigned char dmac[ETH_ALEN])
+{
+ struct neighbour *neigh = neigh_lookup(tbl, pkey, l3edev);
+ int err = 0;
+
+ if (!neigh) {
+ neigh = neigh_create(tbl, pkey, l3edev);
+ if (IS_ERR(neigh))
+ return PTR_ERR(neigh);
+ }
+
+ neigh_event_send(neigh, NULL);
+
+ read_lock_bh(&neigh->lock);
+ if ((neigh->nud_state & NUD_VALID) && !neigh->dead)
+ memcpy(dmac, neigh->ha, ETH_ALEN);
+ else
+ err = -ENOENT;
+ read_unlock_bh(&neigh->lock);
+
+ neigh_release(neigh);
+ return err;
+}
+
+static int
+mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp)
+{
+ sparmsp->dest_port = NULL;
+ return 0;
+}
+
+static int
+mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
+ union mlxsw_sp_l3addr saddr,
+ union mlxsw_sp_l3addr daddr,
+ union mlxsw_sp_l3addr gw,
+ __u8 ttl,
+ struct neigh_table *tbl,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ unsigned char dmac[ETH_ALEN];
+
+ if (mlxsw_sp_l3addr_is_zero(gw))
+ gw = daddr;
+
+ if (!l3edev || !mlxsw_sp_port_dev_check(l3edev) ||
+ mlxsw_sp_span_dmac(tbl, &gw, l3edev, dmac))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ sparmsp->dest_port = netdev_priv(l3edev);
+ sparmsp->ttl = ttl;
+ memcpy(sparmsp->dmac, dmac, ETH_ALEN);
+ memcpy(sparmsp->smac, l3edev->dev_addr, ETH_ALEN);
+ sparmsp->saddr = saddr;
+ sparmsp->daddr = daddr;
+ return 0;
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ struct ip_tunnel_parm tparm = mlxsw_sp_ipip_netdev_parms4(to_dev);
+ union mlxsw_sp_l3addr saddr = { .addr4 = tparm.iph.saddr };
+ union mlxsw_sp_l3addr daddr = { .addr4 = tparm.iph.daddr };
+ bool inherit_tos = tparm.iph.tos & 0x1;
+ bool inherit_ttl = !tparm.iph.ttl;
+ union mlxsw_sp_l3addr gw = daddr;
+ struct net_device *l3edev;
+
+ if (!(to_dev->flags & IFF_UP) ||
+ /* Reject tunnels with GRE keys, checksums, etc. */
+ tparm.i_flags || tparm.o_flags ||
+ /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+ inherit_ttl || !inherit_tos ||
+ /* A destination address may not be "any". */
+ mlxsw_sp_l3addr_is_zero(daddr))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ l3edev = mlxsw_sp_span_gretap4_route(to_dev, &saddr.addr4, &gw.addr4);
+ return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+ tparm.iph.ttl,
+ &arp_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analayzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+ mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+ sparms.dmac, false);
+ mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl,
+ sparms.ttl, sparms.smac,
+ be32_to_cpu(sparms.saddr.addr4),
+ be32_to_cpu(sparms.daddr.addr4));
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap4_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = {
+ .can_handle = is_gretap_dev,
+ .parms = mlxsw_sp_span_entry_gretap4_parms,
+ .configure = mlxsw_sp_span_entry_gretap4_configure,
+ .deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure,
+};
+
+static struct net_device *
+mlxsw_sp_span_gretap6_route(const struct net_device *to_dev,
+ struct in6_addr *saddrp,
+ struct in6_addr *daddrp)
+{
+ struct ip6_tnl *t = netdev_priv(to_dev);
+ struct flowi6 fl6 = t->fl.u.ip6;
+ struct net_device *dev = NULL;
+ struct dst_entry *dst;
+ struct rt6_info *rt6;
+
+ /* We assume "dev" stays valid after dst is released. */
+ ASSERT_RTNL();
+
+ fl6.flowi6_mark = t->parms.fwmark;
+ if (!ip6_tnl_xmit_ctl(t, &fl6.saddr, &fl6.daddr))
+ return NULL;
+
+ dst = ip6_route_output(t->net, NULL, &fl6);
+ if (!dst || dst->error)
+ goto out;
+
+ rt6 = container_of(dst, struct rt6_info, dst);
+
+ dev = dst->dev;
+ *saddrp = fl6.saddr;
+ *daddrp = rt6->rt6i_gateway;
+
+out:
+ dst_release(dst);
+ return dev;
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(to_dev);
+ bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS;
+ union mlxsw_sp_l3addr saddr = { .addr6 = tparm.laddr };
+ union mlxsw_sp_l3addr daddr = { .addr6 = tparm.raddr };
+ bool inherit_ttl = !tparm.hop_limit;
+ union mlxsw_sp_l3addr gw = daddr;
+ struct net_device *l3edev;
+
+ if (!(to_dev->flags & IFF_UP) ||
+ /* Reject tunnels with GRE keys, checksums, etc. */
+ tparm.i_flags || tparm.o_flags ||
+ /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+ inherit_ttl || !inherit_tos ||
+ /* A destination address may not be "any". */
+ mlxsw_sp_l3addr_is_zero(daddr))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ l3edev = mlxsw_sp_span_gretap6_route(to_dev, &saddr.addr6, &gw.addr6);
+ return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+ tparm.hop_limit,
+ &nd_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
char mpat_pl[MLXSW_REG_MPAT_LEN];
- u8 local_port = port->local_port;
- int index;
+ int pa_id = span_entry->id;
+
+ /* Create a new port analayzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+ mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+ sparms.dmac, false);
+ mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(mpat_pl, sparms.ttl, sparms.smac,
+ sparms.saddr.addr6,
+ sparms.daddr.addr6);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap6_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = {
+ .can_handle = is_ip6gretap_dev,
+ .parms = mlxsw_sp_span_entry_gretap6_parms,
+ .configure = mlxsw_sp_span_entry_gretap6_configure,
+ .deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure,
+};
+
+static const
+struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = {
+ &mlxsw_sp_span_entry_ops_phys,
+ &mlxsw_sp_span_entry_ops_gretap4,
+ &mlxsw_sp_span_entry_ops_gretap6,
+};
+
+static int
+mlxsw_sp_span_entry_nop_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_nop_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ return 0;
+}
+
+static void
+mlxsw_sp_span_entry_nop_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_nop = {
+ .parms = mlxsw_sp_span_entry_nop_parms,
+ .configure = mlxsw_sp_span_entry_nop_configure,
+ .deconfigure = mlxsw_sp_span_entry_nop_deconfigure,
+};
+
+static void
+mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ if (sparms.dest_port) {
+ if (sparms.dest_port->mlxsw_sp != mlxsw_sp) {
+ netdev_err(span_entry->to_dev, "Cannot mirror to %s, which belongs to a different mlxsw instance",
+ sparms.dest_port->dev->name);
+ sparms.dest_port = NULL;
+ } else if (span_entry->ops->configure(span_entry, sparms)) {
+ netdev_err(span_entry->to_dev, "Failed to offload mirror to %s",
+ sparms.dest_port->dev->name);
+ sparms.dest_port = NULL;
+ }
+ }
+
+ span_entry->parms = sparms;
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ if (span_entry->parms.dest_port)
+ span_entry->ops->deconfigure(span_entry);
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev,
+ const struct mlxsw_sp_span_entry_ops *ops,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_span_entry *span_entry = NULL;
int i;
- int err;
/* find a free entry to use */
- index = -1;
for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
if (!mlxsw_sp->span.entries[i].ref_count) {
- index = i;
span_entry = &mlxsw_sp->span.entries[i];
break;
}
}
- if (index < 0)
- return NULL;
-
- /* create a new port analayzer entry for local_port */
- mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true);
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
- if (err)
+ if (!span_entry)
return NULL;
- span_entry->id = index;
+ span_entry->ops = ops;
span_entry->ref_count = 1;
- span_entry->local_port = local_port;
+ span_entry->to_dev = to_dev;
+ mlxsw_sp_span_entry_configure(mlxsw_sp, span_entry, sparms);
+
return span_entry;
}
-static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_span_entry *span_entry)
+static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp_span_entry *span_entry)
{
- u8 local_port = span_entry->local_port;
- char mpat_pl[MLXSW_REG_MPAT_LEN];
- int pa_id = span_entry->id;
-
- mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false);
- mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+ mlxsw_sp_span_entry_deconfigure(span_entry);
}
struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev)
{
int i;
for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
- if (curr->ref_count && curr->local_port == local_port)
+ if (curr->ref_count && curr->to_dev == to_dev)
return curr;
}
return NULL;
}
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure(span_entry);
+ span_entry->ops = &mlxsw_sp_span_entry_ops_nop;
+}
+
static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
+mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id)
+{
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ if (curr->ref_count && curr->id == span_id)
+ return curr;
+ }
+ return NULL;
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev,
+ const struct mlxsw_sp_span_entry_ops *ops,
+ struct mlxsw_sp_span_parms sparms)
{
struct mlxsw_sp_span_entry *span_entry;
- span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp,
- port->local_port);
+ span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, to_dev);
if (span_entry) {
/* Already exists, just take a reference */
span_entry->ref_count++;
return span_entry;
}
- return mlxsw_sp_span_entry_create(port);
+ return mlxsw_sp_span_entry_create(mlxsw_sp, to_dev, ops, sparms);
}
static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
@@ -150,7 +540,7 @@ static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
{
WARN_ON(!span_entry->ref_count);
if (--span_entry->ref_count == 0)
- mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
+ mlxsw_sp_span_entry_destroy(span_entry);
return 0;
}
@@ -312,15 +702,41 @@ mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
kfree(inspected_port);
}
+static const struct mlxsw_sp_span_entry_ops *
+mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxsw_sp_span_entry_types); ++i)
+ if (mlxsw_sp_span_entry_types[i]->can_handle(to_dev))
+ return mlxsw_sp_span_entry_types[i];
+
+ return NULL;
+}
+
int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
- struct mlxsw_sp_port *to,
- enum mlxsw_sp_span_type type, bool bind)
+ const struct net_device *to_dev,
+ enum mlxsw_sp_span_type type, bool bind,
+ int *p_span_id)
{
struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
+ const struct mlxsw_sp_span_entry_ops *ops;
+ struct mlxsw_sp_span_parms sparms = {0};
struct mlxsw_sp_span_entry *span_entry;
int err;
- span_entry = mlxsw_sp_span_entry_get(to);
+ ops = mlxsw_sp_span_entry_ops(mlxsw_sp, to_dev);
+ if (!ops) {
+ netdev_err(to_dev, "Cannot mirror to %s", to_dev->name);
+ return -EOPNOTSUPP;
+ }
+
+ err = ops->parms(to_dev, &sparms);
+ if (err)
+ return err;
+
+ span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms);
if (!span_entry)
return -ENOENT;
@@ -331,6 +747,7 @@ int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
if (err)
goto err_port_bind;
+ *p_span_id = span_entry->id;
return 0;
err_port_bind:
@@ -338,13 +755,12 @@ err_port_bind:
return err;
}
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
enum mlxsw_sp_span_type type, bool bind)
{
struct mlxsw_sp_span_entry *span_entry;
- span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp,
- destination_port);
+ span_entry = mlxsw_sp_span_entry_find_by_id(from->mlxsw_sp, span_id);
if (!span_entry) {
netdev_err(from->dev, "no span entry found\n");
return;
@@ -354,3 +770,27 @@ void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
span_entry->id);
mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
}
+
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+ int err;
+
+ ASSERT_RTNL();
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+ struct mlxsw_sp_span_parms sparms = {0};
+
+ if (!curr->ref_count)
+ continue;
+
+ err = curr->ops->parms(curr->to_dev, &sparms);
+ if (err)
+ continue;
+
+ if (memcmp(&sparms, &curr->parms, sizeof(sparms))) {
+ mlxsw_sp_span_entry_deconfigure(curr);
+ mlxsw_sp_span_entry_configure(mlxsw_sp, curr, sparms);
+ }
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index 069050e385ff..948aceb512c5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -35,6 +35,9 @@
#define _MLXSW_SPECTRUM_SPAN_H
#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "spectrum_router.h"
struct mlxsw_sp;
struct mlxsw_sp_port;
@@ -50,23 +53,51 @@ struct mlxsw_sp_span_inspected_port {
u8 local_port;
};
+struct mlxsw_sp_span_parms {
+ struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */
+ unsigned int ttl;
+ unsigned char dmac[ETH_ALEN];
+ unsigned char smac[ETH_ALEN];
+ union mlxsw_sp_l3addr daddr;
+ union mlxsw_sp_l3addr saddr;
+};
+
+struct mlxsw_sp_span_entry_ops;
+
struct mlxsw_sp_span_entry {
- u8 local_port;
+ const struct net_device *to_dev;
+ const struct mlxsw_sp_span_entry_ops *ops;
+ struct mlxsw_sp_span_parms parms;
struct list_head bound_ports_list;
int ref_count;
int id;
};
+struct mlxsw_sp_span_entry_ops {
+ bool (*can_handle)(const struct net_device *to_dev);
+ int (*parms)(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp);
+ int (*configure)(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms);
+ void (*deconfigure)(struct mlxsw_sp_span_entry *span_entry);
+};
+
int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp);
int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
- struct mlxsw_sp_port *to,
- enum mlxsw_sp_span_type type, bool bind);
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
+ const struct net_device *to_dev,
+ enum mlxsw_sp_span_type type,
+ bool bind, int *p_span_id);
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
enum mlxsw_sp_span_type type, bool bind);
struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port);
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev);
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry);
int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index f9f53af04fe1..917663adf925 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1882,14 +1882,10 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
struct netlink_ext_ack *extack)
{
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ struct net_device *dev = bridge_port->dev;
u16 vid;
- if (!is_vlan_dev(bridge_port->dev)) {
- NL_SET_ERR_MSG_MOD(extack, "Only VLAN devices can be enslaved to a VLAN-unaware bridge");
- return -EINVAL;
- }
- vid = vlan_dev_vlan_id(bridge_port->dev);
-
+ vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
if (WARN_ON(!mlxsw_sp_port_vlan))
return -EINVAL;
@@ -1912,8 +1908,10 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
struct mlxsw_sp_port *mlxsw_sp_port)
{
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
- u16 vid = vlan_dev_vlan_id(bridge_port->dev);
+ struct net_device *dev = bridge_port->dev;
+ u16 vid;
+ vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
if (WARN_ON(!mlxsw_sp_port_vlan))
return;
diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c
index d5b28884e21e..51fa82b429a3 100644
--- a/drivers/net/ethernet/natsemi/jazzsonic.c
+++ b/drivers/net/ethernet/natsemi/jazzsonic.c
@@ -60,14 +60,6 @@ do { \
*((volatile unsigned int *)dev->base_addr+(reg)) = (val); \
} while (0)
-
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
/*
* We cannot use station (ethernet) address prefixes to detect the
* sonic controller since these are board manufacturer depended.
@@ -117,7 +109,6 @@ static const struct net_device_ops sonic_netdev_ops = {
static int sonic_probe1(struct net_device *dev)
{
- static unsigned version_printed;
unsigned int silicon_revision;
unsigned int val;
struct sonic_local *lp = netdev_priv(dev);
@@ -133,26 +124,17 @@ static int sonic_probe1(struct net_device *dev)
* the expected location.
*/
silicon_revision = SONIC_READ(SONIC_SR);
- if (sonic_debug > 1)
- printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
i = 0;
while (known_revisions[i] != 0xffff &&
known_revisions[i] != silicon_revision)
i++;
if (known_revisions[i] == 0xffff) {
- printk("SONIC ethernet controller not found (0x%4x)\n",
- silicon_revision);
+ pr_info("SONIC ethernet controller not found (0x%4x)\n",
+ silicon_revision);
goto out;
}
- if (sonic_debug && version_printed++ == 0)
- printk(version);
-
- printk(KERN_INFO "%s: Sonic ethernet found at 0x%08lx, ",
- dev_name(lp->device), dev->base_addr);
-
/*
* Put the sonic into software reset, then
* retrieve and print the ethernet address.
@@ -245,12 +227,16 @@ static int jazz_sonic_probe(struct platform_device *pdev)
err = sonic_probe1(dev);
if (err)
goto out;
+
+ pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
+ sonic_msg_init(dev);
+
err = register_netdev(dev);
if (err)
goto out1;
- printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
return 0;
out1:
@@ -262,8 +248,6 @@ out:
}
MODULE_DESCRIPTION("Jazz SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "jazzsonic debug level (1-4)");
MODULE_ALIAS("platform:jazzsonic");
#include "sonic.c"
diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c
index b922ab5cedea..0937fc2a928e 100644
--- a/drivers/net/ethernet/natsemi/macsonic.c
+++ b/drivers/net/ethernet/natsemi/macsonic.c
@@ -60,8 +60,6 @@
#include <asm/macints.h>
#include <asm/mac_via.h>
-static char mac_sonic_string[] = "macsonic";
-
#include "sonic.h"
/* These should basically be bus-size and endian independent (since
@@ -72,15 +70,6 @@ static char mac_sonic_string[] = "macsonic";
#define SONIC_WRITE(reg,val) (nubus_writew(val, dev->base_addr + (reg * 4) \
+ lp->reg_offset))
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
-static int sonic_version_printed;
-
/* For onboard SONIC */
#define ONBOARD_SONIC_REGISTERS 0x50F0A000
#define ONBOARD_SONIC_PROM_BASE 0x50f08000
@@ -313,11 +302,6 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
int sr;
bool commslot = macintosh_config->expansion_type == MAC_EXP_PDS_COMM;
- if (!MACH_IS_MAC)
- return -ENODEV;
-
- printk(KERN_INFO "Checking for internal Macintosh ethernet (SONIC).. ");
-
/* Bogus probing, on the models which may or may not have
Ethernet (BTW, the Ethernet *is* always at the same
address, and nothing else lives there, at least if Apple's
@@ -327,13 +311,11 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
card_present = hwreg_present((void*)ONBOARD_SONIC_REGISTERS);
if (!card_present) {
- printk("none.\n");
+ pr_info("Onboard/comm-slot SONIC not found\n");
return -ENODEV;
}
}
- printk("yes\n");
-
/* Danger! My arms are flailing wildly! You *must* set lp->reg_offset
* and dev->base_addr before using SONIC_READ() or SONIC_WRITE() */
dev->base_addr = ONBOARD_SONIC_REGISTERS;
@@ -342,18 +324,10 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
else
dev->irq = IRQ_NUBUS_9;
- if (!sonic_version_printed) {
- printk(KERN_INFO "%s", version);
- sonic_version_printed = 1;
- }
- printk(KERN_INFO "%s: onboard / comm-slot SONIC at 0x%08lx\n",
- dev_name(lp->device), dev->base_addr);
-
/* The PowerBook's SONIC is 16 bit always. */
if (macintosh_config->ident == MAC_MODEL_PB520) {
lp->reg_offset = 0;
lp->dma_bitmode = SONIC_BITMODE16;
- sr = SONIC_READ(SONIC_SR);
} else if (commslot) {
/* Some of the comm-slot cards are 16 bit. But some
of them are not. The 32-bit cards use offset 2 and
@@ -370,22 +344,21 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
else {
lp->dma_bitmode = SONIC_BITMODE16;
lp->reg_offset = 0;
- sr = SONIC_READ(SONIC_SR);
}
} else {
/* All onboard cards are at offset 2 with 32 bit DMA. */
lp->reg_offset = 2;
lp->dma_bitmode = SONIC_BITMODE32;
- sr = SONIC_READ(SONIC_SR);
}
- printk(KERN_INFO
- "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
- dev_name(lp->device), sr, lp->dma_bitmode?32:16, lp->reg_offset);
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
- printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
- SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+ pr_info("Onboard/comm-slot SONIC, revision 0x%04x, %d bit DMA, register offset %d\n",
+ SONIC_READ(SONIC_SR), lp->dma_bitmode ? 32 : 16,
+ lp->reg_offset);
+
+ /* This is sometimes useful to find out how MacOS configured the card */
+ pr_debug("%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+ SONIC_READ(SONIC_DCR) & 0xffff,
+ SONIC_READ(SONIC_DCR2) & 0xffff);
/* Software reset, then initialize control registers. */
SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
@@ -406,11 +379,14 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
/* Now look for the MAC address. */
mac_onboard_sonic_ethernet_addr(dev);
+ pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
/* Shared init code */
return macsonic_init(dev);
}
-static int mac_nubus_sonic_ethernet_addr(struct net_device *dev,
+static int mac_sonic_nubus_ethernet_addr(struct net_device *dev,
unsigned long prom_addr, int id)
{
int i;
@@ -449,70 +425,49 @@ static int macsonic_ident(struct nubus_rsrc *fres)
return -1;
}
-static int mac_nubus_sonic_probe(struct net_device *dev)
+static int mac_sonic_nubus_probe_board(struct nubus_board *board, int id,
+ struct net_device *dev)
{
- static int slots;
- struct nubus_rsrc *ndev = NULL;
struct sonic_local* lp = netdev_priv(dev);
unsigned long base_addr, prom_addr;
u16 sonic_dcr;
- int id = -1;
int reg_offset, dma_bitmode;
- /* Find the first SONIC that hasn't been initialized already */
- for_each_func_rsrc(ndev) {
- if (ndev->category != NUBUS_CAT_NETWORK ||
- ndev->type != NUBUS_TYPE_ETHERNET)
- continue;
-
- /* Have we seen it already? */
- if (slots & (1<<ndev->board->slot))
- continue;
- slots |= 1<<ndev->board->slot;
-
- /* Is it one of ours? */
- if ((id = macsonic_ident(ndev)) != -1)
- break;
- }
-
- if (ndev == NULL)
- return -ENODEV;
-
switch (id) {
case MACSONIC_DUODOCK:
- base_addr = ndev->board->slot_addr + DUODOCK_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + DUODOCK_SONIC_PROM_BASE;
+ base_addr = board->slot_addr + DUODOCK_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + DUODOCK_SONIC_PROM_BASE;
sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT0 | SONIC_DCR_RFT1 |
SONIC_DCR_TFT0;
reg_offset = 2;
dma_bitmode = SONIC_BITMODE32;
break;
case MACSONIC_APPLE:
- base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+ base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
sonic_dcr = SONIC_DCR_BMS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0;
reg_offset = 0;
dma_bitmode = SONIC_BITMODE32;
break;
case MACSONIC_APPLE16:
- base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+ base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
SONIC_DCR_PO1 | SONIC_DCR_BMS;
reg_offset = 0;
dma_bitmode = SONIC_BITMODE16;
break;
case MACSONIC_DAYNALINK:
- base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + DAYNALINK_PROM_BASE;
+ base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + DAYNALINK_PROM_BASE;
sonic_dcr = SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
SONIC_DCR_PO1 | SONIC_DCR_BMS;
reg_offset = 0;
dma_bitmode = SONIC_BITMODE16;
break;
case MACSONIC_DAYNA:
- base_addr = ndev->board->slot_addr + DAYNA_SONIC_REGISTERS;
- prom_addr = ndev->board->slot_addr + DAYNA_SONIC_MAC_ADDR;
+ base_addr = board->slot_addr + DAYNA_SONIC_REGISTERS;
+ prom_addr = board->slot_addr + DAYNA_SONIC_MAC_ADDR;
sonic_dcr = SONIC_DCR_BMS |
SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_PO1;
reg_offset = 0;
@@ -528,21 +483,16 @@ static int mac_nubus_sonic_probe(struct net_device *dev)
dev->base_addr = base_addr;
lp->reg_offset = reg_offset;
lp->dma_bitmode = dma_bitmode;
- dev->irq = SLOT2IRQ(ndev->board->slot);
+ dev->irq = SLOT2IRQ(board->slot);
- if (!sonic_version_printed) {
- printk(KERN_INFO "%s", version);
- sonic_version_printed = 1;
- }
- printk(KERN_INFO "%s: %s in slot %X\n",
- dev_name(lp->device), ndev->board->name, ndev->board->slot);
- printk(KERN_INFO "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
- dev_name(lp->device), SONIC_READ(SONIC_SR), dma_bitmode?32:16, reg_offset);
+ dev_info(&board->dev, "%s, revision 0x%04x, %d bit DMA, register offset %d\n",
+ board->name, SONIC_READ(SONIC_SR),
+ lp->dma_bitmode ? 32 : 16, lp->reg_offset);
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
- printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
- SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+ /* This is sometimes useful to find out how MacOS configured the card */
+ dev_dbg(&board->dev, "%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+ SONIC_READ(SONIC_DCR) & 0xffff,
+ SONIC_READ(SONIC_DCR2) & 0xffff);
/* Software reset, then initialize control registers. */
SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
@@ -557,14 +507,17 @@ static int mac_nubus_sonic_probe(struct net_device *dev)
SONIC_WRITE(SONIC_ISR, 0x7fff);
/* Now look for the MAC address. */
- if (mac_nubus_sonic_ethernet_addr(dev, prom_addr, id) != 0)
+ if (mac_sonic_nubus_ethernet_addr(dev, prom_addr, id) != 0)
return -ENODEV;
+ dev_info(&board->dev, "SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
/* Shared init code */
return macsonic_init(dev);
}
-static int mac_sonic_probe(struct platform_device *pdev)
+static int mac_sonic_platform_probe(struct platform_device *pdev)
{
struct net_device *dev;
struct sonic_local *lp;
@@ -579,22 +532,16 @@ static int mac_sonic_probe(struct platform_device *pdev)
SET_NETDEV_DEV(dev, &pdev->dev);
platform_set_drvdata(pdev, dev);
- /* This will catch fatal stuff like -ENOMEM as well as success */
err = mac_onboard_sonic_probe(dev);
- if (err == 0)
- goto found;
- if (err != -ENODEV)
- goto out;
- err = mac_nubus_sonic_probe(dev);
if (err)
goto out;
-found:
+
+ sonic_msg_init(dev);
+
err = register_netdev(dev);
if (err)
goto out;
- printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
return 0;
out:
@@ -604,13 +551,11 @@ out:
}
MODULE_DESCRIPTION("Macintosh SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "macsonic debug level (1-4)");
MODULE_ALIAS("platform:macsonic");
#include "sonic.c"
-static int mac_sonic_device_remove(struct platform_device *pdev)
+static int mac_sonic_platform_remove(struct platform_device *pdev)
{
struct net_device *dev = platform_get_drvdata(pdev);
struct sonic_local* lp = netdev_priv(dev);
@@ -623,12 +568,105 @@ static int mac_sonic_device_remove(struct platform_device *pdev)
return 0;
}
-static struct platform_driver mac_sonic_driver = {
- .probe = mac_sonic_probe,
- .remove = mac_sonic_device_remove,
- .driver = {
- .name = mac_sonic_string,
+static struct platform_driver mac_sonic_platform_driver = {
+ .probe = mac_sonic_platform_probe,
+ .remove = mac_sonic_platform_remove,
+ .driver = {
+ .name = "macsonic",
+ },
+};
+
+static int mac_sonic_nubus_probe(struct nubus_board *board)
+{
+ struct net_device *ndev;
+ struct sonic_local *lp;
+ struct nubus_rsrc *fres;
+ int id = -1;
+ int err;
+
+ /* The platform driver will handle a PDS or Comm Slot card (even if
+ * it has a pseudoslot declaration ROM).
+ */
+ if (macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+ return -ENODEV;
+
+ for_each_board_func_rsrc(board, fres) {
+ if (fres->category != NUBUS_CAT_NETWORK ||
+ fres->type != NUBUS_TYPE_ETHERNET)
+ continue;
+
+ id = macsonic_ident(fres);
+ if (id != -1)
+ break;
+ }
+ if (!fres)
+ return -ENODEV;
+
+ ndev = alloc_etherdev(sizeof(struct sonic_local));
+ if (!ndev)
+ return -ENOMEM;
+
+ lp = netdev_priv(ndev);
+ lp->device = &board->dev;
+ SET_NETDEV_DEV(ndev, &board->dev);
+
+ err = mac_sonic_nubus_probe_board(board, id, ndev);
+ if (err)
+ goto out;
+
+ sonic_msg_init(ndev);
+
+ err = register_netdev(ndev);
+ if (err)
+ goto out;
+
+ nubus_set_drvdata(board, ndev);
+
+ return 0;
+
+out:
+ free_netdev(ndev);
+ return err;
+}
+
+static int mac_sonic_nubus_remove(struct nubus_board *board)
+{
+ struct net_device *ndev = nubus_get_drvdata(board);
+ struct sonic_local *lp = netdev_priv(ndev);
+
+ unregister_netdev(ndev);
+ dma_free_coherent(lp->device,
+ SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+ lp->descriptors, lp->descriptors_laddr);
+ free_netdev(ndev);
+
+ return 0;
+}
+
+static struct nubus_driver mac_sonic_nubus_driver = {
+ .probe = mac_sonic_nubus_probe,
+ .remove = mac_sonic_nubus_remove,
+ .driver = {
+ .name = "macsonic-nubus",
+ .owner = THIS_MODULE,
},
};
-module_platform_driver(mac_sonic_driver);
+static int perr, nerr;
+
+static int __init mac_sonic_init(void)
+{
+ perr = platform_driver_register(&mac_sonic_platform_driver);
+ nerr = nubus_driver_register(&mac_sonic_nubus_driver);
+ return 0;
+}
+module_init(mac_sonic_init);
+
+static void __exit mac_sonic_exit(void)
+{
+ if (!perr)
+ platform_driver_unregister(&mac_sonic_platform_driver);
+ if (!nerr)
+ nubus_driver_unregister(&mac_sonic_nubus_driver);
+}
+module_exit(mac_sonic_exit);
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c
index 612c7a44b26c..7ed08486ae23 100644
--- a/drivers/net/ethernet/natsemi/sonic.c
+++ b/drivers/net/ethernet/natsemi/sonic.c
@@ -33,7 +33,21 @@
* the NetBSD file "sys/arch/mac68k/dev/if_sn.c".
*/
+static unsigned int version_printed;
+static int sonic_debug = -1;
+module_param(sonic_debug, int, 0);
+MODULE_PARM_DESC(sonic_debug, "debug message level");
+
+static void sonic_msg_init(struct net_device *dev)
+{
+ struct sonic_local *lp = netdev_priv(dev);
+
+ lp->msg_enable = netif_msg_init(sonic_debug, 0);
+
+ if (version_printed++ == 0)
+ netif_dbg(lp, drv, dev, "%s", version);
+}
/*
* Open/initialize the SONIC controller.
@@ -47,8 +61,7 @@ static int sonic_open(struct net_device *dev)
struct sonic_local *lp = netdev_priv(dev);
int i;
- if (sonic_debug > 2)
- printk("sonic_open: initializing sonic driver.\n");
+ netif_dbg(lp, ifup, dev, "%s: initializing sonic driver\n", __func__);
for (i = 0; i < SONIC_NUM_RRS; i++) {
struct sk_buff *skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2);
@@ -95,8 +108,7 @@ static int sonic_open(struct net_device *dev)
netif_start_queue(dev);
- if (sonic_debug > 2)
- printk("sonic_open: Initialization done.\n");
+ netif_dbg(lp, ifup, dev, "%s: Initialization done\n", __func__);
return 0;
}
@@ -110,8 +122,7 @@ static int sonic_close(struct net_device *dev)
struct sonic_local *lp = netdev_priv(dev);
int i;
- if (sonic_debug > 2)
- printk("sonic_close\n");
+ netif_dbg(lp, ifdown, dev, "%s\n", __func__);
netif_stop_queue(dev);
@@ -205,8 +216,7 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
int length;
int entry = lp->next_tx;
- if (sonic_debug > 2)
- printk("sonic_send_packet: skb=%p, dev=%p\n", skb, dev);
+ netif_dbg(lp, tx_queued, dev, "%s: skb=%p\n", __func__, skb);
length = skb->len;
if (length < ETH_ZLEN) {
@@ -252,14 +262,12 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
lp->next_tx = (entry + 1) & SONIC_TDS_MASK;
if (lp->tx_skb[lp->next_tx] != NULL) {
/* The ring is full, the ISR has yet to process the next TD. */
- if (sonic_debug > 3)
- printk("%s: stopping queue\n", dev->name);
+ netif_dbg(lp, tx_queued, dev, "%s: stopping queue\n", __func__);
netif_stop_queue(dev);
/* after this packet, wait for ISR to free up some TDAs */
} else netif_start_queue(dev);
- if (sonic_debug > 2)
- printk("sonic_send_packet: issuing Tx command\n");
+ netif_dbg(lp, tx_queued, dev, "%s: issuing Tx command\n", __func__);
SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP);
@@ -281,8 +289,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
do {
if (status & SONIC_INT_PKTRX) {
- if (sonic_debug > 2)
- printk("%s: packet rx\n", dev->name);
+ netif_dbg(lp, intr, dev, "%s: packet rx\n", __func__);
sonic_rx(dev); /* got packet(s) */
SONIC_WRITE(SONIC_ISR, SONIC_INT_PKTRX); /* clear the interrupt */
}
@@ -299,8 +306,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
* still being allocated by sonic_send_packet (status clear & tx_skb[entry] clear)
*/
- if (sonic_debug > 2)
- printk("%s: tx done\n", dev->name);
+ netif_dbg(lp, intr, dev, "%s: tx done\n", __func__);
while (lp->tx_skb[entry] != NULL) {
if ((td_status = sonic_tda_get(dev, entry, SONIC_TD_STATUS)) == 0)
@@ -346,20 +352,20 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
* check error conditions
*/
if (status & SONIC_INT_RFO) {
- if (sonic_debug > 1)
- printk("%s: rx fifo overrun\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx fifo overrun\n",
+ __func__);
lp->stats.rx_fifo_errors++;
SONIC_WRITE(SONIC_ISR, SONIC_INT_RFO); /* clear the interrupt */
}
if (status & SONIC_INT_RDE) {
- if (sonic_debug > 1)
- printk("%s: rx descriptors exhausted\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx descriptors exhausted\n",
+ __func__);
lp->stats.rx_dropped++;
SONIC_WRITE(SONIC_ISR, SONIC_INT_RDE); /* clear the interrupt */
}
if (status & SONIC_INT_RBAE) {
- if (sonic_debug > 1)
- printk("%s: rx buffer area exceeded\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx buffer area exceeded\n",
+ __func__);
lp->stats.rx_dropped++;
SONIC_WRITE(SONIC_ISR, SONIC_INT_RBAE); /* clear the interrupt */
}
@@ -380,8 +386,9 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
/* transmit error */
if (status & SONIC_INT_TXER) {
- if ((SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) && (sonic_debug > 2))
- printk(KERN_ERR "%s: tx fifo underrun\n", dev->name);
+ if (SONIC_READ(SONIC_TCR) & SONIC_TCR_FU)
+ netif_dbg(lp, tx_err, dev, "%s: tx fifo underrun\n",
+ __func__);
SONIC_WRITE(SONIC_ISR, SONIC_INT_TXER); /* clear the interrupt */
}
@@ -475,8 +482,8 @@ static void sonic_rx(struct net_device *dev)
if (lp->cur_rwp >= lp->rra_end) lp->cur_rwp = lp->rra_laddr & 0xffff;
SONIC_WRITE(SONIC_RWP, lp->cur_rwp);
if (SONIC_READ(SONIC_ISR) & SONIC_INT_RBE) {
- if (sonic_debug > 2)
- printk("%s: rx buffer exhausted\n", dev->name);
+ netif_dbg(lp, rx_err, dev, "%s: rx buffer exhausted\n",
+ __func__);
SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */
}
} else
@@ -542,9 +549,8 @@ static void sonic_multicast_list(struct net_device *dev)
(netdev_mc_count(dev) > 15)) {
rcr |= SONIC_RCR_AMC;
} else {
- if (sonic_debug > 2)
- printk("sonic_multicast_list: mc_count %d\n",
- netdev_mc_count(dev));
+ netif_dbg(lp, ifup, dev, "%s: mc_count %d\n", __func__,
+ netdev_mc_count(dev));
sonic_set_cam_enable(dev, 1); /* always enable our own address */
i = 1;
netdev_for_each_mc_addr(ha, dev) {
@@ -562,8 +568,7 @@ static void sonic_multicast_list(struct net_device *dev)
}
}
- if (sonic_debug > 2)
- printk("sonic_multicast_list: setting RCR=%x\n", rcr);
+ netif_dbg(lp, ifup, dev, "%s: setting RCR=%x\n", __func__, rcr);
SONIC_WRITE(SONIC_RCR, rcr);
}
@@ -596,8 +601,8 @@ static int sonic_init(struct net_device *dev)
/*
* initialize the receive resource area
*/
- if (sonic_debug > 2)
- printk("sonic_init: initialize receive resource area\n");
+ netif_dbg(lp, ifup, dev, "%s: initialize receive resource area\n",
+ __func__);
for (i = 0; i < SONIC_NUM_RRS; i++) {
u16 bufadr_l = (unsigned long)lp->rx_laddr[i] & 0xffff;
@@ -622,8 +627,7 @@ static int sonic_init(struct net_device *dev)
SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE >> 1) - (lp->dma_bitmode ? 2 : 1));
/* load the resource pointers */
- if (sonic_debug > 3)
- printk("sonic_init: issuing RRRA command\n");
+ netif_dbg(lp, ifup, dev, "%s: issuing RRRA command\n", __func__);
SONIC_WRITE(SONIC_CMD, SONIC_CR_RRRA);
i = 0;
@@ -632,16 +636,17 @@ static int sonic_init(struct net_device *dev)
break;
}
- if (sonic_debug > 2)
- printk("sonic_init: status=%x i=%d\n", SONIC_READ(SONIC_CMD), i);
+ netif_dbg(lp, ifup, dev, "%s: status=%x, i=%d\n", __func__,
+ SONIC_READ(SONIC_CMD), i);
/*
* Initialize the receive descriptors so that they
* become a circular linked list, ie. let the last
* descriptor point to the first again.
*/
- if (sonic_debug > 2)
- printk("sonic_init: initialize receive descriptors\n");
+ netif_dbg(lp, ifup, dev, "%s: initialize receive descriptors\n",
+ __func__);
+
for (i=0; i<SONIC_NUM_RDS; i++) {
sonic_rda_put(dev, i, SONIC_RD_STATUS, 0);
sonic_rda_put(dev, i, SONIC_RD_PKTLEN, 0);
@@ -664,8 +669,9 @@ static int sonic_init(struct net_device *dev)
/*
* initialize transmit descriptors
*/
- if (sonic_debug > 2)
- printk("sonic_init: initialize transmit descriptors\n");
+ netif_dbg(lp, ifup, dev, "%s: initialize transmit descriptors\n",
+ __func__);
+
for (i = 0; i < SONIC_NUM_TDS; i++) {
sonic_tda_put(dev, i, SONIC_TD_STATUS, 0);
sonic_tda_put(dev, i, SONIC_TD_CONFIG, 0);
@@ -712,10 +718,8 @@ static int sonic_init(struct net_device *dev)
if (SONIC_READ(SONIC_ISR) & SONIC_INT_LCD)
break;
}
- if (sonic_debug > 2) {
- printk("sonic_init: CMD=%x, ISR=%x\n, i=%d",
- SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
- }
+ netif_dbg(lp, ifup, dev, "%s: CMD=%x, ISR=%x, i=%d\n", __func__,
+ SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
/*
* enable receiver, disable loopback
@@ -731,9 +735,8 @@ static int sonic_init(struct net_device *dev)
if ((cmd & SONIC_CR_RXEN) == 0 || (cmd & SONIC_CR_STP) == 0)
printk(KERN_ERR "sonic_init: failed, status=%x\n", cmd);
- if (sonic_debug > 2)
- printk("sonic_init: new status=%x\n",
- SONIC_READ(SONIC_CMD));
+ netif_dbg(lp, ifup, dev, "%s: new status=%x\n", __func__,
+ SONIC_READ(SONIC_CMD));
return 0;
}
diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h
index 421b1a283fed..2b27f7049acb 100644
--- a/drivers/net/ethernet/natsemi/sonic.h
+++ b/drivers/net/ethernet/natsemi/sonic.h
@@ -319,6 +319,7 @@ struct sonic_local {
unsigned int eol_rx;
unsigned int eol_tx; /* last unacked transmit packet */
unsigned int next_tx; /* next free TD */
+ int msg_enable;
struct device *device; /* generic device */
struct net_device_stats stats;
};
@@ -336,6 +337,7 @@ static struct net_device_stats *sonic_get_stats(struct net_device *dev);
static void sonic_multicast_list(struct net_device *dev);
static int sonic_init(struct net_device *dev);
static void sonic_tx_timeout(struct net_device *dev);
+static void sonic_msg_init(struct net_device *dev);
/* Internal inlines for reading/writing DMA buffers. Note that bus
size and endianness matter here, whereas they don't for registers,
diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c
index 1817deea98a4..e1b886e87a76 100644
--- a/drivers/net/ethernet/natsemi/xtsonic.c
+++ b/drivers/net/ethernet/natsemi/xtsonic.c
@@ -73,14 +73,6 @@ extern void xtboard_get_ether_addr(unsigned char *buf);
#define SONIC_WRITE(reg,val) \
*((volatile unsigned int *)dev->base_addr+reg) = val
-
-/* Use 0 for production, 1 for verification, and >2 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
/*
* We cannot use station (ethernet) address prefixes to detect the
* sonic controller since these are board manufacturer depended.
@@ -130,7 +122,6 @@ static const struct net_device_ops xtsonic_netdev_ops = {
static int __init sonic_probe1(struct net_device *dev)
{
- static unsigned version_printed = 0;
unsigned int silicon_revision;
struct sonic_local *lp = netdev_priv(dev);
unsigned int base_addr = dev->base_addr;
@@ -146,23 +137,17 @@ static int __init sonic_probe1(struct net_device *dev)
* the expected location.
*/
silicon_revision = SONIC_READ(SONIC_SR);
- if (sonic_debug > 1)
- printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
i = 0;
while ((known_revisions[i] != 0xffff) &&
(known_revisions[i] != silicon_revision))
i++;
if (known_revisions[i] == 0xffff) {
- printk("SONIC ethernet controller not found (0x%4x)\n",
- silicon_revision);
+ pr_info("SONIC ethernet controller not found (0x%4x)\n",
+ silicon_revision);
return -ENODEV;
}
- if (sonic_debug && version_printed++ == 0)
- printk(version);
-
/*
* Put the sonic into software reset, then retrieve ethernet address.
* Note: we are assuming that the boot-loader has initialized the cam.
@@ -273,12 +258,15 @@ int xtsonic_probe(struct platform_device *pdev)
if ((err = sonic_probe1(dev)))
goto out;
+
+ pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+ dev->base_addr, dev->dev_addr, dev->irq);
+
+ sonic_msg_init(dev);
+
if ((err = register_netdev(dev)))
goto out1;
- printk("%s: SONIC ethernet @%08lx, MAC %pM, IRQ %d\n", dev->name,
- dev->base_addr, dev->dev_addr, dev->irq);
-
return 0;
out1:
@@ -290,8 +278,6 @@ out:
}
MODULE_DESCRIPTION("Xtensa XT2000 SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "xtsonic debug level (1-4)");
#include "sonic.c"
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index ca4a81dc1ace..03ad4eeac7f8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -1784,7 +1784,7 @@ enum qed_iwarp_mpa_pkt_type {
/* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */
#define QED_IWARP_MAX_BDS_PER_FPDU 3
-char *pkt_type_str[] = {
+static const char * const pkt_type_str[] = {
"QED_IWARP_MPA_PKT_PACKED",
"QED_IWARP_MPA_PKT_PARTIAL",
"QED_IWARP_MPA_PKT_UNALIGNED"
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 96db3283eecf..6fd13334aa28 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -735,11 +735,6 @@ struct ring_info {
u8 __pad[sizeof(void *) - sizeof(u32)];
};
-enum features {
- RTL_FEATURE_MSI = (1 << 0),
- RTL_FEATURE_GMII = (1 << 1),
-};
-
struct rtl8169_counters {
__le64 tx_packets;
__le64 rx_packets;
@@ -7847,7 +7842,7 @@ static int rtl8169_close(struct net_device *dev)
cancel_work_sync(&tp->wk.work);
- free_irq(pdev->irq, dev);
+ pci_free_irq(pdev, 0, dev);
dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
tp->RxPhyAddr);
@@ -7866,7 +7861,7 @@ static void rtl8169_netpoll(struct net_device *dev)
{
struct rtl8169_private *tp = netdev_priv(dev);
- rtl8169_interrupt(tp->pci_dev->irq, dev);
+ rtl8169_interrupt(pci_irq_vector(tp->pci_dev, 0), dev);
}
#endif
@@ -7903,9 +7898,8 @@ static int rtl_open(struct net_device *dev)
rtl_request_firmware(tp);
- retval = request_irq(pdev->irq, rtl8169_interrupt,
- (tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED,
- dev->name, dev);
+ retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, dev,
+ dev->name);
if (retval < 0)
goto err_release_fw_2;
@@ -8235,7 +8229,7 @@ static const struct rtl_cfg_info {
unsigned int region;
unsigned int align;
u16 event_slow;
- unsigned features;
+ unsigned int has_gmii:1;
const struct rtl_coalesce_info *coalesce_info;
u8 default_ver;
} rtl_cfg_infos [] = {
@@ -8244,7 +8238,7 @@ static const struct rtl_cfg_info {
.region = 1,
.align = 0,
.event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver,
- .features = RTL_FEATURE_GMII,
+ .has_gmii = 1,
.coalesce_info = rtl_coalesce_info_8169,
.default_ver = RTL_GIGA_MAC_VER_01,
},
@@ -8253,7 +8247,7 @@ static const struct rtl_cfg_info {
.region = 2,
.align = 8,
.event_slow = SYSErr | LinkChg | RxOverflow,
- .features = RTL_FEATURE_GMII | RTL_FEATURE_MSI,
+ .has_gmii = 1,
.coalesce_info = rtl_coalesce_info_8168_8136,
.default_ver = RTL_GIGA_MAC_VER_11,
},
@@ -8263,32 +8257,26 @@ static const struct rtl_cfg_info {
.align = 8,
.event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver |
PCSTimeout,
- .features = RTL_FEATURE_MSI,
.coalesce_info = rtl_coalesce_info_8168_8136,
.default_ver = RTL_GIGA_MAC_VER_13,
}
};
-/* Cfg9346_Unlock assumed. */
-static unsigned rtl_try_msi(struct rtl8169_private *tp,
- const struct rtl_cfg_info *cfg)
+static int rtl_alloc_irq(struct rtl8169_private *tp)
{
void __iomem *ioaddr = tp->mmio_addr;
- unsigned msi = 0;
- u8 cfg2;
+ unsigned int flags;
- cfg2 = RTL_R8(Config2) & ~MSIEnable;
- if (cfg->features & RTL_FEATURE_MSI) {
- if (pci_enable_msi(tp->pci_dev)) {
- netif_info(tp, hw, tp->dev, "no MSI. Back to INTx.\n");
- } else {
- cfg2 |= MSIEnable;
- msi = RTL_FEATURE_MSI;
- }
+ if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
+ RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(Config2, RTL_R8(Config2) & ~MSIEnable);
+ RTL_W8(Cfg9346, Cfg9346_Lock);
+ flags = PCI_IRQ_LEGACY;
+ } else {
+ flags = PCI_IRQ_ALL_TYPES;
}
- if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
- RTL_W8(Config2, cfg2);
- return msi;
+
+ return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
}
DECLARE_RTL_COND(rtl_link_list_ready_cond)
@@ -8402,7 +8390,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
mii->mdio_write = rtl_mdio_write;
mii->phy_id_mask = 0x1f;
mii->reg_num_mask = 0x1f;
- mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII);
+ mii->supports_gmii = cfg->has_gmii;
/* disable ASPM completely as that cause random device stop working
* problems as well as full system hangs for some PCIe devices users */
@@ -8497,9 +8485,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
chipset = tp->mac_version;
tp->txd_version = rtl_chip_infos[chipset].txd_version;
- RTL_W8(Cfg9346, Cfg9346_Unlock);
- tp->features |= rtl_try_msi(tp, cfg);
- RTL_W8(Cfg9346, Cfg9346_Lock);
+ rc = rtl_alloc_irq(tp);
+ if (rc < 0) {
+ netif_err(tp, probe, dev, "Can't allocate interrupt\n");
+ return rc;
+ }
/* override BIOS settings, use userspace tools to enable WOL */
__rtl8169_set_wol(tp, 0);
@@ -8618,7 +8608,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n",
rtl_chip_infos[chipset].name, ioaddr, dev->dev_addr,
- (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff), pdev->irq);
+ (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff),
+ pci_irq_vector(pdev, 0));
if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) {
netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, "
"tx checksumming: %s]\n",
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index d7d5a6d15219..d3e1bc05ca9c 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -123,8 +123,8 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = {
[TSU_FWSL0] = 0x0030,
[TSU_FWSL1] = 0x0034,
[TSU_FWSLC] = 0x0038,
- [TSU_QTAG0] = 0x0040,
- [TSU_QTAG1] = 0x0044,
+ [TSU_QTAGM0] = 0x0040,
+ [TSU_QTAGM1] = 0x0044,
[TSU_FWSR] = 0x0050,
[TSU_FWINMK] = 0x0054,
[TSU_ADQT0] = 0x0048,
@@ -752,6 +752,7 @@ static struct sh_eth_cpu_data sh7757_data = {
.rpadir = 1,
.rpadir_value = 2 << 16,
.rtrate = 1,
+ .dual_port = 1,
};
#define SH_GIGA_ETH_BASE 0xfee00000UL
@@ -830,6 +831,7 @@ static struct sh_eth_cpu_data sh7757_data_giga = {
.no_trimd = 1,
.no_ade = 1,
.tsu = 1,
+ .dual_port = 1,
};
/* SH7734 */
@@ -900,6 +902,7 @@ static struct sh_eth_cpu_data sh7763_data = {
.tsu = 1,
.irq_flags = IRQF_SHARED,
.magic = 1,
+ .dual_port = 1,
};
static struct sh_eth_cpu_data sh7619_data = {
@@ -932,6 +935,7 @@ static struct sh_eth_cpu_data sh771x_data = {
EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP |
EESIPR_PREIP | EESIPR_CERFIP,
.tsu = 1,
+ .dual_port = 1,
};
static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd)
@@ -2097,8 +2101,6 @@ static size_t __sh_eth_get_regs(struct net_device *ndev, u32 *buf)
add_tsu_reg(TSU_FWSL0);
add_tsu_reg(TSU_FWSL1);
add_tsu_reg(TSU_FWSLC);
- add_tsu_reg(TSU_QTAG0);
- add_tsu_reg(TSU_QTAG1);
add_tsu_reg(TSU_QTAGM0);
add_tsu_reg(TSU_QTAGM1);
add_tsu_reg(TSU_FWSR);
@@ -2917,7 +2919,7 @@ static int sh_eth_vlan_rx_kill_vid(struct net_device *ndev,
/* SuperH's TSU register init function */
static void sh_eth_tsu_init(struct sh_eth_private *mdp)
{
- if (sh_eth_is_rz_fast_ether(mdp)) {
+ if (!mdp->cd->dual_port) {
sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */
sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL,
TSU_FWSLC); /* Enable POST registers */
@@ -2934,13 +2936,8 @@ static void sh_eth_tsu_init(struct sh_eth_private *mdp)
sh_eth_tsu_write(mdp, 0, TSU_FWSL0);
sh_eth_tsu_write(mdp, 0, TSU_FWSL1);
sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL, TSU_FWSLC);
- if (sh_eth_is_gether(mdp)) {
- sh_eth_tsu_write(mdp, 0, TSU_QTAG0); /* Disable QTAG(0->1) */
- sh_eth_tsu_write(mdp, 0, TSU_QTAG1); /* Disable QTAG(1->0) */
- } else {
- sh_eth_tsu_write(mdp, 0, TSU_QTAGM0); /* Disable QTAG(0->1) */
- sh_eth_tsu_write(mdp, 0, TSU_QTAGM1); /* Disable QTAG(1->0) */
- }
+ sh_eth_tsu_write(mdp, 0, TSU_QTAGM0); /* Disable QTAG(0->1) */
+ sh_eth_tsu_write(mdp, 0, TSU_QTAGM1); /* Disable QTAG(1->0) */
sh_eth_tsu_write(mdp, 0, TSU_FWSR); /* all interrupt status clear */
sh_eth_tsu_write(mdp, 0, TSU_FWINMK); /* Disable all interrupt */
sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index a6753ccba711..5bbaf9e56e92 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -118,8 +118,8 @@ enum {
TSU_FWSL0,
TSU_FWSL1,
TSU_FWSLC,
- TSU_QTAG0,
- TSU_QTAG1,
+ TSU_QTAG0, /* Same as TSU_QTAGM0 */
+ TSU_QTAG1, /* Same as TSU_QTAGM1 */
TSU_QTAGM0,
TSU_QTAGM1,
TSU_FWSR,
@@ -509,6 +509,7 @@ struct sh_eth_cpu_data {
unsigned rmiimode:1; /* EtherC has RMIIMODE register */
unsigned rtrate:1; /* EtherC has RTRATE register */
unsigned magic:1; /* EtherC has ECMR.MPDE and ECSR.MPD */
+ unsigned dual_port:1; /* Dual EtherC/E-DMAC */
};
struct sh_eth_private {
diff --git a/drivers/net/ethernet/sfc/falcon/enum.h b/drivers/net/ethernet/sfc/falcon/enum.h
index 30a1136fc909..4824fcf5c3d4 100644
--- a/drivers/net/ethernet/sfc/falcon/enum.h
+++ b/drivers/net/ethernet/sfc/falcon/enum.h
@@ -81,7 +81,6 @@ enum ef4_loopback_mode {
(1 << LOOPBACK_XAUI) | \
(1 << LOOPBACK_GMII) | \
(1 << LOOPBACK_SGMII) | \
- (1 << LOOPBACK_SGMII) | \
(1 << LOOPBACK_XGBR) | \
(1 << LOOPBACK_XFI) | \
(1 << LOOPBACK_XAUI_FAR) | \
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index c728ffa095de..2a6521d33e43 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -389,6 +389,8 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
static void dwmac4_release_tx_desc(struct dma_desc *p, int mode)
{
+ p->des0 = 0;
+ p->des1 = 0;
p->des2 = 0;
p->des3 = 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c8d86d77e03d..a9856a8bf8ad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1844,6 +1844,11 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
if (unlikely(status & tx_dma_own))
break;
+ /* Make sure descriptor fields are read after reading
+ * the own bit.
+ */
+ dma_rmb();
+
/* Just consider the last segment and ...*/
if (likely(!(status & tx_not_ls))) {
/* ... verify the status error condition */
@@ -2983,14 +2988,21 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
tcp_hdrlen(skb) / 4, (skb->len - proto_hdr_len));
/* If context desc is used to change MSS */
- if (mss_desc)
+ if (mss_desc) {
+ /* Make sure that first descriptor has been completely
+ * written, including its own bit. This is because MSS is
+ * actually before first descriptor, so we need to make
+ * sure that MSS's own bit is the last thing written.
+ */
+ dma_wmb();
priv->hw->desc->set_tx_owner(mss_desc);
+ }
/* The own bit must be the latest setting done when prepare the
* descriptor and then barrier is needed to make sure that
* all is coherent before granting the DMA engine.
*/
- dma_wmb();
+ wmb();
if (netif_msg_pktdata(priv)) {
pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n",
@@ -3214,7 +3226,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
* descriptor and then barrier is needed to make sure that
* all is coherent before granting the DMA engine.
*/
- dma_wmb();
+ wmb();
}
netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index b919e89a9b93..516dd59249d7 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1694,6 +1694,7 @@ static struct pernet_operations geneve_net_ops = {
.exit_batch = geneve_exit_batch_net,
.id = &geneve_net_id,
.size = sizeof(struct geneve_net),
+ .async = true,
};
static int __init geneve_init_module(void)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index f38e32a7ec9c..127edd23018f 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -1325,6 +1325,7 @@ static struct pernet_operations gtp_net_ops = {
.exit = gtp_net_exit,
.id = &gtp_net_id,
.size = sizeof(struct gtp_net),
+ .async = true,
};
static int __init gtp_init(void)
diff --git a/drivers/net/ieee802154/Kconfig b/drivers/net/ieee802154/Kconfig
index 303ba4133920..8782f5655e3f 100644
--- a/drivers/net/ieee802154/Kconfig
+++ b/drivers/net/ieee802154/Kconfig
@@ -104,3 +104,14 @@ config IEEE802154_CA8210_DEBUGFS
exposes a debugfs node for each CA8210 instance which allows
direct use of the Cascoda API, exposing the 802.15.4 MAC
management entities.
+
+config IEEE802154_MCR20A
+ tristate "MCR20A transceiver driver"
+ depends on IEEE802154_DRIVERS && MAC802154
+ depends on SPI
+ ---help---
+ Say Y here to enable the MCR20A SPI 802.15.4 wireless
+ controller.
+
+ This driver can also be built as a module. To do so, say M here.
+ the module will be called 'mcr20a'.
diff --git a/drivers/net/ieee802154/Makefile b/drivers/net/ieee802154/Makefile
index bea1de5e726c..104744d5a668 100644
--- a/drivers/net/ieee802154/Makefile
+++ b/drivers/net/ieee802154/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_IEEE802154_CC2520) += cc2520.o
obj-$(CONFIG_IEEE802154_ATUSB) += atusb.o
obj-$(CONFIG_IEEE802154_ADF7242) += adf7242.o
obj-$(CONFIG_IEEE802154_CA8210) += ca8210.o
+obj-$(CONFIG_IEEE802154_MCR20A) += mcr20a.o
diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
new file mode 100644
index 000000000000..d9eb22a52551
--- /dev/null
+++ b/drivers/net/ieee802154/mcr20a.c
@@ -0,0 +1,1413 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/skbuff.h>
+#include <linux/of_gpio.h>
+#include <linux/regmap.h>
+#include <linux/ieee802154.h>
+#include <linux/debugfs.h>
+
+#include <net/mac802154.h>
+#include <net/cfg802154.h>
+
+#include <linux/device.h>
+
+#include "mcr20a.h"
+
+#define SPI_COMMAND_BUFFER 3
+
+#define REGISTER_READ BIT(7)
+#define REGISTER_WRITE (0 << 7)
+#define REGISTER_ACCESS (0 << 6)
+#define PACKET_BUFF_BURST_ACCESS BIT(6)
+#define PACKET_BUFF_BYTE_ACCESS BIT(5)
+
+#define MCR20A_WRITE_REG(x) (x)
+#define MCR20A_READ_REG(x) (REGISTER_READ | (x))
+#define MCR20A_BURST_READ_PACKET_BUF (0xC0)
+#define MCR20A_BURST_WRITE_PACKET_BUF (0x40)
+
+#define MCR20A_CMD_REG 0x80
+#define MCR20A_CMD_REG_MASK 0x3f
+#define MCR20A_CMD_WRITE 0x40
+#define MCR20A_CMD_FB 0x20
+
+/* Number of Interrupt Request Status Register */
+#define MCR20A_IRQSTS_NUM 2 /* only IRQ_STS1 and IRQ_STS2 */
+
+/* MCR20A CCA Type */
+enum {
+ MCR20A_CCA_ED, // energy detect - CCA bit not active,
+ // not to be used for T and CCCA sequences
+ MCR20A_CCA_MODE1, // energy detect - CCA bit ACTIVE
+ MCR20A_CCA_MODE2, // 802.15.4 compliant signal detect - CCA bit ACTIVE
+ MCR20A_CCA_MODE3
+};
+
+enum {
+ MCR20A_XCVSEQ_IDLE = 0x00,
+ MCR20A_XCVSEQ_RX = 0x01,
+ MCR20A_XCVSEQ_TX = 0x02,
+ MCR20A_XCVSEQ_CCA = 0x03,
+ MCR20A_XCVSEQ_TR = 0x04,
+ MCR20A_XCVSEQ_CCCA = 0x05,
+};
+
+/* IEEE-802.15.4 defined constants (2.4 GHz logical channels) */
+#define MCR20A_MIN_CHANNEL (11)
+#define MCR20A_MAX_CHANNEL (26)
+#define MCR20A_CHANNEL_SPACING (5)
+
+/* MCR20A CCA Threshold constans */
+#define MCR20A_MIN_CCA_THRESHOLD (0x6EU)
+#define MCR20A_MAX_CCA_THRESHOLD (0x00U)
+
+/* version 0C */
+#define MCR20A_OVERWRITE_VERSION (0x0C)
+
+/* MCR20A PLL configurations */
+static const u8 PLL_INT[16] = {
+ /* 2405 */ 0x0B, /* 2410 */ 0x0B, /* 2415 */ 0x0B,
+ /* 2420 */ 0x0B, /* 2425 */ 0x0B, /* 2430 */ 0x0B,
+ /* 2435 */ 0x0C, /* 2440 */ 0x0C, /* 2445 */ 0x0C,
+ /* 2450 */ 0x0C, /* 2455 */ 0x0C, /* 2460 */ 0x0C,
+ /* 2465 */ 0x0D, /* 2470 */ 0x0D, /* 2475 */ 0x0D,
+ /* 2480 */ 0x0D
+};
+
+static const u8 PLL_FRAC[16] = {
+ /* 2405 */ 0x28, /* 2410 */ 0x50, /* 2415 */ 0x78,
+ /* 2420 */ 0xA0, /* 2425 */ 0xC8, /* 2430 */ 0xF0,
+ /* 2435 */ 0x18, /* 2440 */ 0x40, /* 2445 */ 0x68,
+ /* 2450 */ 0x90, /* 2455 */ 0xB8, /* 2460 */ 0xE0,
+ /* 2465 */ 0x08, /* 2470 */ 0x30, /* 2475 */ 0x58,
+ /* 2480 */ 0x80
+};
+
+static const struct reg_sequence mar20a_iar_overwrites[] = {
+ { IAR_MISC_PAD_CTRL, 0x02 },
+ { IAR_VCO_CTRL1, 0xB3 },
+ { IAR_VCO_CTRL2, 0x07 },
+ { IAR_PA_TUNING, 0x71 },
+ { IAR_CHF_IBUF, 0x2F },
+ { IAR_CHF_QBUF, 0x2F },
+ { IAR_CHF_IRIN, 0x24 },
+ { IAR_CHF_QRIN, 0x24 },
+ { IAR_CHF_IL, 0x24 },
+ { IAR_CHF_QL, 0x24 },
+ { IAR_CHF_CC1, 0x32 },
+ { IAR_CHF_CCL, 0x1D },
+ { IAR_CHF_CC2, 0x2D },
+ { IAR_CHF_IROUT, 0x24 },
+ { IAR_CHF_QROUT, 0x24 },
+ { IAR_PA_CAL, 0x28 },
+ { IAR_AGC_THR1, 0x55 },
+ { IAR_AGC_THR2, 0x2D },
+ { IAR_ATT_RSSI1, 0x5F },
+ { IAR_ATT_RSSI2, 0x8F },
+ { IAR_RSSI_OFFSET, 0x61 },
+ { IAR_CHF_PMA_GAIN, 0x03 },
+ { IAR_CCA1_THRESH, 0x50 },
+ { IAR_CORR_NVAL, 0x13 },
+ { IAR_ACKDELAY, 0x3D },
+};
+
+#define MCR20A_VALID_CHANNELS (0x07FFF800)
+
+struct mcr20a_platform_data {
+ int rst_gpio;
+};
+
+#define MCR20A_MAX_BUF (127)
+
+#define printdev(X) (&X->spi->dev)
+
+/* regmap information for Direct Access Register (DAR) access */
+#define MCR20A_DAR_WRITE 0x01
+#define MCR20A_DAR_READ 0x00
+#define MCR20A_DAR_NUMREGS 0x3F
+
+/* regmap information for Indirect Access Register (IAR) access */
+#define MCR20A_IAR_ACCESS 0x80
+#define MCR20A_IAR_NUMREGS 0xBEFF
+
+/* Read/Write SPI Commands for DAR and IAR registers. */
+#define MCR20A_READSHORT(reg) ((reg) << 1)
+#define MCR20A_WRITESHORT(reg) ((reg) << 1 | 1)
+#define MCR20A_READLONG(reg) (1 << 15 | (reg) << 5)
+#define MCR20A_WRITELONG(reg) (1 << 15 | (reg) << 5 | 1 << 4)
+
+/* Type definitions for link configuration of instantiable layers */
+#define MCR20A_PHY_INDIRECT_QUEUE_SIZE (12)
+
+static bool
+mcr20a_dar_writeable(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case DAR_IRQ_STS1:
+ case DAR_IRQ_STS2:
+ case DAR_IRQ_STS3:
+ case DAR_PHY_CTRL1:
+ case DAR_PHY_CTRL2:
+ case DAR_PHY_CTRL3:
+ case DAR_PHY_CTRL4:
+ case DAR_SRC_CTRL:
+ case DAR_SRC_ADDRS_SUM_LSB:
+ case DAR_SRC_ADDRS_SUM_MSB:
+ case DAR_T3CMP_LSB:
+ case DAR_T3CMP_MSB:
+ case DAR_T3CMP_USB:
+ case DAR_T2PRIMECMP_LSB:
+ case DAR_T2PRIMECMP_MSB:
+ case DAR_T1CMP_LSB:
+ case DAR_T1CMP_MSB:
+ case DAR_T1CMP_USB:
+ case DAR_T2CMP_LSB:
+ case DAR_T2CMP_MSB:
+ case DAR_T2CMP_USB:
+ case DAR_T4CMP_LSB:
+ case DAR_T4CMP_MSB:
+ case DAR_T4CMP_USB:
+ case DAR_PLL_INT0:
+ case DAR_PLL_FRAC0_LSB:
+ case DAR_PLL_FRAC0_MSB:
+ case DAR_PA_PWR:
+ /* no DAR_ACM */
+ case DAR_OVERWRITE_VER:
+ case DAR_CLK_OUT_CTRL:
+ case DAR_PWR_MODES:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_dar_readable(struct device *dev, unsigned int reg)
+{
+ bool rc;
+
+ /* all writeable are also readable */
+ rc = mcr20a_dar_writeable(dev, reg);
+ if (rc)
+ return rc;
+
+ /* readonly regs */
+ switch (reg) {
+ case DAR_RX_FRM_LEN:
+ case DAR_CCA1_ED_FNL:
+ case DAR_EVENT_TMR_LSB:
+ case DAR_EVENT_TMR_MSB:
+ case DAR_EVENT_TMR_USB:
+ case DAR_TIMESTAMP_LSB:
+ case DAR_TIMESTAMP_MSB:
+ case DAR_TIMESTAMP_USB:
+ case DAR_SEQ_STATE:
+ case DAR_LQI_VALUE:
+ case DAR_RSSI_CCA_CONT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_dar_volatile(struct device *dev, unsigned int reg)
+{
+ /* can be changed during runtime */
+ switch (reg) {
+ case DAR_IRQ_STS1:
+ case DAR_IRQ_STS2:
+ case DAR_IRQ_STS3:
+ /* use them in spi_async and regmap so it's volatile */
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_dar_precious(struct device *dev, unsigned int reg)
+{
+ /* don't clear irq line on read */
+ switch (reg) {
+ case DAR_IRQ_STS1:
+ case DAR_IRQ_STS2:
+ case DAR_IRQ_STS3:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct regmap_config mcr20a_dar_regmap = {
+ .name = "mcr20a_dar",
+ .reg_bits = 8,
+ .val_bits = 8,
+ .write_flag_mask = REGISTER_ACCESS | REGISTER_WRITE,
+ .read_flag_mask = REGISTER_ACCESS | REGISTER_READ,
+ .cache_type = REGCACHE_RBTREE,
+ .writeable_reg = mcr20a_dar_writeable,
+ .readable_reg = mcr20a_dar_readable,
+ .volatile_reg = mcr20a_dar_volatile,
+ .precious_reg = mcr20a_dar_precious,
+ .fast_io = true,
+ .can_multi_write = true,
+};
+
+static bool
+mcr20a_iar_writeable(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case IAR_XTAL_TRIM:
+ case IAR_PMC_LP_TRIM:
+ case IAR_MACPANID0_LSB:
+ case IAR_MACPANID0_MSB:
+ case IAR_MACSHORTADDRS0_LSB:
+ case IAR_MACSHORTADDRS0_MSB:
+ case IAR_MACLONGADDRS0_0:
+ case IAR_MACLONGADDRS0_8:
+ case IAR_MACLONGADDRS0_16:
+ case IAR_MACLONGADDRS0_24:
+ case IAR_MACLONGADDRS0_32:
+ case IAR_MACLONGADDRS0_40:
+ case IAR_MACLONGADDRS0_48:
+ case IAR_MACLONGADDRS0_56:
+ case IAR_RX_FRAME_FILTER:
+ case IAR_PLL_INT1:
+ case IAR_PLL_FRAC1_LSB:
+ case IAR_PLL_FRAC1_MSB:
+ case IAR_MACPANID1_LSB:
+ case IAR_MACPANID1_MSB:
+ case IAR_MACSHORTADDRS1_LSB:
+ case IAR_MACSHORTADDRS1_MSB:
+ case IAR_MACLONGADDRS1_0:
+ case IAR_MACLONGADDRS1_8:
+ case IAR_MACLONGADDRS1_16:
+ case IAR_MACLONGADDRS1_24:
+ case IAR_MACLONGADDRS1_32:
+ case IAR_MACLONGADDRS1_40:
+ case IAR_MACLONGADDRS1_48:
+ case IAR_MACLONGADDRS1_56:
+ case IAR_DUAL_PAN_CTRL:
+ case IAR_DUAL_PAN_DWELL:
+ case IAR_CCA1_THRESH:
+ case IAR_CCA1_ED_OFFSET_COMP:
+ case IAR_LQI_OFFSET_COMP:
+ case IAR_CCA_CTRL:
+ case IAR_CCA2_CORR_PEAKS:
+ case IAR_CCA2_CORR_THRESH:
+ case IAR_TMR_PRESCALE:
+ case IAR_ANT_PAD_CTRL:
+ case IAR_MISC_PAD_CTRL:
+ case IAR_BSM_CTRL:
+ case IAR_RNG:
+ case IAR_RX_WTR_MARK:
+ case IAR_SOFT_RESET:
+ case IAR_TXDELAY:
+ case IAR_ACKDELAY:
+ case IAR_CORR_NVAL:
+ case IAR_ANT_AGC_CTRL:
+ case IAR_AGC_THR1:
+ case IAR_AGC_THR2:
+ case IAR_PA_CAL:
+ case IAR_ATT_RSSI1:
+ case IAR_ATT_RSSI2:
+ case IAR_RSSI_OFFSET:
+ case IAR_XTAL_CTRL:
+ case IAR_CHF_PMA_GAIN:
+ case IAR_CHF_IBUF:
+ case IAR_CHF_QBUF:
+ case IAR_CHF_IRIN:
+ case IAR_CHF_QRIN:
+ case IAR_CHF_IL:
+ case IAR_CHF_QL:
+ case IAR_CHF_CC1:
+ case IAR_CHF_CCL:
+ case IAR_CHF_CC2:
+ case IAR_CHF_IROUT:
+ case IAR_CHF_QROUT:
+ case IAR_PA_TUNING:
+ case IAR_VCO_CTRL1:
+ case IAR_VCO_CTRL2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_iar_readable(struct device *dev, unsigned int reg)
+{
+ bool rc;
+
+ /* all writeable are also readable */
+ rc = mcr20a_iar_writeable(dev, reg);
+ if (rc)
+ return rc;
+
+ /* readonly regs */
+ switch (reg) {
+ case IAR_PART_ID:
+ case IAR_DUAL_PAN_STS:
+ case IAR_RX_BYTE_COUNT:
+ case IAR_FILTERFAIL_CODE1:
+ case IAR_FILTERFAIL_CODE2:
+ case IAR_RSSI:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+mcr20a_iar_volatile(struct device *dev, unsigned int reg)
+{
+/* can be changed during runtime */
+ switch (reg) {
+ case IAR_DUAL_PAN_STS:
+ case IAR_RX_BYTE_COUNT:
+ case IAR_FILTERFAIL_CODE1:
+ case IAR_FILTERFAIL_CODE2:
+ case IAR_RSSI:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct regmap_config mcr20a_iar_regmap = {
+ .name = "mcr20a_iar",
+ .reg_bits = 16,
+ .val_bits = 8,
+ .write_flag_mask = REGISTER_ACCESS | REGISTER_WRITE | IAR_INDEX,
+ .read_flag_mask = REGISTER_ACCESS | REGISTER_READ | IAR_INDEX,
+ .cache_type = REGCACHE_RBTREE,
+ .writeable_reg = mcr20a_iar_writeable,
+ .readable_reg = mcr20a_iar_readable,
+ .volatile_reg = mcr20a_iar_volatile,
+ .fast_io = true,
+};
+
+struct mcr20a_local {
+ struct spi_device *spi;
+
+ struct ieee802154_hw *hw;
+ struct mcr20a_platform_data *pdata;
+ struct regmap *regmap_dar;
+ struct regmap *regmap_iar;
+
+ u8 *buf;
+
+ bool is_tx;
+
+ /* for writing tx buffer */
+ struct spi_message tx_buf_msg;
+ u8 tx_header[1];
+ /* burst buffer write command */
+ struct spi_transfer tx_xfer_header;
+ u8 tx_len[1];
+ /* len of tx packet */
+ struct spi_transfer tx_xfer_len;
+ /* data of tx packet */
+ struct spi_transfer tx_xfer_buf;
+ struct sk_buff *tx_skb;
+
+ /* for read length rxfifo */
+ struct spi_message reg_msg;
+ u8 reg_cmd[1];
+ u8 reg_data[MCR20A_IRQSTS_NUM];
+ struct spi_transfer reg_xfer_cmd;
+ struct spi_transfer reg_xfer_data;
+
+ /* receive handling */
+ struct spi_message rx_buf_msg;
+ u8 rx_header[1];
+ struct spi_transfer rx_xfer_header;
+ u8 rx_lqi[1];
+ struct spi_transfer rx_xfer_lqi;
+ u8 rx_buf[MCR20A_MAX_BUF];
+ struct spi_transfer rx_xfer_buf;
+
+ /* isr handling for reading intstat */
+ struct spi_message irq_msg;
+ u8 irq_header[1];
+ u8 irq_data[MCR20A_IRQSTS_NUM];
+ struct spi_transfer irq_xfer_data;
+ struct spi_transfer irq_xfer_header;
+};
+
+static void
+mcr20a_write_tx_buf_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ lp->reg_msg.complete = NULL;
+ lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+ lp->reg_data[0] = MCR20A_XCVSEQ_TX;
+ lp->reg_xfer_data.len = 1;
+
+ ret = spi_async(lp->spi, &lp->reg_msg);
+ if (ret)
+ dev_err(printdev(lp), "failed to set SEQ TX\n");
+}
+
+static int
+mcr20a_xmit(struct ieee802154_hw *hw, struct sk_buff *skb)
+{
+ struct mcr20a_local *lp = hw->priv;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ lp->tx_skb = skb;
+
+ print_hex_dump_debug("mcr20a tx: ", DUMP_PREFIX_OFFSET, 16, 1,
+ skb->data, skb->len, 0);
+
+ lp->is_tx = 1;
+
+ lp->reg_msg.complete = NULL;
+ lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+ lp->reg_data[0] = MCR20A_XCVSEQ_IDLE;
+ lp->reg_xfer_data.len = 1;
+
+ return spi_async(lp->spi, &lp->reg_msg);
+}
+
+static int
+mcr20a_ed(struct ieee802154_hw *hw, u8 *level)
+{
+ WARN_ON(!level);
+ *level = 0xbe;
+ return 0;
+}
+
+static int
+mcr20a_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
+{
+ struct mcr20a_local *lp = hw->priv;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* freqency = ((PLL_INT+64) + (PLL_FRAC/65536)) * 32 MHz */
+ ret = regmap_write(lp->regmap_dar, DAR_PLL_INT0, PLL_INT[channel - 11]);
+ if (ret)
+ return ret;
+ ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_LSB, 0x00);
+ if (ret)
+ return ret;
+ ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_MSB,
+ PLL_FRAC[channel - 11]);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int
+mcr20a_start(struct ieee802154_hw *hw)
+{
+ struct mcr20a_local *lp = hw->priv;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* No slotted operation */
+ dev_dbg(printdev(lp), "no slotted operation\n");
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_SLOTTED, 0x0);
+
+ /* enable irq */
+ enable_irq(lp->spi->irq);
+
+ /* Unmask SEQ interrupt */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL2,
+ DAR_PHY_CTRL2_SEQMSK, 0x0);
+
+ /* Start the RX sequence */
+ dev_dbg(printdev(lp), "start the RX sequence\n");
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+
+ return 0;
+}
+
+static void
+mcr20a_stop(struct ieee802154_hw *hw)
+{
+ struct mcr20a_local *lp = hw->priv;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* stop all running sequence */
+ regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+ /* disable irq */
+ disable_irq(lp->spi->irq);
+}
+
+static int
+mcr20a_set_hw_addr_filt(struct ieee802154_hw *hw,
+ struct ieee802154_hw_addr_filt *filt,
+ unsigned long changed)
+{
+ struct mcr20a_local *lp = hw->priv;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ if (changed & IEEE802154_AFILT_SADDR_CHANGED) {
+ u16 addr = le16_to_cpu(filt->short_addr);
+
+ regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_LSB, addr);
+ regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_MSB, addr >> 8);
+ }
+
+ if (changed & IEEE802154_AFILT_PANID_CHANGED) {
+ u16 pan = le16_to_cpu(filt->pan_id);
+
+ regmap_write(lp->regmap_iar, IAR_MACPANID0_LSB, pan);
+ regmap_write(lp->regmap_iar, IAR_MACPANID0_MSB, pan >> 8);
+ }
+
+ if (changed & IEEE802154_AFILT_IEEEADDR_CHANGED) {
+ u8 addr[8], i;
+
+ memcpy(addr, &filt->ieee_addr, 8);
+ for (i = 0; i < 8; i++)
+ regmap_write(lp->regmap_iar,
+ IAR_MACLONGADDRS0_0 + i, addr[i]);
+ }
+
+ if (changed & IEEE802154_AFILT_PANC_CHANGED) {
+ if (filt->pan_coord) {
+ regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PANCORDNTR0, 0x10);
+ } else {
+ regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PANCORDNTR0, 0x00);
+ }
+ }
+
+ return 0;
+}
+
+/* -30 dBm to 10 dBm */
+#define MCR20A_MAX_TX_POWERS 0x14
+static const s32 mcr20a_powers[MCR20A_MAX_TX_POWERS + 1] = {
+ -3000, -2800, -2600, -2400, -2200, -2000, -1800, -1600, -1400,
+ -1200, -1000, -800, -600, -400, -200, 0, 200, 400, 600, 800, 1000
+};
+
+static int
+mcr20a_set_txpower(struct ieee802154_hw *hw, s32 mbm)
+{
+ struct mcr20a_local *lp = hw->priv;
+ u32 i;
+
+ dev_dbg(printdev(lp), "%s(%d)\n", __func__, mbm);
+
+ for (i = 0; i < lp->hw->phy->supported.tx_powers_size; i++) {
+ if (lp->hw->phy->supported.tx_powers[i] == mbm)
+ return regmap_write(lp->regmap_dar, DAR_PA_PWR,
+ ((i + 8) & 0x1F));
+ }
+
+ return -EINVAL;
+}
+
+#define MCR20A_MAX_ED_LEVELS MCR20A_MIN_CCA_THRESHOLD
+static s32 mcr20a_ed_levels[MCR20A_MAX_ED_LEVELS + 1];
+
+static int
+mcr20a_set_cca_mode(struct ieee802154_hw *hw,
+ const struct wpan_phy_cca *cca)
+{
+ struct mcr20a_local *lp = hw->priv;
+ unsigned int cca_mode = 0xff;
+ bool cca_mode_and = false;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* mapping 802.15.4 to driver spec */
+ switch (cca->mode) {
+ case NL802154_CCA_ENERGY:
+ cca_mode = MCR20A_CCA_MODE1;
+ break;
+ case NL802154_CCA_CARRIER:
+ cca_mode = MCR20A_CCA_MODE2;
+ break;
+ case NL802154_CCA_ENERGY_CARRIER:
+ switch (cca->opt) {
+ case NL802154_CCA_OPT_ENERGY_CARRIER_AND:
+ cca_mode = MCR20A_CCA_MODE3;
+ cca_mode_and = true;
+ break;
+ case NL802154_CCA_OPT_ENERGY_CARRIER_OR:
+ cca_mode = MCR20A_CCA_MODE3;
+ cca_mode_and = false;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_CCATYPE_MASK,
+ cca_mode << DAR_PHY_CTRL4_CCATYPE_SHIFT);
+ if (ret < 0)
+ return ret;
+
+ if (cca_mode == MCR20A_CCA_MODE3) {
+ if (cca_mode_and) {
+ ret = regmap_update_bits(lp->regmap_iar, IAR_CCA_CTRL,
+ IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+ 0x08);
+ } else {
+ ret = regmap_update_bits(lp->regmap_iar,
+ IAR_CCA_CTRL,
+ IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+ 0x00);
+ }
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int
+mcr20a_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm)
+{
+ struct mcr20a_local *lp = hw->priv;
+ u32 i;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ for (i = 0; i < hw->phy->supported.cca_ed_levels_size; i++) {
+ if (hw->phy->supported.cca_ed_levels[i] == mbm)
+ return regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, i);
+ }
+
+ return 0;
+}
+
+static int
+mcr20a_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on)
+{
+ struct mcr20a_local *lp = hw->priv;
+ int ret;
+ u8 rx_frame_filter_reg = 0x0;
+ u8 val;
+
+ dev_dbg(printdev(lp), "%s(%d)\n", __func__, on);
+
+ if (on) {
+ /* All frame types accepted*/
+ val |= DAR_PHY_CTRL4_PROMISCUOUS;
+ rx_frame_filter_reg &= ~(IAR_RX_FRAME_FLT_FRM_VER);
+ rx_frame_filter_reg |= (IAR_RX_FRAME_FLT_ACK_FT |
+ IAR_RX_FRAME_FLT_NS_FT);
+
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PROMISCUOUS,
+ DAR_PHY_CTRL4_PROMISCUOUS);
+ if (ret < 0)
+ return ret;
+
+ ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+ rx_frame_filter_reg);
+ if (ret < 0)
+ return ret;
+ } else {
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+ DAR_PHY_CTRL4_PROMISCUOUS, 0x0);
+ if (ret < 0)
+ return ret;
+
+ ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+ IAR_RX_FRAME_FLT_FRM_VER |
+ IAR_RX_FRAME_FLT_BEACON_FT |
+ IAR_RX_FRAME_FLT_DATA_FT |
+ IAR_RX_FRAME_FLT_CMD_FT);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static const struct ieee802154_ops mcr20a_hw_ops = {
+ .owner = THIS_MODULE,
+ .xmit_async = mcr20a_xmit,
+ .ed = mcr20a_ed,
+ .set_channel = mcr20a_set_channel,
+ .start = mcr20a_start,
+ .stop = mcr20a_stop,
+ .set_hw_addr_filt = mcr20a_set_hw_addr_filt,
+ .set_txpower = mcr20a_set_txpower,
+ .set_cca_mode = mcr20a_set_cca_mode,
+ .set_cca_ed_level = mcr20a_set_cca_ed_level,
+ .set_promiscuous_mode = mcr20a_set_promiscuous_mode,
+};
+
+static int
+mcr20a_request_rx(struct mcr20a_local *lp)
+{
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* Start the RX sequence */
+ regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+
+ return 0;
+}
+
+static void
+mcr20a_handle_rx_read_buf_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ u8 len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+ struct sk_buff *skb;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ dev_dbg(printdev(lp), "RX is done\n");
+
+ if (!ieee802154_is_valid_psdu_len(len)) {
+ dev_vdbg(&lp->spi->dev, "corrupted frame received\n");
+ len = IEEE802154_MTU;
+ }
+
+ len = len - 2; /* get rid of frame check field */
+
+ skb = dev_alloc_skb(len);
+ if (!skb)
+ return;
+
+ memcpy(skb_put(skb, len), lp->rx_buf, len);
+ ieee802154_rx_irqsafe(lp->hw, skb, lp->rx_lqi[0]);
+
+ print_hex_dump_debug("mcr20a rx: ", DUMP_PREFIX_OFFSET, 16, 1,
+ lp->rx_buf, len, 0);
+ pr_debug("mcr20a rx: lqi: %02hhx\n", lp->rx_lqi[0]);
+
+ /* start RX sequence */
+ mcr20a_request_rx(lp);
+}
+
+static void
+mcr20a_handle_rx_read_len_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ u8 len;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* get the length of received frame */
+ len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+ dev_dbg(printdev(lp), "frame len : %d\n", len);
+
+ /* prepare to read the rx buf */
+ lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+ lp->rx_header[0] = MCR20A_BURST_READ_PACKET_BUF;
+ lp->rx_xfer_buf.len = len;
+
+ ret = spi_async(lp->spi, &lp->rx_buf_msg);
+ if (ret)
+ dev_err(printdev(lp), "failed to read rx buffer length\n");
+}
+
+static int
+mcr20a_handle_rx(struct mcr20a_local *lp)
+{
+ dev_dbg(printdev(lp), "%s\n", __func__);
+ lp->reg_msg.complete = mcr20a_handle_rx_read_len_complete;
+ lp->reg_cmd[0] = MCR20A_READ_REG(DAR_RX_FRM_LEN);
+ lp->reg_xfer_data.len = 1;
+
+ return spi_async(lp->spi, &lp->reg_msg);
+}
+
+static int
+mcr20a_handle_tx_complete(struct mcr20a_local *lp)
+{
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ ieee802154_xmit_complete(lp->hw, lp->tx_skb, false);
+
+ return mcr20a_request_rx(lp);
+}
+
+static int
+mcr20a_handle_tx(struct mcr20a_local *lp)
+{
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* write tx buffer */
+ lp->tx_header[0] = MCR20A_BURST_WRITE_PACKET_BUF;
+ /* add 2 bytes of FCS */
+ lp->tx_len[0] = lp->tx_skb->len + 2;
+ lp->tx_xfer_buf.tx_buf = lp->tx_skb->data;
+ /* add 1 byte psduLength */
+ lp->tx_xfer_buf.len = lp->tx_skb->len + 1;
+
+ ret = spi_async(lp->spi, &lp->tx_buf_msg);
+ if (ret) {
+ dev_err(printdev(lp), "SPI write Failed for TX buf\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void
+mcr20a_irq_clean_complete(void *context)
+{
+ struct mcr20a_local *lp = context;
+ u8 seq_state = lp->irq_data[DAR_IRQ_STS1] & DAR_PHY_CTRL1_XCVSEQ_MASK;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ enable_irq(lp->spi->irq);
+
+ dev_dbg(printdev(lp), "IRQ STA1 (%02x) STA2 (%02x)\n",
+ lp->irq_data[DAR_IRQ_STS1], lp->irq_data[DAR_IRQ_STS2]);
+
+ switch (seq_state) {
+ /* TX IRQ, RX IRQ and SEQ IRQ */
+ case (0x03):
+ if (lp->is_tx) {
+ lp->is_tx = 0;
+ dev_dbg(printdev(lp), "TX is done. No ACK\n");
+ mcr20a_handle_tx_complete(lp);
+ }
+ break;
+ case (0x05):
+ /* rx is starting */
+ dev_dbg(printdev(lp), "RX is starting\n");
+ mcr20a_handle_rx(lp);
+ break;
+ case (0x07):
+ if (lp->is_tx) {
+ /* tx is done */
+ lp->is_tx = 0;
+ dev_dbg(printdev(lp), "TX is done. Get ACK\n");
+ mcr20a_handle_tx_complete(lp);
+ } else {
+ /* rx is starting */
+ dev_dbg(printdev(lp), "RX is starting\n");
+ mcr20a_handle_rx(lp);
+ }
+ break;
+ case (0x01):
+ if (lp->is_tx) {
+ dev_dbg(printdev(lp), "TX is starting\n");
+ mcr20a_handle_tx(lp);
+ } else {
+ dev_dbg(printdev(lp), "MCR20A is stop\n");
+ }
+ break;
+ }
+}
+
+static void mcr20a_irq_status_complete(void *context)
+{
+ int ret;
+ struct mcr20a_local *lp = context;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+ regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+ lp->reg_msg.complete = mcr20a_irq_clean_complete;
+ lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_IRQ_STS1);
+ memcpy(lp->reg_data, lp->irq_data, MCR20A_IRQSTS_NUM);
+ lp->reg_xfer_data.len = MCR20A_IRQSTS_NUM;
+
+ ret = spi_async(lp->spi, &lp->reg_msg);
+
+ if (ret)
+ dev_err(printdev(lp), "failed to clean irq status\n");
+}
+
+static irqreturn_t mcr20a_irq_isr(int irq, void *data)
+{
+ struct mcr20a_local *lp = data;
+ int ret;
+
+ disable_irq_nosync(irq);
+
+ lp->irq_header[0] = MCR20A_READ_REG(DAR_IRQ_STS1);
+ /* read IRQSTSx */
+ ret = spi_async(lp->spi, &lp->irq_msg);
+ if (ret) {
+ enable_irq(irq);
+ return IRQ_NONE;
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int mcr20a_get_platform_data(struct spi_device *spi,
+ struct mcr20a_platform_data *pdata)
+{
+ int ret = 0;
+
+ if (!spi->dev.of_node)
+ return -EINVAL;
+
+ pdata->rst_gpio = of_get_named_gpio(spi->dev.of_node, "rst_b-gpio", 0);
+ dev_dbg(&spi->dev, "rst_b-gpio: %d\n", pdata->rst_gpio);
+
+ return ret;
+}
+
+static void mcr20a_hw_setup(struct mcr20a_local *lp)
+{
+ u8 i;
+ struct ieee802154_hw *hw = lp->hw;
+ struct wpan_phy *phy = lp->hw->phy;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ phy->symbol_duration = 16;
+ phy->lifs_period = 40;
+ phy->sifs_period = 12;
+
+ hw->flags = IEEE802154_HW_TX_OMIT_CKSUM |
+ IEEE802154_HW_AFILT |
+ IEEE802154_HW_PROMISCUOUS;
+
+ phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL |
+ WPAN_PHY_FLAG_CCA_MODE;
+
+ phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) |
+ BIT(NL802154_CCA_CARRIER) | BIT(NL802154_CCA_ENERGY_CARRIER);
+ phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) |
+ BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR);
+
+ /* initiating cca_ed_levels */
+ for (i = MCR20A_MAX_CCA_THRESHOLD; i < MCR20A_MIN_CCA_THRESHOLD + 1;
+ ++i) {
+ mcr20a_ed_levels[i] = -i * 100;
+ }
+
+ phy->supported.cca_ed_levels = mcr20a_ed_levels;
+ phy->supported.cca_ed_levels_size = ARRAY_SIZE(mcr20a_ed_levels);
+
+ phy->cca.mode = NL802154_CCA_ENERGY;
+
+ phy->supported.channels[0] = MCR20A_VALID_CHANNELS;
+ phy->current_page = 0;
+ /* MCR20A default reset value */
+ phy->current_channel = 20;
+ phy->symbol_duration = 16;
+ phy->supported.tx_powers = mcr20a_powers;
+ phy->supported.tx_powers_size = ARRAY_SIZE(mcr20a_powers);
+ phy->cca_ed_level = phy->supported.cca_ed_levels[75];
+ phy->transmit_power = phy->supported.tx_powers[0x0F];
+}
+
+static void
+mcr20a_setup_tx_spi_messages(struct mcr20a_local *lp)
+{
+ spi_message_init(&lp->tx_buf_msg);
+ lp->tx_buf_msg.context = lp;
+ lp->tx_buf_msg.complete = mcr20a_write_tx_buf_complete;
+
+ lp->tx_xfer_header.len = 1;
+ lp->tx_xfer_header.tx_buf = lp->tx_header;
+
+ lp->tx_xfer_len.len = 1;
+ lp->tx_xfer_len.tx_buf = lp->tx_len;
+
+ spi_message_add_tail(&lp->tx_xfer_header, &lp->tx_buf_msg);
+ spi_message_add_tail(&lp->tx_xfer_len, &lp->tx_buf_msg);
+ spi_message_add_tail(&lp->tx_xfer_buf, &lp->tx_buf_msg);
+}
+
+static void
+mcr20a_setup_rx_spi_messages(struct mcr20a_local *lp)
+{
+ spi_message_init(&lp->reg_msg);
+ lp->reg_msg.context = lp;
+
+ lp->reg_xfer_cmd.len = 1;
+ lp->reg_xfer_cmd.tx_buf = lp->reg_cmd;
+ lp->reg_xfer_cmd.rx_buf = lp->reg_cmd;
+
+ lp->reg_xfer_data.rx_buf = lp->reg_data;
+ lp->reg_xfer_data.tx_buf = lp->reg_data;
+
+ spi_message_add_tail(&lp->reg_xfer_cmd, &lp->reg_msg);
+ spi_message_add_tail(&lp->reg_xfer_data, &lp->reg_msg);
+
+ spi_message_init(&lp->rx_buf_msg);
+ lp->rx_buf_msg.context = lp;
+ lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+ lp->rx_xfer_header.len = 1;
+ lp->rx_xfer_header.tx_buf = lp->rx_header;
+ lp->rx_xfer_header.rx_buf = lp->rx_header;
+
+ lp->rx_xfer_buf.rx_buf = lp->rx_buf;
+
+ lp->rx_xfer_lqi.len = 1;
+ lp->rx_xfer_lqi.rx_buf = lp->rx_lqi;
+
+ spi_message_add_tail(&lp->rx_xfer_header, &lp->rx_buf_msg);
+ spi_message_add_tail(&lp->rx_xfer_buf, &lp->rx_buf_msg);
+ spi_message_add_tail(&lp->rx_xfer_lqi, &lp->rx_buf_msg);
+}
+
+static void
+mcr20a_setup_irq_spi_messages(struct mcr20a_local *lp)
+{
+ spi_message_init(&lp->irq_msg);
+ lp->irq_msg.context = lp;
+ lp->irq_msg.complete = mcr20a_irq_status_complete;
+ lp->irq_xfer_header.len = 1;
+ lp->irq_xfer_header.tx_buf = lp->irq_header;
+ lp->irq_xfer_header.rx_buf = lp->irq_header;
+
+ lp->irq_xfer_data.len = MCR20A_IRQSTS_NUM;
+ lp->irq_xfer_data.rx_buf = lp->irq_data;
+
+ spi_message_add_tail(&lp->irq_xfer_header, &lp->irq_msg);
+ spi_message_add_tail(&lp->irq_xfer_data, &lp->irq_msg);
+}
+
+static int
+mcr20a_phy_init(struct mcr20a_local *lp)
+{
+ u8 index;
+ unsigned int phy_reg = 0;
+ int ret;
+
+ dev_dbg(printdev(lp), "%s\n", __func__);
+
+ /* Disable Tristate on COCO MISO for SPI reads */
+ ret = regmap_write(lp->regmap_iar, IAR_MISC_PAD_CTRL, 0x02);
+ if (ret)
+ goto err_ret;
+
+ /* Clear all PP IRQ bits in IRQSTS1 to avoid unexpected interrupts
+ * immediately after init
+ */
+ ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS1, 0xEF);
+ if (ret)
+ goto err_ret;
+
+ /* Clear all PP IRQ bits in IRQSTS2 */
+ ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS2,
+ DAR_IRQSTS2_ASM_IRQ | DAR_IRQSTS2_PB_ERR_IRQ |
+ DAR_IRQSTS2_WAKE_IRQ);
+ if (ret)
+ goto err_ret;
+
+ /* Disable all timer interrupts */
+ ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS3, 0xFF);
+ if (ret)
+ goto err_ret;
+
+ /* PHY_CTRL1 : default HW settings + AUTOACK enabled */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+ DAR_PHY_CTRL1_AUTOACK, DAR_PHY_CTRL1_AUTOACK);
+
+ /* PHY_CTRL2 : disable all interrupts */
+ ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL2, 0xFF);
+ if (ret)
+ goto err_ret;
+
+ /* PHY_CTRL3 : disable all timers and remaining interrupts */
+ ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL3,
+ DAR_PHY_CTRL3_ASM_MSK | DAR_PHY_CTRL3_PB_ERR_MSK |
+ DAR_PHY_CTRL3_WAKE_MSK);
+ if (ret)
+ goto err_ret;
+
+ /* SRC_CTRL : enable Acknowledge Frame Pending and
+ * Source Address Matching Enable
+ */
+ ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL,
+ DAR_SRC_CTRL_ACK_FRM_PND |
+ (DAR_SRC_CTRL_INDEX << DAR_SRC_CTRL_INDEX_SHIFT));
+ if (ret)
+ goto err_ret;
+
+ /* RX_FRAME_FILTER */
+ /* FRM_VER[1:0] = b11. Accept FrameVersion 0 and 1 packets */
+ ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+ IAR_RX_FRAME_FLT_FRM_VER |
+ IAR_RX_FRAME_FLT_BEACON_FT |
+ IAR_RX_FRAME_FLT_DATA_FT |
+ IAR_RX_FRAME_FLT_CMD_FT);
+ if (ret)
+ goto err_ret;
+
+ dev_info(printdev(lp), "MCR20A DAR overwrites version: 0x%02x\n",
+ MCR20A_OVERWRITE_VERSION);
+
+ /* Overwrites direct registers */
+ ret = regmap_write(lp->regmap_dar, DAR_OVERWRITE_VER,
+ MCR20A_OVERWRITE_VERSION);
+ if (ret)
+ goto err_ret;
+
+ /* Overwrites indirect registers */
+ ret = regmap_multi_reg_write(lp->regmap_iar, mar20a_iar_overwrites,
+ ARRAY_SIZE(mar20a_iar_overwrites));
+ if (ret)
+ goto err_ret;
+
+ /* Clear HW indirect queue */
+ dev_dbg(printdev(lp), "clear HW indirect queue\n");
+ for (index = 0; index < MCR20A_PHY_INDIRECT_QUEUE_SIZE; index++) {
+ phy_reg = (u8)(((index & DAR_SRC_CTRL_INDEX) <<
+ DAR_SRC_CTRL_INDEX_SHIFT)
+ | (DAR_SRC_CTRL_SRCADDR_EN)
+ | (DAR_SRC_CTRL_INDEX_DISABLE));
+ ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL, phy_reg);
+ if (ret)
+ goto err_ret;
+ phy_reg = 0;
+ }
+
+ /* Assign HW Indirect hash table to PAN0 */
+ ret = regmap_read(lp->regmap_iar, IAR_DUAL_PAN_CTRL, &phy_reg);
+ if (ret)
+ goto err_ret;
+
+ /* Clear current lvl */
+ phy_reg &= ~IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK;
+
+ /* Set new lvl */
+ phy_reg |= MCR20A_PHY_INDIRECT_QUEUE_SIZE <<
+ IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT;
+ ret = regmap_write(lp->regmap_iar, IAR_DUAL_PAN_CTRL, phy_reg);
+ if (ret)
+ goto err_ret;
+
+ /* Set CCA threshold to -75 dBm */
+ ret = regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, 0x4B);
+ if (ret)
+ goto err_ret;
+
+ /* Set prescaller to obtain 1 symbol (16us) timebase */
+ ret = regmap_write(lp->regmap_iar, IAR_TMR_PRESCALE, 0x05);
+ if (ret)
+ goto err_ret;
+
+ /* Enable autodoze mode. */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_PWR_MODES,
+ DAR_PWR_MODES_AUTODOZE,
+ DAR_PWR_MODES_AUTODOZE);
+ if (ret)
+ goto err_ret;
+
+ /* Disable clk_out */
+ ret = regmap_update_bits(lp->regmap_dar, DAR_CLK_OUT_CTRL,
+ DAR_CLK_OUT_CTRL_EN, 0x0);
+ if (ret)
+ goto err_ret;
+
+ return 0;
+
+err_ret:
+ return ret;
+}
+
+static int
+mcr20a_probe(struct spi_device *spi)
+{
+ struct ieee802154_hw *hw;
+ struct mcr20a_local *lp;
+ struct mcr20a_platform_data *pdata;
+ int irq_type;
+ int ret = -ENOMEM;
+
+ dev_dbg(&spi->dev, "%s\n", __func__);
+
+ if (!spi->irq) {
+ dev_err(&spi->dev, "no IRQ specified\n");
+ return -EINVAL;
+ }
+
+ pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+
+ /* set mcr20a platform data */
+ ret = mcr20a_get_platform_data(spi, pdata);
+ if (ret < 0) {
+ dev_crit(&spi->dev, "mcr20a_get_platform_data failed.\n");
+ return ret;
+ }
+
+ /* init reset gpio */
+ if (gpio_is_valid(pdata->rst_gpio)) {
+ ret = devm_gpio_request_one(&spi->dev, pdata->rst_gpio,
+ GPIOF_OUT_INIT_HIGH, "reset");
+ if (ret)
+ return ret;
+ }
+
+ /* reset mcr20a */
+ if (gpio_is_valid(pdata->rst_gpio)) {
+ usleep_range(10, 20);
+ gpio_set_value_cansleep(pdata->rst_gpio, 0);
+ usleep_range(10, 20);
+ gpio_set_value_cansleep(pdata->rst_gpio, 1);
+ usleep_range(120, 240);
+ }
+
+ /* allocate ieee802154_hw and private data */
+ hw = ieee802154_alloc_hw(sizeof(*lp), &mcr20a_hw_ops);
+ if (!hw) {
+ dev_crit(&spi->dev, "ieee802154_alloc_hw failed\n");
+ return -ENOMEM;
+ }
+
+ /* init mcr20a local data */
+ lp = hw->priv;
+ lp->hw = hw;
+ lp->spi = spi;
+ lp->spi->dev.platform_data = pdata;
+ lp->pdata = pdata;
+
+ /* init ieee802154_hw */
+ hw->parent = &spi->dev;
+ ieee802154_random_extended_addr(&hw->phy->perm_extended_addr);
+
+ /* init buf */
+ lp->buf = devm_kzalloc(&spi->dev, SPI_COMMAND_BUFFER, GFP_KERNEL);
+
+ if (!lp->buf)
+ return -ENOMEM;
+
+ mcr20a_setup_tx_spi_messages(lp);
+ mcr20a_setup_rx_spi_messages(lp);
+ mcr20a_setup_irq_spi_messages(lp);
+
+ /* setup regmap */
+ lp->regmap_dar = devm_regmap_init_spi(spi, &mcr20a_dar_regmap);
+ if (IS_ERR(lp->regmap_dar)) {
+ ret = PTR_ERR(lp->regmap_dar);
+ dev_err(&spi->dev, "Failed to allocate dar map: %d\n",
+ ret);
+ goto free_dev;
+ }
+
+ lp->regmap_iar = devm_regmap_init_spi(spi, &mcr20a_iar_regmap);
+ if (IS_ERR(lp->regmap_iar)) {
+ ret = PTR_ERR(lp->regmap_iar);
+ dev_err(&spi->dev, "Failed to allocate iar map: %d\n", ret);
+ goto free_dev;
+ }
+
+ mcr20a_hw_setup(lp);
+
+ spi_set_drvdata(spi, lp);
+
+ ret = mcr20a_phy_init(lp);
+ if (ret < 0) {
+ dev_crit(&spi->dev, "mcr20a_phy_init failed\n");
+ goto free_dev;
+ }
+
+ irq_type = irq_get_trigger_type(spi->irq);
+ if (!irq_type)
+ irq_type = IRQF_TRIGGER_FALLING;
+
+ ret = devm_request_irq(&spi->dev, spi->irq, mcr20a_irq_isr,
+ irq_type, dev_name(&spi->dev), lp);
+ if (ret) {
+ dev_err(&spi->dev, "could not request_irq for mcr20a\n");
+ ret = -ENODEV;
+ goto free_dev;
+ }
+
+ /* disable_irq by default and wait for starting hardware */
+ disable_irq(spi->irq);
+
+ ret = ieee802154_register_hw(hw);
+ if (ret) {
+ dev_crit(&spi->dev, "ieee802154_register_hw failed\n");
+ goto free_dev;
+ }
+
+ return ret;
+
+free_dev:
+ ieee802154_free_hw(lp->hw);
+
+ return ret;
+}
+
+static int mcr20a_remove(struct spi_device *spi)
+{
+ struct mcr20a_local *lp = spi_get_drvdata(spi);
+
+ dev_dbg(&spi->dev, "%s\n", __func__);
+
+ ieee802154_unregister_hw(lp->hw);
+ ieee802154_free_hw(lp->hw);
+
+ return 0;
+}
+
+static const struct of_device_id mcr20a_of_match[] = {
+ { .compatible = "nxp,mcr20a", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, mcr20a_of_match);
+
+static const struct spi_device_id mcr20a_device_id[] = {
+ { .name = "mcr20a", },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, mcr20a_device_id);
+
+static struct spi_driver mcr20a_driver = {
+ .id_table = mcr20a_device_id,
+ .driver = {
+ .of_match_table = of_match_ptr(mcr20a_of_match),
+ .name = "mcr20a",
+ },
+ .probe = mcr20a_probe,
+ .remove = mcr20a_remove,
+};
+
+module_spi_driver(mcr20a_driver);
+
+MODULE_DESCRIPTION("MCR20A Transceiver Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Xue Liu <liuxuenetmail@gmail>");
diff --git a/drivers/net/ieee802154/mcr20a.h b/drivers/net/ieee802154/mcr20a.h
new file mode 100644
index 000000000000..6da4fd00b3c5
--- /dev/null
+++ b/drivers/net/ieee802154/mcr20a.h
@@ -0,0 +1,498 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef _MCR20A_H
+#define _MCR20A_H
+
+/* Direct Accress Register */
+#define DAR_IRQ_STS1 0x00
+#define DAR_IRQ_STS2 0x01
+#define DAR_IRQ_STS3 0x02
+#define DAR_PHY_CTRL1 0x03
+#define DAR_PHY_CTRL2 0x04
+#define DAR_PHY_CTRL3 0x05
+#define DAR_RX_FRM_LEN 0x06
+#define DAR_PHY_CTRL4 0x07
+#define DAR_SRC_CTRL 0x08
+#define DAR_SRC_ADDRS_SUM_LSB 0x09
+#define DAR_SRC_ADDRS_SUM_MSB 0x0A
+#define DAR_CCA1_ED_FNL 0x0B
+#define DAR_EVENT_TMR_LSB 0x0C
+#define DAR_EVENT_TMR_MSB 0x0D
+#define DAR_EVENT_TMR_USB 0x0E
+#define DAR_TIMESTAMP_LSB 0x0F
+#define DAR_TIMESTAMP_MSB 0x10
+#define DAR_TIMESTAMP_USB 0x11
+#define DAR_T3CMP_LSB 0x12
+#define DAR_T3CMP_MSB 0x13
+#define DAR_T3CMP_USB 0x14
+#define DAR_T2PRIMECMP_LSB 0x15
+#define DAR_T2PRIMECMP_MSB 0x16
+#define DAR_T1CMP_LSB 0x17
+#define DAR_T1CMP_MSB 0x18
+#define DAR_T1CMP_USB 0x19
+#define DAR_T2CMP_LSB 0x1A
+#define DAR_T2CMP_MSB 0x1B
+#define DAR_T2CMP_USB 0x1C
+#define DAR_T4CMP_LSB 0x1D
+#define DAR_T4CMP_MSB 0x1E
+#define DAR_T4CMP_USB 0x1F
+#define DAR_PLL_INT0 0x20
+#define DAR_PLL_FRAC0_LSB 0x21
+#define DAR_PLL_FRAC0_MSB 0x22
+#define DAR_PA_PWR 0x23
+#define DAR_SEQ_STATE 0x24
+#define DAR_LQI_VALUE 0x25
+#define DAR_RSSI_CCA_CONT 0x26
+/*------------------ 0x27 */
+#define DAR_ASM_CTRL1 0x28
+#define DAR_ASM_CTRL2 0x29
+#define DAR_ASM_DATA_0 0x2A
+#define DAR_ASM_DATA_1 0x2B
+#define DAR_ASM_DATA_2 0x2C
+#define DAR_ASM_DATA_3 0x2D
+#define DAR_ASM_DATA_4 0x2E
+#define DAR_ASM_DATA_5 0x2F
+#define DAR_ASM_DATA_6 0x30
+#define DAR_ASM_DATA_7 0x31
+#define DAR_ASM_DATA_8 0x32
+#define DAR_ASM_DATA_9 0x33
+#define DAR_ASM_DATA_A 0x34
+#define DAR_ASM_DATA_B 0x35
+#define DAR_ASM_DATA_C 0x36
+#define DAR_ASM_DATA_D 0x37
+#define DAR_ASM_DATA_E 0x38
+#define DAR_ASM_DATA_F 0x39
+/*----------------------- 0x3A */
+#define DAR_OVERWRITE_VER 0x3B
+#define DAR_CLK_OUT_CTRL 0x3C
+#define DAR_PWR_MODES 0x3D
+#define IAR_INDEX 0x3E
+#define IAR_DATA 0x3F
+
+/* Indirect Resgister Memory */
+#define IAR_PART_ID 0x00
+#define IAR_XTAL_TRIM 0x01
+#define IAR_PMC_LP_TRIM 0x02
+#define IAR_MACPANID0_LSB 0x03
+#define IAR_MACPANID0_MSB 0x04
+#define IAR_MACSHORTADDRS0_LSB 0x05
+#define IAR_MACSHORTADDRS0_MSB 0x06
+#define IAR_MACLONGADDRS0_0 0x07
+#define IAR_MACLONGADDRS0_8 0x08
+#define IAR_MACLONGADDRS0_16 0x09
+#define IAR_MACLONGADDRS0_24 0x0A
+#define IAR_MACLONGADDRS0_32 0x0B
+#define IAR_MACLONGADDRS0_40 0x0C
+#define IAR_MACLONGADDRS0_48 0x0D
+#define IAR_MACLONGADDRS0_56 0x0E
+#define IAR_RX_FRAME_FILTER 0x0F
+#define IAR_PLL_INT1 0x10
+#define IAR_PLL_FRAC1_LSB 0x11
+#define IAR_PLL_FRAC1_MSB 0x12
+#define IAR_MACPANID1_LSB 0x13
+#define IAR_MACPANID1_MSB 0x14
+#define IAR_MACSHORTADDRS1_LSB 0x15
+#define IAR_MACSHORTADDRS1_MSB 0x16
+#define IAR_MACLONGADDRS1_0 0x17
+#define IAR_MACLONGADDRS1_8 0x18
+#define IAR_MACLONGADDRS1_16 0x19
+#define IAR_MACLONGADDRS1_24 0x1A
+#define IAR_MACLONGADDRS1_32 0x1B
+#define IAR_MACLONGADDRS1_40 0x1C
+#define IAR_MACLONGADDRS1_48 0x1D
+#define IAR_MACLONGADDRS1_56 0x1E
+#define IAR_DUAL_PAN_CTRL 0x1F
+#define IAR_DUAL_PAN_DWELL 0x20
+#define IAR_DUAL_PAN_STS 0x21
+#define IAR_CCA1_THRESH 0x22
+#define IAR_CCA1_ED_OFFSET_COMP 0x23
+#define IAR_LQI_OFFSET_COMP 0x24
+#define IAR_CCA_CTRL 0x25
+#define IAR_CCA2_CORR_PEAKS 0x26
+#define IAR_CCA2_CORR_THRESH 0x27
+#define IAR_TMR_PRESCALE 0x28
+/*-------------------- 0x29 */
+#define IAR_GPIO_DATA 0x2A
+#define IAR_GPIO_DIR 0x2B
+#define IAR_GPIO_PUL_EN 0x2C
+#define IAR_GPIO_PUL_SEL 0x2D
+#define IAR_GPIO_DS 0x2E
+/*------------------ 0x2F */
+#define IAR_ANT_PAD_CTRL 0x30
+#define IAR_MISC_PAD_CTRL 0x31
+#define IAR_BSM_CTRL 0x32
+/*------------------- 0x33 */
+#define IAR_RNG 0x34
+#define IAR_RX_BYTE_COUNT 0x35
+#define IAR_RX_WTR_MARK 0x36
+#define IAR_SOFT_RESET 0x37
+#define IAR_TXDELAY 0x38
+#define IAR_ACKDELAY 0x39
+#define IAR_SEQ_MGR_CTRL 0x3A
+#define IAR_SEQ_MGR_STS 0x3B
+#define IAR_SEQ_T_STS 0x3C
+#define IAR_ABORT_STS 0x3D
+#define IAR_CCCA_BUSY_CNT 0x3E
+#define IAR_SRC_ADDR_CHECKSUM1 0x3F
+#define IAR_SRC_ADDR_CHECKSUM2 0x40
+#define IAR_SRC_TBL_VALID1 0x41
+#define IAR_SRC_TBL_VALID2 0x42
+#define IAR_FILTERFAIL_CODE1 0x43
+#define IAR_FILTERFAIL_CODE2 0x44
+#define IAR_SLOT_PRELOAD 0x45
+/*-------------------- 0x46 */
+#define IAR_CORR_VT 0x47
+#define IAR_SYNC_CTRL 0x48
+#define IAR_PN_LSB_0 0x49
+#define IAR_PN_LSB_1 0x4A
+#define IAR_PN_MSB_0 0x4B
+#define IAR_PN_MSB_1 0x4C
+#define IAR_CORR_NVAL 0x4D
+#define IAR_TX_MODE_CTRL 0x4E
+#define IAR_SNF_THR 0x4F
+#define IAR_FAD_THR 0x50
+#define IAR_ANT_AGC_CTRL 0x51
+#define IAR_AGC_THR1 0x52
+#define IAR_AGC_THR2 0x53
+#define IAR_AGC_HYS 0x54
+#define IAR_AFC 0x55
+/*------------------- 0x56 */
+/*------------------- 0x57 */
+#define IAR_PHY_STS 0x58
+#define IAR_RX_MAX_CORR 0x59
+#define IAR_RX_MAX_PREAMBLE 0x5A
+#define IAR_RSSI 0x5B
+/*------------------- 0x5C */
+/*------------------- 0x5D */
+#define IAR_PLL_DIG_CTRL 0x5E
+#define IAR_VCO_CAL 0x5F
+#define IAR_VCO_BEST_DIFF 0x60
+#define IAR_VCO_BIAS 0x61
+#define IAR_KMOD_CTRL 0x62
+#define IAR_KMOD_CAL 0x63
+#define IAR_PA_CAL 0x64
+#define IAR_PA_PWRCAL 0x65
+#define IAR_ATT_RSSI1 0x66
+#define IAR_ATT_RSSI2 0x67
+#define IAR_RSSI_OFFSET 0x68
+#define IAR_RSSI_SLOPE 0x69
+#define IAR_RSSI_CAL1 0x6A
+#define IAR_RSSI_CAL2 0x6B
+/*------------------- 0x6C */
+/*------------------- 0x6D */
+#define IAR_XTAL_CTRL 0x6E
+#define IAR_XTAL_COMP_MIN 0x6F
+#define IAR_XTAL_COMP_MAX 0x70
+#define IAR_XTAL_GM 0x71
+/*------------------- 0x72 */
+/*------------------- 0x73 */
+#define IAR_LNA_TUNE 0x74
+#define IAR_LNA_AGCGAIN 0x75
+/*------------------- 0x76 */
+/*------------------- 0x77 */
+#define IAR_CHF_PMA_GAIN 0x78
+#define IAR_CHF_IBUF 0x79
+#define IAR_CHF_QBUF 0x7A
+#define IAR_CHF_IRIN 0x7B
+#define IAR_CHF_QRIN 0x7C
+#define IAR_CHF_IL 0x7D
+#define IAR_CHF_QL 0x7E
+#define IAR_CHF_CC1 0x7F
+#define IAR_CHF_CCL 0x80
+#define IAR_CHF_CC2 0x81
+#define IAR_CHF_IROUT 0x82
+#define IAR_CHF_QROUT 0x83
+/*------------------- 0x84 */
+/*------------------- 0x85 */
+#define IAR_RSSI_CTRL 0x86
+/*------------------- 0x87 */
+/*------------------- 0x88 */
+#define IAR_PA_BIAS 0x89
+#define IAR_PA_TUNING 0x8A
+/*------------------- 0x8B */
+/*------------------- 0x8C */
+#define IAR_PMC_HP_TRIM 0x8D
+#define IAR_VREGA_TRIM 0x8E
+/*------------------- 0x8F */
+/*------------------- 0x90 */
+#define IAR_VCO_CTRL1 0x91
+#define IAR_VCO_CTRL2 0x92
+/*------------------- 0x93 */
+/*------------------- 0x94 */
+#define IAR_ANA_SPARE_OUT1 0x95
+#define IAR_ANA_SPARE_OUT2 0x96
+#define IAR_ANA_SPARE_IN 0x97
+#define IAR_MISCELLANEOUS 0x98
+/*------------------- 0x99 */
+#define IAR_SEQ_MGR_OVRD0 0x9A
+#define IAR_SEQ_MGR_OVRD1 0x9B
+#define IAR_SEQ_MGR_OVRD2 0x9C
+#define IAR_SEQ_MGR_OVRD3 0x9D
+#define IAR_SEQ_MGR_OVRD4 0x9E
+#define IAR_SEQ_MGR_OVRD5 0x9F
+#define IAR_SEQ_MGR_OVRD6 0xA0
+#define IAR_SEQ_MGR_OVRD7 0xA1
+/*------------------- 0xA2 */
+#define IAR_TESTMODE_CTRL 0xA3
+#define IAR_DTM_CTRL1 0xA4
+#define IAR_DTM_CTRL2 0xA5
+#define IAR_ATM_CTRL1 0xA6
+#define IAR_ATM_CTRL2 0xA7
+#define IAR_ATM_CTRL3 0xA8
+/*------------------- 0xA9 */
+#define IAR_LIM_FE_TEST_CTRL 0xAA
+#define IAR_CHF_TEST_CTRL 0xAB
+#define IAR_VCO_TEST_CTRL 0xAC
+#define IAR_PLL_TEST_CTRL 0xAD
+#define IAR_PA_TEST_CTRL 0xAE
+#define IAR_PMC_TEST_CTRL 0xAF
+#define IAR_SCAN_DTM_PROTECT_1 0xFE
+#define IAR_SCAN_DTM_PROTECT_0 0xFF
+
+/* IRQSTS1 bits */
+#define DAR_IRQSTS1_RX_FRM_PEND BIT(7)
+#define DAR_IRQSTS1_PLL_UNLOCK_IRQ BIT(6)
+#define DAR_IRQSTS1_FILTERFAIL_IRQ BIT(5)
+#define DAR_IRQSTS1_RXWTRMRKIRQ BIT(4)
+#define DAR_IRQSTS1_CCAIRQ BIT(3)
+#define DAR_IRQSTS1_RXIRQ BIT(2)
+#define DAR_IRQSTS1_TXIRQ BIT(1)
+#define DAR_IRQSTS1_SEQIRQ BIT(0)
+
+/* IRQSTS2 bits */
+#define DAR_IRQSTS2_CRCVALID BIT(7)
+#define DAR_IRQSTS2_CCA BIT(6)
+#define DAR_IRQSTS2_SRCADDR BIT(5)
+#define DAR_IRQSTS2_PI BIT(4)
+#define DAR_IRQSTS2_TMRSTATUS BIT(3)
+#define DAR_IRQSTS2_ASM_IRQ BIT(2)
+#define DAR_IRQSTS2_PB_ERR_IRQ BIT(1)
+#define DAR_IRQSTS2_WAKE_IRQ BIT(0)
+
+/* IRQSTS3 bits */
+#define DAR_IRQSTS3_TMR4MSK BIT(7)
+#define DAR_IRQSTS3_TMR3MSK BIT(6)
+#define DAR_IRQSTS3_TMR2MSK BIT(5)
+#define DAR_IRQSTS3_TMR1MSK BIT(4)
+#define DAR_IRQSTS3_TMR4IRQ BIT(3)
+#define DAR_IRQSTS3_TMR3IRQ BIT(2)
+#define DAR_IRQSTS3_TMR2IRQ BIT(1)
+#define DAR_IRQSTS3_TMR1IRQ BIT(0)
+
+/* PHY_CTRL1 bits */
+#define DAR_PHY_CTRL1_TMRTRIGEN BIT(7)
+#define DAR_PHY_CTRL1_SLOTTED BIT(6)
+#define DAR_PHY_CTRL1_CCABFRTX BIT(5)
+#define DAR_PHY_CTRL1_CCABFRTX_SHIFT 5
+#define DAR_PHY_CTRL1_RXACKRQD BIT(4)
+#define DAR_PHY_CTRL1_AUTOACK BIT(3)
+#define DAR_PHY_CTRL1_XCVSEQ_MASK 0x07
+
+/* PHY_CTRL2 bits */
+#define DAR_PHY_CTRL2_CRC_MSK BIT(7)
+#define DAR_PHY_CTRL2_PLL_UNLOCK_MSK BIT(6)
+#define DAR_PHY_CTRL2_FILTERFAIL_MSK BIT(5)
+#define DAR_PHY_CTRL2_RX_WMRK_MSK BIT(4)
+#define DAR_PHY_CTRL2_CCAMSK BIT(3)
+#define DAR_PHY_CTRL2_RXMSK BIT(2)
+#define DAR_PHY_CTRL2_TXMSK BIT(1)
+#define DAR_PHY_CTRL2_SEQMSK BIT(0)
+
+/* PHY_CTRL3 bits */
+#define DAR_PHY_CTRL3_TMR4CMP_EN BIT(7)
+#define DAR_PHY_CTRL3_TMR3CMP_EN BIT(6)
+#define DAR_PHY_CTRL3_TMR2CMP_EN BIT(5)
+#define DAR_PHY_CTRL3_TMR1CMP_EN BIT(4)
+#define DAR_PHY_CTRL3_ASM_MSK BIT(2)
+#define DAR_PHY_CTRL3_PB_ERR_MSK BIT(1)
+#define DAR_PHY_CTRL3_WAKE_MSK BIT(0)
+
+/* RX_FRM_LEN bits */
+#define DAR_RX_FRAME_LENGTH_MASK (0x7F)
+
+/* PHY_CTRL4 bits */
+#define DAR_PHY_CTRL4_TRCV_MSK BIT(7)
+#define DAR_PHY_CTRL4_TC3TMOUT BIT(6)
+#define DAR_PHY_CTRL4_PANCORDNTR0 BIT(5)
+#define DAR_PHY_CTRL4_CCATYPE (3)
+#define DAR_PHY_CTRL4_CCATYPE_SHIFT (3)
+#define DAR_PHY_CTRL4_CCATYPE_MASK (0x18)
+#define DAR_PHY_CTRL4_TMRLOAD BIT(2)
+#define DAR_PHY_CTRL4_PROMISCUOUS BIT(1)
+#define DAR_PHY_CTRL4_TC2PRIME_EN BIT(0)
+
+/* SRC_CTRL bits */
+#define DAR_SRC_CTRL_INDEX (0x0F)
+#define DAR_SRC_CTRL_INDEX_SHIFT (4)
+#define DAR_SRC_CTRL_ACK_FRM_PND BIT(3)
+#define DAR_SRC_CTRL_SRCADDR_EN BIT(2)
+#define DAR_SRC_CTRL_INDEX_EN BIT(1)
+#define DAR_SRC_CTRL_INDEX_DISABLE BIT(0)
+
+/* DAR_ASM_CTRL1 bits */
+#define DAR_ASM_CTRL1_CLEAR BIT(7)
+#define DAR_ASM_CTRL1_START BIT(6)
+#define DAR_ASM_CTRL1_SELFTST BIT(5)
+#define DAR_ASM_CTRL1_CTR BIT(4)
+#define DAR_ASM_CTRL1_CBC BIT(3)
+#define DAR_ASM_CTRL1_AES BIT(2)
+#define DAR_ASM_CTRL1_LOAD_MAC BIT(1)
+
+/* DAR_ASM_CTRL2 bits */
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL (7)
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL_SHIFT (5)
+#define DAR_ASM_CTRL2_TSTPAS BIT(1)
+
+/* DAR_CLK_OUT_CTRL bits */
+#define DAR_CLK_OUT_CTRL_EXTEND BIT(7)
+#define DAR_CLK_OUT_CTRL_HIZ BIT(6)
+#define DAR_CLK_OUT_CTRL_SR BIT(5)
+#define DAR_CLK_OUT_CTRL_DS BIT(4)
+#define DAR_CLK_OUT_CTRL_EN BIT(3)
+#define DAR_CLK_OUT_CTRL_DIV (7)
+
+/* DAR_PWR_MODES bits */
+#define DAR_PWR_MODES_XTAL_READY BIT(5)
+#define DAR_PWR_MODES_XTALEN BIT(4)
+#define DAR_PWR_MODES_ASM_CLK_EN BIT(3)
+#define DAR_PWR_MODES_AUTODOZE BIT(1)
+#define DAR_PWR_MODES_PMC_MODE BIT(0)
+
+/* RX_FRAME_FILTER bits */
+#define IAR_RX_FRAME_FLT_FRM_VER (0xC0)
+#define IAR_RX_FRAME_FLT_FRM_VER_SHIFT (6)
+#define IAR_RX_FRAME_FLT_ACTIVE_PROMISCUOUS BIT(5)
+#define IAR_RX_FRAME_FLT_NS_FT BIT(4)
+#define IAR_RX_FRAME_FLT_CMD_FT BIT(3)
+#define IAR_RX_FRAME_FLT_ACK_FT BIT(2)
+#define IAR_RX_FRAME_FLT_DATA_FT BIT(1)
+#define IAR_RX_FRAME_FLT_BEACON_FT BIT(0)
+
+/* DUAL_PAN_CTRL bits */
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK (0xF0)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT (4)
+#define IAR_DUAL_PAN_CTRL_CURRENT_NETWORK BIT(3)
+#define IAR_DUAL_PAN_CTRL_PANCORDNTR1 BIT(2)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_AUTO BIT(1)
+#define IAR_DUAL_PAN_CTRL_ACTIVE_NETWORK BIT(0)
+
+/* DUAL_PAN_STS bits */
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN1 BIT(7)
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN0 BIT(6)
+#define IAR_DUAL_PAN_STS_DUAL_PAN_REMAIN (0x3F)
+
+/* CCA_CTRL bits */
+#define IAR_CCA_CTRL_AGC_FRZ_EN BIT(6)
+#define IAR_CCA_CTRL_CONT_RSSI_EN BIT(5)
+#define IAR_CCA_CTRL_LQI_RSSI_NOT_CORR BIT(4)
+#define IAR_CCA_CTRL_CCA3_AND_NOT_OR BIT(3)
+#define IAR_CCA_CTRL_POWER_COMP_EN_LQI BIT(2)
+#define IAR_CCA_CTRL_POWER_COMP_EN_ED BIT(1)
+#define IAR_CCA_CTRL_POWER_COMP_EN_CCA1 BIT(0)
+
+/* ANT_PAD_CTRL bits */
+#define IAR_ANT_PAD_CTRL_ANTX_POL (0x0F)
+#define IAR_ANT_PAD_CTRL_ANTX_POL_SHIFT (4)
+#define IAR_ANT_PAD_CTRL_ANTX_CTRLMODE BIT(3)
+#define IAR_ANT_PAD_CTRL_ANTX_HZ BIT(2)
+#define IAR_ANT_PAD_CTRL_ANTX_EN (3)
+
+/* MISC_PAD_CTRL bits */
+#define IAR_MISC_PAD_CTRL_MISO_HIZ_EN BIT(3)
+#define IAR_MISC_PAD_CTRL_IRQ_B_OD BIT(2)
+#define IAR_MISC_PAD_CTRL_NON_GPIO_DS BIT(1)
+#define IAR_MISC_PAD_CTRL_ANTX_CURR (1)
+
+/* ANT_AGC_CTRL bits */
+#define IAR_ANT_AGC_CTRL_FAD_EN_SHIFT (0)
+#define IAR_ANT_AGC_CTRL_FAD_EN_MASK (1)
+#define IAR_ANT_AGC_CTRL_ANTX_SHIFT (1)
+#define IAR_ANT_AGC_CTRL_ANTX_MASK BIT(AR_ANT_AGC_CTRL_ANTX_SHIFT)
+
+/* BSM_CTRL bits */
+#define BSM_CTRL_BSM_EN (1)
+
+/* SOFT_RESET bits */
+#define IAR_SOFT_RESET_SOG_RST BIT(7)
+#define IAR_SOFT_RESET_REGS_RST BIT(4)
+#define IAR_SOFT_RESET_PLL_RST BIT(3)
+#define IAR_SOFT_RESET_TX_RST BIT(2)
+#define IAR_SOFT_RESET_RX_RST BIT(1)
+#define IAR_SOFT_RESET_SEQ_MGR_RST BIT(0)
+
+/* SEQ_MGR_CTRL bits */
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL (3)
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL_SHIFT (6)
+#define IAR_SEQ_MGR_CTRL_NO_RX_RECYCLE BIT(5)
+#define IAR_SEQ_MGR_CTRL_LATCH_PREAMBLE BIT(4)
+#define IAR_SEQ_MGR_CTRL_EVENT_TMR_DO_NOT_LATCH BIT(3)
+#define IAR_SEQ_MGR_CTRL_CLR_NEW_SEQ_INHIBIT BIT(2)
+#define IAR_SEQ_MGR_CTRL_PSM_LOCK_DIS BIT(1)
+#define IAR_SEQ_MGR_CTRL_PLL_ABORT_OVRD BIT(0)
+
+/* SEQ_MGR_STS bits */
+#define IAR_SEQ_MGR_STS_TMR2_SEQ_TRIG_ARMED BIT(7)
+#define IAR_SEQ_MGR_STS_RX_MODE BIT(6)
+#define IAR_SEQ_MGR_STS_RX_TIMEOUT_PENDING BIT(5)
+#define IAR_SEQ_MGR_STS_NEW_SEQ_INHIBIT BIT(4)
+#define IAR_SEQ_MGR_STS_SEQ_IDLE BIT(3)
+#define IAR_SEQ_MGR_STS_XCVSEQ_ACTUAL (7)
+
+/* ABORT_STS bits */
+#define IAR_ABORT_STS_PLL_ABORTED BIT(2)
+#define IAR_ABORT_STS_TC3_ABORTED BIT(1)
+#define IAR_ABORT_STS_SW_ABORTED BIT(0)
+
+/* IAR_FILTERFAIL_CODE2 bits */
+#define IAR_FILTERFAIL_CODE2_PAN_SEL BIT(7)
+#define IAR_FILTERFAIL_CODE2_9_8 (3)
+
+/* PHY_STS bits */
+#define IAR_PHY_STS_PLL_UNLOCK BIT(7)
+#define IAR_PHY_STS_PLL_LOCK_ERR BIT(6)
+#define IAR_PHY_STS_PLL_LOCK BIT(5)
+#define IAR_PHY_STS_CRCVALID BIT(3)
+#define IAR_PHY_STS_FILTERFAIL_FLAG_SEL BIT(2)
+#define IAR_PHY_STS_SFD_DET BIT(1)
+#define IAR_PHY_STS_PREAMBLE_DET BIT(0)
+
+/* TESTMODE_CTRL bits */
+#define IAR_TEST_MODE_CTRL_HOT_ANT BIT(4)
+#define IAR_TEST_MODE_CTRL_IDEAL_RSSI_EN BIT(3)
+#define IAR_TEST_MODE_CTRL_IDEAL_PFC_EN BIT(2)
+#define IAR_TEST_MODE_CTRL_CONTINUOUS_EN BIT(1)
+#define IAR_TEST_MODE_CTRL_FPGA_EN BIT(0)
+
+/* DTM_CTRL1 bits */
+#define IAR_DTM_CTRL1_ATM_LOCKED BIT(7)
+#define IAR_DTM_CTRL1_DTM_EN BIT(6)
+#define IAR_DTM_CTRL1_PAGE5 BIT(5)
+#define IAR_DTM_CTRL1_PAGE4 BIT(4)
+#define IAR_DTM_CTRL1_PAGE3 BIT(3)
+#define IAR_DTM_CTRL1_PAGE2 BIT(2)
+#define IAR_DTM_CTRL1_PAGE1 BIT(1)
+#define IAR_DTM_CTRL1_PAGE0 BIT(0)
+
+/* TX_MODE_CTRL */
+#define IAR_TX_MODE_CTRL_TX_INV BIT(4)
+#define IAR_TX_MODE_CTRL_BT_EN BIT(3)
+#define IAR_TX_MODE_CTRL_DTS2 BIT(2)
+#define IAR_TX_MODE_CTRL_DTS1 BIT(1)
+#define IAR_TX_MODE_CTRL_DTS0 BIT(0)
+
+#define TX_MODE_CTRL_DTS_MASK (7)
+
+#endif /* _MCR20A_H */
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 5166575a164d..a115f12bf130 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -74,6 +74,7 @@ struct ipvl_dev {
DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
netdev_features_t sfeatures;
u32 msg_enable;
+ spinlock_t addrs_lock;
};
struct ipvl_addr {
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 1b5dc200b573..17daebd19e65 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -109,25 +109,33 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
const void *iaddr, bool is_v6)
{
- struct ipvl_addr *addr;
+ struct ipvl_addr *addr, *ret = NULL;
- list_for_each_entry(addr, &ipvlan->addrs, anode)
- if (addr_equal(is_v6, addr, iaddr))
- return addr;
- return NULL;
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
+ if (addr_equal(is_v6, addr, iaddr)) {
+ ret = addr;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return ret;
}
bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
{
struct ipvl_dev *ipvlan;
+ bool ret = false;
- ASSERT_RTNL();
-
- list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
- if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
- return true;
+ rcu_read_lock();
+ list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+ if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
+ ret = true;
+ break;
+ }
}
- return false;
+ rcu_read_unlock();
+ return ret;
}
static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
@@ -498,8 +506,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
/* In this mode we dont care about multicast and broadcast traffic */
if (is_multicast_ether_addr(ethh->h_dest)) {
- pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n",
- ntohs(skb->protocol));
+ pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n",
+ ntohs(skb->protocol));
kfree_skb(skb);
goto out;
}
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 67c91ceda979..3efc1c92c6a7 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -227,8 +227,10 @@ static int ipvlan_open(struct net_device *dev)
else
dev->flags &= ~IFF_NOARP;
- list_for_each_entry(addr, &ipvlan->addrs, anode)
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_add(ipvlan, addr);
+ rcu_read_unlock();
return dev_uc_add(phy_dev, phy_dev->dev_addr);
}
@@ -244,8 +246,10 @@ static int ipvlan_stop(struct net_device *dev)
dev_uc_del(phy_dev, phy_dev->dev_addr);
- list_for_each_entry(addr, &ipvlan->addrs, anode)
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
ipvlan_ht_addr_del(addr);
+ rcu_read_unlock();
return 0;
}
@@ -588,6 +592,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
ipvlan->sfeatures = IPVLAN_FEATURES;
ipvlan_adjust_mtu(ipvlan, phy_dev);
INIT_LIST_HEAD(&ipvlan->addrs);
+ spin_lock_init(&ipvlan->addrs_lock);
/* TODO Probably put random address here to be presented to the
* world but keep using the physical-dev address for the outgoing
@@ -665,11 +670,13 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
struct ipvl_dev *ipvlan = netdev_priv(dev);
struct ipvl_addr *addr, *next;
+ spin_lock_bh(&ipvlan->addrs_lock);
list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
ipvlan_ht_addr_del(addr);
- list_del(&addr->anode);
+ list_del_rcu(&addr->anode);
kfree_rcu(addr, rcu);
}
+ spin_unlock_bh(&ipvlan->addrs_lock);
ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
list_del_rcu(&ipvlan->pnode);
@@ -760,8 +767,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
if (dev->reg_state != NETREG_UNREGISTERING)
break;
- list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
- pnode)
+ list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode)
ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
&lst_kill);
unregister_netdevice_many(&lst_kill);
@@ -793,6 +799,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
return NOTIFY_DONE;
}
+/* the caller must held the addrs lock */
static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
{
struct ipvl_addr *addr;
@@ -811,7 +818,8 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
addr->atype = IPVL_IPV6;
#endif
}
- list_add_tail(&addr->anode, &ipvlan->addrs);
+
+ list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
/* If the interface is not up, the address will be added to the hash
* list by ipvlan_open.
@@ -826,15 +834,17 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
{
struct ipvl_addr *addr;
+ spin_lock_bh(&ipvlan->addrs_lock);
addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
- if (!addr)
+ if (!addr) {
+ spin_unlock_bh(&ipvlan->addrs_lock);
return;
+ }
ipvlan_ht_addr_del(addr);
- list_del(&addr->anode);
+ list_del_rcu(&addr->anode);
+ spin_unlock_bh(&ipvlan->addrs_lock);
kfree_rcu(addr, rcu);
-
- return;
}
static bool ipvlan_is_valid_dev(const struct net_device *dev)
@@ -853,14 +863,17 @@ static bool ipvlan_is_valid_dev(const struct net_device *dev)
#if IS_ENABLED(CONFIG_IPV6)
static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
{
- if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
+ int ret = -EINVAL;
+
+ spin_lock_bh(&ipvlan->addrs_lock);
+ if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
netif_err(ipvlan, ifup, ipvlan->dev,
"Failed to add IPv6=%pI6c addr for %s intf\n",
ip6_addr, ipvlan->dev->name);
- return -EINVAL;
- }
-
- return ipvlan_add_addr(ipvlan, ip6_addr, true);
+ else
+ ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
+ spin_unlock_bh(&ipvlan->addrs_lock);
+ return ret;
}
static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
@@ -899,10 +912,6 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
struct ipvl_dev *ipvlan = netdev_priv(dev);
- /* FIXME IPv6 autoconf calls us from bh without RTNL */
- if (in_softirq())
- return NOTIFY_DONE;
-
if (!ipvlan_is_valid_dev(dev))
return NOTIFY_DONE;
@@ -922,14 +931,17 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
{
- if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
+ int ret = -EINVAL;
+
+ spin_lock_bh(&ipvlan->addrs_lock);
+ if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
netif_err(ipvlan, ifup, ipvlan->dev,
"Failed to add IPv4=%pI4 on %s intf.\n",
ip4_addr, ipvlan->dev->name);
- return -EINVAL;
- }
-
- return ipvlan_add_addr(ipvlan, ip4_addr, false);
+ else
+ ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
+ spin_unlock_bh(&ipvlan->addrs_lock);
+ return ret;
}
static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
@@ -1024,6 +1036,7 @@ static struct pernet_operations ipvlan_net_ops = {
.id = &ipvlan_netid,
.size = sizeof(struct ipvlan_netns),
.exit = ipvlan_ns_exit,
+ .async = true,
};
static int __init ipvlan_init_module(void)
diff --git a/drivers/net/phy/aquantia.c b/drivers/net/phy/aquantia.c
index e8ae50e1255e..319edc9c8ec7 100644
--- a/drivers/net/phy/aquantia.c
+++ b/drivers/net/phy/aquantia.c
@@ -38,14 +38,6 @@ static int aquantia_config_aneg(struct phy_device *phydev)
return 0;
}
-static int aquantia_aneg_done(struct phy_device *phydev)
-{
- int reg;
-
- reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
- return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
-}
-
static int aquantia_config_intr(struct phy_device *phydev)
{
int err;
@@ -125,7 +117,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQ1202",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -137,7 +129,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQ2104",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -149,7 +141,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR105",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -161,7 +153,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR106",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -173,7 +165,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR107",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
@@ -185,7 +177,7 @@ static struct phy_driver aquantia_driver[] = {
.name = "Aquantia AQR405",
.features = PHY_AQUANTIA_FEATURES,
.flags = PHY_HAS_INTERRUPT,
- .aneg_done = aquantia_aneg_done,
+ .aneg_done = genphy_c45_aneg_done,
.config_aneg = aquantia_config_aneg,
.config_intr = aquantia_config_intr,
.ack_interrupt = aquantia_ack_interrupt,
diff --git a/drivers/net/phy/cortina.c b/drivers/net/phy/cortina.c
index 9442db221834..8022cd317f62 100644
--- a/drivers/net/phy/cortina.c
+++ b/drivers/net/phy/cortina.c
@@ -30,14 +30,6 @@ static int cortina_read_reg(struct phy_device *phydev, u16 regnum)
MII_ADDR_C45 | regnum);
}
-static int cortina_config_aneg(struct phy_device *phydev)
-{
- phydev->supported = SUPPORTED_10000baseT_Full;
- phydev->advertising = SUPPORTED_10000baseT_Full;
-
- return 0;
-}
-
static int cortina_read_status(struct phy_device *phydev)
{
int gpio_int_status, ret = 0;
@@ -61,11 +53,6 @@ err:
return ret;
}
-static int cortina_soft_reset(struct phy_device *phydev)
-{
- return 0;
-}
-
static int cortina_probe(struct phy_device *phydev)
{
u32 phy_id = 0;
@@ -101,9 +88,10 @@ static struct phy_driver cortina_driver[] = {
.phy_id = PHY_ID_CS4340,
.phy_id_mask = 0xffffffff,
.name = "Cortina CS4340",
- .config_aneg = cortina_config_aneg,
+ .config_init = gen10g_config_init,
+ .config_aneg = gen10g_config_aneg,
.read_status = cortina_read_status,
- .soft_reset = cortina_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.probe = cortina_probe,
},
};
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 8a0bd98fdec7..4f1efa02a930 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -71,15 +71,6 @@ static int mv3310_probe(struct phy_device *phydev)
return 0;
}
-/*
- * Resetting the MV88X3310 causes it to become non-responsive. Avoid
- * setting the reset bit(s).
- */
-static int mv3310_soft_reset(struct phy_device *phydev)
-{
- return 0;
-}
-
static int mv3310_config_init(struct phy_device *phydev)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
@@ -377,7 +368,7 @@ static struct phy_driver mv3310_drivers[] = {
SUPPORTED_10000baseT_Full |
SUPPORTED_Backplane,
.probe = mv3310_probe,
- .soft_reset = mv3310_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.config_init = mv3310_config_init,
.config_aneg = mv3310_config_aneg,
.aneg_done = mv3310_aneg_done,
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index a4576859afae..0017eddc24db 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -268,12 +268,13 @@ EXPORT_SYMBOL_GPL(genphy_c45_read_mdix);
/* The gen10g_* functions are the old Clause 45 stub */
-static int gen10g_config_aneg(struct phy_device *phydev)
+int gen10g_config_aneg(struct phy_device *phydev)
{
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_config_aneg);
-static int gen10g_read_status(struct phy_device *phydev)
+int gen10g_read_status(struct phy_device *phydev)
{
u32 mmd_mask = phydev->c45_ids.devices_in_package;
int ret;
@@ -291,14 +292,16 @@ static int gen10g_read_status(struct phy_device *phydev)
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_read_status);
-static int gen10g_soft_reset(struct phy_device *phydev)
+int gen10g_no_soft_reset(struct phy_device *phydev)
{
/* Do nothing for now */
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_no_soft_reset);
-static int gen10g_config_init(struct phy_device *phydev)
+int gen10g_config_init(struct phy_device *phydev)
{
/* Temporarily just say we support everything */
phydev->supported = SUPPORTED_10000baseT_Full;
@@ -306,22 +309,25 @@ static int gen10g_config_init(struct phy_device *phydev)
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_config_init);
-static int gen10g_suspend(struct phy_device *phydev)
+int gen10g_suspend(struct phy_device *phydev)
{
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_suspend);
-static int gen10g_resume(struct phy_device *phydev)
+int gen10g_resume(struct phy_device *phydev)
{
return 0;
}
+EXPORT_SYMBOL_GPL(gen10g_resume);
struct phy_driver genphy_10g_driver = {
.phy_id = 0xffffffff,
.phy_id_mask = 0xffffffff,
.name = "Generic 10G PHY",
- .soft_reset = gen10g_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.config_init = gen10g_config_init,
.features = 0,
.config_aneg = gen10g_config_aneg,
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 6ac8b29b2dc3..7d1724072325 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -679,7 +679,6 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
mutex_lock(&phy->lock);
mutex_lock(&pl->state_mutex);
- pl->netdev->phydev = phy;
pl->phydev = phy;
linkmode_copy(pl->supported, supported);
linkmode_copy(pl->link_config.advertising, config.advertising);
@@ -817,7 +816,6 @@ void phylink_disconnect_phy(struct phylink *pl)
if (phy) {
mutex_lock(&phy->lock);
mutex_lock(&pl->state_mutex);
- pl->netdev->phydev = NULL;
pl->phydev = NULL;
mutex_unlock(&pl->state_mutex);
mutex_unlock(&phy->lock);
@@ -1584,25 +1582,14 @@ static int phylink_sfp_module_insert(void *upstream,
bool changed;
u8 port;
- sfp_parse_support(pl->sfp_bus, id, support);
- port = sfp_parse_port(pl->sfp_bus, id, support);
- iface = sfp_parse_interface(pl->sfp_bus, id);
-
ASSERT_RTNL();
- switch (iface) {
- case PHY_INTERFACE_MODE_SGMII:
- case PHY_INTERFACE_MODE_1000BASEX:
- case PHY_INTERFACE_MODE_2500BASEX:
- case PHY_INTERFACE_MODE_10GKR:
- break;
- default:
- return -EINVAL;
- }
+ sfp_parse_support(pl->sfp_bus, id, support);
+ port = sfp_parse_port(pl->sfp_bus, id, support);
memset(&config, 0, sizeof(config));
linkmode_copy(config.advertising, support);
- config.interface = iface;
+ config.interface = PHY_INTERFACE_MODE_NA;
config.speed = SPEED_UNKNOWN;
config.duplex = DUPLEX_UNKNOWN;
config.pause = MLO_PAUSE_AN;
@@ -1611,6 +1598,22 @@ static int phylink_sfp_module_insert(void *upstream,
/* Ignore errors if we're expecting a PHY to attach later */
ret = phylink_validate(pl, support, &config);
if (ret) {
+ netdev_err(pl->netdev, "validation with support %*pb failed: %d\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+ return ret;
+ }
+
+ iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
+ if (iface == PHY_INTERFACE_MODE_NA) {
+ netdev_err(pl->netdev,
+ "selection of interface failed, advertisment %*pb\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising);
+ return -EINVAL;
+ }
+
+ config.interface = iface;
+ ret = phylink_validate(pl, support, &config);
+ if (ret) {
netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n",
phylink_an_mode_str(MLO_AN_INBAND),
phy_modes(config.interface),
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 8961209ee949..3d4ff5d0d2a6 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -106,68 +106,6 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
EXPORT_SYMBOL_GPL(sfp_parse_port);
/**
- * sfp_parse_interface() - Parse the phy_interface_t
- * @bus: a pointer to the &struct sfp_bus structure for the sfp module
- * @id: a pointer to the module's &struct sfp_eeprom_id
- *
- * Derive the phy_interface_t mode for the information found in the
- * module's identifying EEPROM. There is no standard or defined way
- * to derive this information, so we use some heuristics.
- *
- * If the encoding is 64b66b, then the module must be >= 10G, so
- * return %PHY_INTERFACE_MODE_10GKR.
- *
- * If it's 8b10b, then it's 1G or slower. If it's definitely a fibre
- * module, return %PHY_INTERFACE_MODE_1000BASEX mode, otherwise return
- * %PHY_INTERFACE_MODE_SGMII mode.
- *
- * If the encoding is not known, return %PHY_INTERFACE_MODE_NA.
- */
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id)
-{
- phy_interface_t iface;
-
- /* Setting the serdes link mode is guesswork: there's no field in
- * the EEPROM which indicates what mode should be used.
- *
- * If the module wants 64b66b, then it must be >= 10G.
- *
- * If it's a gigabit-only fiber module, it probably does not have
- * a PHY, so switch to 802.3z negotiation mode. Otherwise, switch
- * to SGMII mode (which is required to support non-gigabit speeds).
- */
- switch (id->base.encoding) {
- case SFP_ENCODING_8472_64B66B:
- iface = PHY_INTERFACE_MODE_10GKR;
- break;
-
- case SFP_ENCODING_8B10B:
- if (!id->base.e1000_base_t &&
- !id->base.e100_base_lx &&
- !id->base.e100_base_fx)
- iface = PHY_INTERFACE_MODE_1000BASEX;
- else
- iface = PHY_INTERFACE_MODE_SGMII;
- break;
-
- default:
- if (id->base.e1000_base_cx) {
- iface = PHY_INTERFACE_MODE_1000BASEX;
- break;
- }
-
- iface = PHY_INTERFACE_MODE_NA;
- dev_err(bus->sfp_dev,
- "SFP module encoding does not support 8b10b nor 64b66b\n");
- break;
- }
-
- return iface;
-}
-EXPORT_SYMBOL_GPL(sfp_parse_interface);
-
-/**
* sfp_parse_support() - Parse the eeprom id for supported link modes
* @bus: a pointer to the &struct sfp_bus structure for the sfp module
* @id: a pointer to the module's &struct sfp_eeprom_id
@@ -180,10 +118,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support)
{
unsigned int br_min, br_nom, br_max;
-
- phylink_set(support, Autoneg);
- phylink_set(support, Pause);
- phylink_set(support, Asym_Pause);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = { 0, };
/* Decode the bitrate information to MBd */
br_min = br_nom = br_max = 0;
@@ -201,20 +136,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
/* Set ethtool support from the compliance fields. */
if (id->base.e10g_base_sr)
- phylink_set(support, 10000baseSR_Full);
+ phylink_set(modes, 10000baseSR_Full);
if (id->base.e10g_base_lr)
- phylink_set(support, 10000baseLR_Full);
+ phylink_set(modes, 10000baseLR_Full);
if (id->base.e10g_base_lrm)
- phylink_set(support, 10000baseLRM_Full);
+ phylink_set(modes, 10000baseLRM_Full);
if (id->base.e10g_base_er)
- phylink_set(support, 10000baseER_Full);
+ phylink_set(modes, 10000baseER_Full);
if (id->base.e1000_base_sx ||
id->base.e1000_base_lx ||
id->base.e1000_base_cx)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
if (id->base.e1000_base_t) {
- phylink_set(support, 1000baseT_Half);
- phylink_set(support, 1000baseT_Full);
+ phylink_set(modes, 1000baseT_Half);
+ phylink_set(modes, 1000baseT_Full);
}
/* 1000Base-PX or 1000Base-BX10 */
@@ -228,20 +163,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
if ((id->base.sfp_ct_passive || id->base.sfp_ct_active) && br_nom) {
/* This may look odd, but some manufacturers use 12000MBd */
if (br_min <= 12000 && br_max >= 10300)
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
if (br_min <= 3200 && br_max >= 3100)
- phylink_set(support, 2500baseX_Full);
+ phylink_set(modes, 2500baseX_Full);
if (br_min <= 1300 && br_max >= 1200)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
}
if (id->base.sfp_ct_passive) {
if (id->base.passive.sff8431_app_e)
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
}
if (id->base.sfp_ct_active) {
if (id->base.active.sff8431_app_e ||
id->base.active.sff8431_lim) {
- phylink_set(support, 10000baseCR_Full);
+ phylink_set(modes, 10000baseCR_Full);
}
}
@@ -249,18 +184,18 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
case 0x00: /* Unspecified */
break;
case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */
- phylink_set(support, 100000baseSR4_Full);
- phylink_set(support, 25000baseSR_Full);
+ phylink_set(modes, 100000baseSR4_Full);
+ phylink_set(modes, 25000baseSR_Full);
break;
case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */
case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */
- phylink_set(support, 100000baseLR4_ER4_Full);
+ phylink_set(modes, 100000baseLR4_ER4_Full);
break;
case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */
case 0x0c: /* 25Gbase-CR CA-S */
case 0x0d: /* 25Gbase-CR CA-N */
- phylink_set(support, 100000baseCR4_Full);
- phylink_set(support, 25000baseCR_Full);
+ phylink_set(modes, 100000baseCR4_Full);
+ phylink_set(modes, 25000baseCR_Full);
break;
default:
dev_warn(bus->sfp_dev,
@@ -274,13 +209,70 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
id->base.fc_speed_200 ||
id->base.fc_speed_400) {
if (id->base.br_nominal >= 31)
- phylink_set(support, 2500baseX_Full);
+ phylink_set(modes, 2500baseX_Full);
if (id->base.br_nominal >= 12)
- phylink_set(support, 1000baseX_Full);
+ phylink_set(modes, 1000baseX_Full);
}
+
+ /* If we haven't discovered any modes that this module supports, try
+ * the encoding and bitrate to determine supported modes. Some BiDi
+ * modules (eg, 1310nm/1550nm) are not 1000BASE-BX compliant due to
+ * the differing wavelengths, so do not set any transceiver bits.
+ */
+ if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+ /* If the encoding and bit rate allows 1000baseX */
+ if (id->base.encoding == SFP_ENCODING_8B10B && br_nom &&
+ br_min <= 1300 && br_max >= 1200)
+ phylink_set(modes, 1000baseX_Full);
+ }
+
+ bitmap_or(support, support, modes, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+ phylink_set(support, Autoneg);
+ phylink_set(support, Pause);
+ phylink_set(support, Asym_Pause);
}
EXPORT_SYMBOL_GPL(sfp_parse_support);
+/**
+ * sfp_select_interface() - Select appropriate phy_interface_t mode
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @id: a pointer to the module's &struct sfp_eeprom_id
+ * @link_modes: ethtool link modes mask
+ *
+ * Derive the phy_interface_t mode for the information found in the
+ * module's identifying EEPROM and the link modes mask. There is no
+ * standard or defined way to derive this information, so we decide
+ * based upon the link mode mask.
+ */
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes)
+{
+ if (phylink_test(link_modes, 10000baseCR_Full) ||
+ phylink_test(link_modes, 10000baseSR_Full) ||
+ phylink_test(link_modes, 10000baseLR_Full) ||
+ phylink_test(link_modes, 10000baseLRM_Full) ||
+ phylink_test(link_modes, 10000baseER_Full))
+ return PHY_INTERFACE_MODE_10GKR;
+
+ if (phylink_test(link_modes, 2500baseX_Full))
+ return PHY_INTERFACE_MODE_2500BASEX;
+
+ if (id->base.e1000_base_t ||
+ id->base.e100_base_lx ||
+ id->base.e100_base_fx)
+ return PHY_INTERFACE_MODE_SGMII;
+
+ if (phylink_test(link_modes, 1000baseX_Full))
+ return PHY_INTERFACE_MODE_1000BASEX;
+
+ dev_warn(bus->sfp_dev, "Unable to ascertain link mode\n");
+
+ return PHY_INTERFACE_MODE_NA;
+}
+EXPORT_SYMBOL_GPL(sfp_select_interface);
+
static LIST_HEAD(sfp_buses);
static DEFINE_MUTEX(sfp_mutex);
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 6c7d9289078d..83bf4959b043 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -42,6 +42,7 @@ enum {
SFP_MOD_EMPTY = 0,
SFP_MOD_PROBE,
+ SFP_MOD_HPOWER,
SFP_MOD_PRESENT,
SFP_MOD_ERROR,
@@ -86,6 +87,7 @@ static const enum gpiod_flags gpio_flags[] = {
* access the I2C EEPROM. However, Avago modules require 300ms.
*/
#define T_PROBE_INIT msecs_to_jiffies(300)
+#define T_HPOWER_LEVEL msecs_to_jiffies(300)
#define T_PROBE_RETRY msecs_to_jiffies(100)
/* SFP modules appear to always have their PHY configured for bus address
@@ -110,10 +112,12 @@ struct sfp {
struct sfp_bus *sfp_bus;
struct phy_device *mod_phy;
const struct sff_data *type;
+ u32 max_power_mW;
unsigned int (*get_state)(struct sfp *);
void (*set_state)(struct sfp *, unsigned int);
int (*read)(struct sfp *, bool, u8, void *, size_t);
+ int (*write)(struct sfp *, bool, u8, void *, size_t);
struct gpio_desc *gpio[GPIO_MAX];
@@ -201,10 +205,11 @@ static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state)
}
}
-static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
- void *buf, size_t len)
+static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+ size_t len)
{
struct i2c_msg msgs[2];
+ u8 bus_addr = a2 ? 0x51 : 0x50;
int ret;
msgs[0].addr = bus_addr;
@@ -216,17 +221,38 @@ static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
msgs[1].len = len;
msgs[1].buf = buf;
- ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs));
+ ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
if (ret < 0)
return ret;
return ret == ARRAY_SIZE(msgs) ? len : 0;
}
-static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 addr, void *buf,
- size_t len)
+static int sfp_i2c_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+ size_t len)
{
- return sfp__i2c_read(sfp->i2c, a2 ? 0x51 : 0x50, addr, buf, len);
+ struct i2c_msg msgs[1];
+ u8 bus_addr = a2 ? 0x51 : 0x50;
+ int ret;
+
+ msgs[0].addr = bus_addr;
+ msgs[0].flags = 0;
+ msgs[0].len = 1 + len;
+ msgs[0].buf = kmalloc(1 + len, GFP_KERNEL);
+ if (!msgs[0].buf)
+ return -ENOMEM;
+
+ msgs[0].buf[0] = dev_addr;
+ memcpy(&msgs[0].buf[1], buf, len);
+
+ ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
+
+ kfree(msgs[0].buf);
+
+ if (ret < 0)
+ return ret;
+
+ return ret == ARRAY_SIZE(msgs) ? len : 0;
}
static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
@@ -239,6 +265,7 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
sfp->i2c = i2c;
sfp->read = sfp_i2c_read;
+ sfp->write = sfp_i2c_write;
i2c_mii = mdio_i2c_alloc(sfp->dev, i2c);
if (IS_ERR(i2c_mii))
@@ -274,6 +301,11 @@ static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
return sfp->read(sfp, a2, addr, buf, len);
}
+static int sfp_write(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
+{
+ return sfp->write(sfp, a2, addr, buf, len);
+}
+
static unsigned int sfp_check(void *buf, size_t len)
{
u8 *p, check;
@@ -462,21 +494,83 @@ static void sfp_sm_mod_init(struct sfp *sfp)
sfp_sm_probe_phy(sfp);
}
+static int sfp_sm_mod_hpower(struct sfp *sfp)
+{
+ u32 power;
+ u8 val;
+ int err;
+
+ power = 1000;
+ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
+ power = 1500;
+ if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
+ power = 2000;
+
+ if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE &&
+ (sfp->id.ext.diagmon & (SFP_DIAGMON_DDM | SFP_DIAGMON_ADDRMODE)) !=
+ SFP_DIAGMON_DDM) {
+ /* The module appears not to implement bus address 0xa2,
+ * or requires an address change sequence, so assume that
+ * the module powers up in the indicated power mode.
+ */
+ if (power > sfp->max_power_mW) {
+ dev_err(sfp->dev,
+ "Host does not support %u.%uW modules\n",
+ power / 1000, (power / 100) % 10);
+ return -EINVAL;
+ }
+ return 0;
+ }
+
+ if (power > sfp->max_power_mW) {
+ dev_warn(sfp->dev,
+ "Host does not support %u.%uW modules, module left in power mode 1\n",
+ power / 1000, (power / 100) % 10);
+ return 0;
+ }
+
+ if (power <= 1000)
+ return 0;
+
+ err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+ if (err != sizeof(val)) {
+ dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err);
+ err = -EAGAIN;
+ goto err;
+ }
+
+ val |= BIT(0);
+
+ err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+ if (err != sizeof(val)) {
+ dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err);
+ err = -EAGAIN;
+ goto err;
+ }
+
+ dev_info(sfp->dev, "Module switched to %u.%uW power level\n",
+ power / 1000, (power / 100) % 10);
+ return T_HPOWER_LEVEL;
+
+err:
+ return err;
+}
+
static int sfp_sm_mod_probe(struct sfp *sfp)
{
/* SFP module inserted - read I2C data */
struct sfp_eeprom_id id;
u8 check;
- int err;
+ int ret;
- err = sfp_read(sfp, false, 0, &id, sizeof(id));
- if (err < 0) {
- dev_err(sfp->dev, "failed to read EEPROM: %d\n", err);
+ ret = sfp_read(sfp, false, 0, &id, sizeof(id));
+ if (ret < 0) {
+ dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret);
return -EAGAIN;
}
- if (err != sizeof(id)) {
- dev_err(sfp->dev, "EEPROM short read: %d\n", err);
+ if (ret != sizeof(id)) {
+ dev_err(sfp->dev, "EEPROM short read: %d\n", ret);
return -EAGAIN;
}
@@ -521,7 +615,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
dev_warn(sfp->dev,
"module address swap to access page 0xA2 is not supported.\n");
- return sfp_module_insert(sfp->sfp_bus, &sfp->id);
+ ret = sfp_module_insert(sfp->sfp_bus, &sfp->id);
+ if (ret < 0)
+ return ret;
+
+ return sfp_sm_mod_hpower(sfp);
}
static void sfp_sm_mod_remove(struct sfp *sfp)
@@ -560,17 +658,25 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
if (event == SFP_E_REMOVE) {
sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
} else if (event == SFP_E_TIMEOUT) {
- int err = sfp_sm_mod_probe(sfp);
+ int val = sfp_sm_mod_probe(sfp);
- if (err == 0)
+ if (val == 0)
sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
- else if (err == -EAGAIN)
- sfp_sm_set_timer(sfp, T_PROBE_RETRY);
- else
+ else if (val > 0)
+ sfp_sm_ins_next(sfp, SFP_MOD_HPOWER, val);
+ else if (val != -EAGAIN)
sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0);
+ else
+ sfp_sm_set_timer(sfp, T_PROBE_RETRY);
}
break;
+ case SFP_MOD_HPOWER:
+ if (event == SFP_E_TIMEOUT) {
+ sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
+ break;
+ }
+ /* fallthrough */
case SFP_MOD_PRESENT:
case SFP_MOD_ERROR:
if (event == SFP_E_REMOVE) {
@@ -889,6 +995,14 @@ static int sfp_probe(struct platform_device *pdev)
if (!(sfp->gpio[GPIO_MODDEF0]))
sfp->get_state = sff_gpio_get_state;
+ device_property_read_u32(&pdev->dev, "maximum-power-milliwatt",
+ &sfp->max_power_mW);
+ if (!sfp->max_power_mW)
+ sfp->max_power_mW = 1000;
+
+ dev_info(sfp->dev, "Host maximum power %u.%uW\n",
+ sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10);
+
sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops);
if (!sfp->sfp_bus)
return -ENOMEM;
diff --git a/drivers/net/phy/teranetics.c b/drivers/net/phy/teranetics.c
index fb2cef764e9a..22f3bdd8206c 100644
--- a/drivers/net/phy/teranetics.c
+++ b/drivers/net/phy/teranetics.c
@@ -34,39 +34,17 @@ MODULE_LICENSE("GPL v2");
MDIO_PHYXS_LNSTAT_SYNC3 | \
MDIO_PHYXS_LNSTAT_ALIGN)
-static int teranetics_config_init(struct phy_device *phydev)
-{
- phydev->supported = SUPPORTED_10000baseT_Full;
- phydev->advertising = SUPPORTED_10000baseT_Full;
-
- return 0;
-}
-
-static int teranetics_soft_reset(struct phy_device *phydev)
-{
- return 0;
-}
-
static int teranetics_aneg_done(struct phy_device *phydev)
{
- int reg;
-
/* auto negotiation state can only be checked when using copper
* port, if using fiber port, just lie it's done.
*/
- if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93)) {
- reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
- return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
- }
+ if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93))
+ return genphy_c45_aneg_done(phydev);
return 1;
}
-static int teranetics_config_aneg(struct phy_device *phydev)
-{
- return 0;
-}
-
static int teranetics_read_status(struct phy_device *phydev)
{
int reg;
@@ -102,10 +80,10 @@ static struct phy_driver teranetics_driver[] = {
.phy_id = PHY_ID_TN2020,
.phy_id_mask = 0xffffffff,
.name = "Teranetics TN2020",
- .soft_reset = teranetics_soft_reset,
+ .soft_reset = gen10g_no_soft_reset,
.aneg_done = teranetics_aneg_done,
- .config_init = teranetics_config_init,
- .config_aneg = teranetics_config_aneg,
+ .config_init = gen10g_config_init,
+ .config_aneg = gen10g_config_aneg,
.read_status = teranetics_read_status,
.match_phy_device = teranetics_match_phy_device,
},
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 255a5def56e9..a393c1dff7dc 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -971,6 +971,7 @@ static struct pernet_operations ppp_net_ops = {
.exit = ppp_exit_net,
.id = &ppp_net_id,
.size = sizeof(struct ppp_net),
+ .async = true,
};
static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set)
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index bd89d1c559ce..c10e6181a2f0 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -1161,6 +1161,7 @@ static struct pernet_operations pppoe_net_ops = {
.exit = pppoe_exit_net,
.id = &pppoe_net_id,
.size = sizeof(struct pppoe_net),
+ .async = true,
};
static int __init pppoe_init(void)
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index a468439969df..5dd781e65958 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1105,14 +1105,15 @@ static void team_port_disable_netpoll(struct team_port *port)
}
#endif
-static int team_upper_dev_link(struct team *team, struct team_port *port)
+static int team_upper_dev_link(struct team *team, struct team_port *port,
+ struct netlink_ext_ack *extack)
{
struct netdev_lag_upper_info lag_upper_info;
int err;
lag_upper_info.tx_type = team->mode->lag_tx_type;
err = netdev_master_upper_dev_link(port->dev, team->dev, NULL,
- &lag_upper_info, NULL);
+ &lag_upper_info, extack);
if (err)
return err;
port->dev->priv_flags |= IFF_TEAM_PORT;
@@ -1129,7 +1130,8 @@ static void __team_port_change_port_added(struct team_port *port, bool linkup);
static int team_dev_type_check_change(struct net_device *dev,
struct net_device *port_dev);
-static int team_port_add(struct team *team, struct net_device *port_dev)
+static int team_port_add(struct team *team, struct net_device *port_dev,
+ struct netlink_ext_ack *extack)
{
struct net_device *dev = team->dev;
struct team_port *port;
@@ -1137,12 +1139,14 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
int err;
if (port_dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack, "Loopback device can't be added as a team port");
netdev_err(dev, "Device %s is loopback device. Loopback devices can't be added as a team port\n",
portname);
return -EINVAL;
}
if (team_port_exists(port_dev)) {
+ NL_SET_ERR_MSG(extack, "Device is already a port of a team device");
netdev_err(dev, "Device %s is already a port "
"of a team device\n", portname);
return -EBUSY;
@@ -1150,6 +1154,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
vlan_uses_dev(dev)) {
+ NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n",
portname);
return -EPERM;
@@ -1160,6 +1165,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
return err;
if (port_dev->flags & IFF_UP) {
+ NL_SET_ERR_MSG(extack, "Device is up. Set it down before adding it as a team port");
netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n",
portname);
return -EBUSY;
@@ -1227,7 +1233,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
goto err_handler_register;
}
- err = team_upper_dev_link(team, port);
+ err = team_upper_dev_link(team, port, extack);
if (err) {
netdev_err(dev, "Device %s failed to set upper link\n",
portname);
@@ -1921,7 +1927,7 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev,
int err;
mutex_lock(&team->lock);
- err = team_port_add(team, port_dev);
+ err = team_port_add(team, port_dev, extack);
mutex_unlock(&team->lock);
if (!err)
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 9ce0182223a0..e459e601c57f 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1434,6 +1434,7 @@ static struct pernet_operations vrf_net_ops __net_initdata = {
.init = vrf_netns_init,
.id = &vrf_net_id,
.size = sizeof(bool),
+ .async = true,
};
static int __init vrf_init_module(void)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index fab7a4db249e..aa5f034d6ad1 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -3752,6 +3752,7 @@ static struct pernet_operations vxlan_net_ops = {
.exit_batch = vxlan_exit_batch_net,
.id = &vxlan_net_id,
.size = sizeof(struct vxlan_net),
+ .async = true,
};
static int __init vxlan_init_module(void)
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index b1cf7c6f407a..ef5887037b22 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -419,7 +419,7 @@ static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
BUG();
}
-void xenvif_rx_skb(struct xenvif_queue *queue)
+static void xenvif_rx_skb(struct xenvif_queue *queue)
{
struct xenvif_pkt_state pkt;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 9c36d614bf89..2dee4e03ff1c 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -709,6 +709,7 @@ static struct pernet_operations lockd_net_ops = {
.exit = lockd_exit_net,
.id = &lockd_net_id,
.size = sizeof(struct lockd_net),
+ .async = true,
};
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7d893543cf3b..6c3083c992e5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2122,6 +2122,7 @@ static struct pernet_operations nfs_net_ops = {
.exit = nfs_net_exit,
.id = &nfs_net_id,
.size = sizeof(struct nfs_net),
+ .async = true,
};
/*
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 5be08f02a76b..8c743a405df6 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -118,6 +118,7 @@ static struct pernet_operations grace_net_ops = {
.exit = grace_exit_net,
.id = &grace_net_id,
.size = sizeof(struct list_head),
+ .async = true,
};
static int __init
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index a7f16e0f8d68..8a4566691c8f 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -96,7 +96,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled && sk) { \
+ if (cgroup_bpf_enabled) { \
__ret = __cgroup_bpf_run_filter_sk(sk, \
BPF_CGROUP_INET_SOCK_CREATE); \
} \
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 276932d75975..fdb691b520c0 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -20,7 +20,6 @@
#include <linux/set_memory.h>
#include <linux/kallsyms.h>
-#include <net/xdp.h>
#include <net/sch_generic.h>
#include <uapi/linux/filter.h>
@@ -30,6 +29,7 @@ struct sk_buff;
struct sock;
struct seccomp_data;
struct bpf_prog_aux;
+struct xdp_rxq_info;
/* ArgX, context and stack frame pointer register positions. Note,
* Arg1, Arg2, Arg3, etc are used as argument mappings of function
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index bfea26af6de5..4814cad7456e 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1224,6 +1224,12 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev)
return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
}
+#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev))
+#define MLX5_VPORT_MANAGER(mdev) \
+ (MLX5_CAP_GEN(mdev, vport_group_manager) && \
+ (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
+ mlx5_core_is_pf(mdev))
+
static inline int mlx5_get_gid_table_len(u16 param)
{
if (param > 4) {
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
new file mode 100644
index 000000000000..d3c9db492b30
--- /dev/null
+++ b/include/linux/mlx5/eswitch.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _MLX5_ESWITCH_
+#define _MLX5_ESWITCH_
+
+#include <linux/mlx5/driver.h>
+
+enum {
+ SRIOV_NONE,
+ SRIOV_LEGACY,
+ SRIOV_OFFLOADS
+};
+
+enum {
+ REP_ETH,
+ REP_IB,
+ NUM_REP_TYPES,
+};
+
+struct mlx5_eswitch_rep;
+struct mlx5_eswitch_rep_if {
+ int (*load)(struct mlx5_core_dev *dev,
+ struct mlx5_eswitch_rep *rep);
+ void (*unload)(struct mlx5_eswitch_rep *rep);
+ void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
+ void *priv;
+ bool valid;
+};
+
+struct mlx5_eswitch_rep {
+ struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
+ u16 vport;
+ u8 hw_id[ETH_ALEN];
+ u16 vlan;
+ u32 vlan_refcount;
+};
+
+void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index,
+ struct mlx5_eswitch_rep_if *rep_if,
+ u8 rep_type);
+void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index,
+ u8 rep_type);
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+ int vport,
+ u8 rep_type);
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+ int vport);
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
+ int vport, u32 sqn);
+#endif
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 5396521a776a..7ed82e4f11b3 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -4,11 +4,10 @@
#include <linux/in.h>
#include <linux/pim.h>
-#include <linux/rhashtable.h>
-#include <net/sock.h>
#include <net/fib_rules.h>
#include <net/fib_notifier.h>
#include <uapi/linux/mroute.h>
+#include <linux/mroute_base.h>
#ifdef CONFIG_IP_MROUTE
static inline int ip_mroute_opt(int opt)
@@ -56,18 +55,6 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule)
}
#endif
-struct vif_device {
- struct net_device *dev; /* Device we are using */
- struct netdev_phys_item_id dev_parent_id; /* Device parent ID */
- unsigned long bytes_in,bytes_out;
- unsigned long pkt_in,pkt_out; /* Statistics */
- unsigned long rate_limit; /* Traffic shaping (NI) */
- unsigned char threshold; /* TTL threshold */
- unsigned short flags; /* Control flags */
- __be32 local,remote; /* Addresses(remote for tunnels)*/
- int link; /* Physical interface index */
-};
-
struct vif_entry_notifier_info {
struct fib_notifier_info info;
struct net_device *dev;
@@ -78,34 +65,6 @@ struct vif_entry_notifier_info {
#define VIFF_STATIC 0x8000
-#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
-
-struct mr_table {
- struct list_head list;
- possible_net_t net;
- u32 id;
- struct sock __rcu *mroute_sk;
- struct timer_list ipmr_expire_timer;
- struct list_head mfc_unres_queue;
- struct vif_device vif_table[MAXVIFS];
- struct rhltable mfc_hash;
- struct list_head mfc_cache_list;
- int maxvif;
- atomic_t cache_resolve_queue_len;
- bool mroute_do_assert;
- bool mroute_do_pim;
- int mroute_reg_vif_num;
-};
-
-/* mfc_flags:
- * MFC_STATIC - the entry was added statically (not by a routing daemon)
- * MFC_OFFLOAD - the entry was offloaded to the hardware
- */
-enum {
- MFC_STATIC = BIT(0),
- MFC_OFFLOAD = BIT(1),
-};
-
struct mfc_cache_cmp_arg {
__be32 mfc_mcastgrp;
__be32 mfc_origin;
@@ -113,28 +72,13 @@ struct mfc_cache_cmp_arg {
/**
* struct mfc_cache - multicast routing entries
- * @mnode: rhashtable list
+ * @_c: Common multicast routing information; has to be first [for casting]
* @mfc_mcastgrp: destination multicast group address
* @mfc_origin: source address
* @cmparg: used for rhashtable comparisons
- * @mfc_parent: source interface (iif)
- * @mfc_flags: entry flags
- * @expires: unresolved entry expire time
- * @unresolved: unresolved cached skbs
- * @last_assert: time of last assert
- * @minvif: minimum VIF id
- * @maxvif: maximum VIF id
- * @bytes: bytes that have passed for this entry
- * @pkt: packets that have passed for this entry
- * @wrong_if: number of wrong source interface hits
- * @lastuse: time of last use of the group (traffic or update)
- * @ttls: OIF TTL threshold array
- * @refcount: reference count for this entry
- * @list: global entry list
- * @rcu: used for entry destruction
*/
struct mfc_cache {
- struct rhlist_head mnode;
+ struct mr_mfc _c;
union {
struct {
__be32 mfc_mcastgrp;
@@ -142,28 +86,6 @@ struct mfc_cache {
};
struct mfc_cache_cmp_arg cmparg;
};
- vifi_t mfc_parent;
- int mfc_flags;
-
- union {
- struct {
- unsigned long expires;
- struct sk_buff_head unresolved;
- } unres;
- struct {
- unsigned long last_assert;
- int minvif;
- int maxvif;
- unsigned long bytes;
- unsigned long pkt;
- unsigned long wrong_if;
- unsigned long lastuse;
- unsigned char ttls[MAXVIFS];
- refcount_t refcount;
- } res;
- } mfc_un;
- struct list_head list;
- struct rcu_head rcu;
};
struct mfc_entry_notifier_info {
@@ -187,12 +109,12 @@ static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
static inline void ipmr_cache_put(struct mfc_cache *c)
{
- if (refcount_dec_and_test(&c->mfc_un.res.refcount))
+ if (refcount_dec_and_test(&c->_c.mfc_un.res.refcount))
ipmr_cache_free(c);
}
static inline void ipmr_cache_hold(struct mfc_cache *c)
{
- refcount_inc(&c->mfc_un.res.refcount);
+ refcount_inc(&c->_c.mfc_un.res.refcount);
}
#endif
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 3014c52bfd86..1ac38e6819f5 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -7,6 +7,7 @@
#include <linux/skbuff.h> /* for struct sk_buff_head */
#include <net/net_namespace.h>
#include <uapi/linux/mroute6.h>
+#include <linux/mroute_base.h>
#ifdef CONFIG_IPV6_MROUTE
static inline int ip6_mroute_opt(int opt)
@@ -62,57 +63,24 @@ static inline void ip6_mr_cleanup(void)
}
#endif
-struct mif_device {
- struct net_device *dev; /* Device we are using */
- unsigned long bytes_in,bytes_out;
- unsigned long pkt_in,pkt_out; /* Statistics */
- unsigned long rate_limit; /* Traffic shaping (NI) */
- unsigned char threshold; /* TTL threshold */
- unsigned short flags; /* Control flags */
- int link; /* Physical interface index */
-};
-
#define VIFF_STATIC 0x8000
-struct mfc6_cache {
- struct list_head list;
- struct in6_addr mf6c_mcastgrp; /* Group the entry belongs to */
- struct in6_addr mf6c_origin; /* Source of packet */
- mifi_t mf6c_parent; /* Source interface */
- int mfc_flags; /* Flags on line */
+struct mfc6_cache_cmp_arg {
+ struct in6_addr mf6c_mcastgrp;
+ struct in6_addr mf6c_origin;
+};
+struct mfc6_cache {
+ struct mr_mfc _c;
union {
struct {
- unsigned long expires;
- struct sk_buff_head unresolved; /* Unresolved buffers */
- } unres;
- struct {
- unsigned long last_assert;
- int minvif;
- int maxvif;
- unsigned long bytes;
- unsigned long pkt;
- unsigned long wrong_if;
- unsigned long lastuse;
- unsigned char ttls[MAXMIFS]; /* TTL thresholds */
- } res;
- } mfc_un;
+ struct in6_addr mf6c_mcastgrp;
+ struct in6_addr mf6c_origin;
+ };
+ struct mfc6_cache_cmp_arg cmparg;
+ };
};
-#define MFC_STATIC 1
-#define MFC_NOTIFY 2
-
-#define MFC6_LINES 64
-
-#define MFC6_HASH(a, g) (((__force u32)(a)->s6_addr32[0] ^ \
- (__force u32)(a)->s6_addr32[1] ^ \
- (__force u32)(a)->s6_addr32[2] ^ \
- (__force u32)(a)->s6_addr32[3] ^ \
- (__force u32)(g)->s6_addr32[0] ^ \
- (__force u32)(g)->s6_addr32[1] ^ \
- (__force u32)(g)->s6_addr32[2] ^ \
- (__force u32)(g)->s6_addr32[3]) % MFC6_LINES)
-
#define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. of asserts */
struct rtmsg;
@@ -120,12 +88,12 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
struct rtmsg *rtm, u32 portid);
#ifdef CONFIG_IPV6_MROUTE
-extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb);
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb);
extern int ip6mr_sk_done(struct sock *sk);
#else
-static inline struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+static inline bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
- return NULL;
+ return false;
}
static inline int ip6mr_sk_done(struct sock *sk)
{
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
new file mode 100644
index 000000000000..c2560cb50f1d
--- /dev/null
+++ b/include/linux/mroute_base.h
@@ -0,0 +1,346 @@
+#ifndef __LINUX_MROUTE_BASE_H
+#define __LINUX_MROUTE_BASE_H
+
+#include <linux/netdevice.h>
+#include <linux/rhashtable.h>
+#include <linux/spinlock.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+/**
+ * struct vif_device - interface representor for multicast routing
+ * @dev: network device being used
+ * @bytes_in: statistic; bytes ingressing
+ * @bytes_out: statistic; bytes egresing
+ * @pkt_in: statistic; packets ingressing
+ * @pkt_out: statistic; packets egressing
+ * @rate_limit: Traffic shaping (NI)
+ * @threshold: TTL threshold
+ * @flags: Control flags
+ * @link: Physical interface index
+ * @dev_parent_id: device parent id
+ * @local: Local address
+ * @remote: Remote address for tunnels
+ */
+struct vif_device {
+ struct net_device *dev;
+ unsigned long bytes_in, bytes_out;
+ unsigned long pkt_in, pkt_out;
+ unsigned long rate_limit;
+ unsigned char threshold;
+ unsigned short flags;
+ int link;
+
+ /* Currently only used by ipmr */
+ struct netdev_phys_item_id dev_parent_id;
+ __be32 local, remote;
+};
+
+#ifndef MAXVIFS
+/* This one is nasty; value is defined in uapi using different symbols for
+ * mroute and morute6 but both map into same 32.
+ */
+#define MAXVIFS 32
+#endif
+
+#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev))
+
+/* mfc_flags:
+ * MFC_STATIC - the entry was added statically (not by a routing daemon)
+ * MFC_OFFLOAD - the entry was offloaded to the hardware
+ */
+enum {
+ MFC_STATIC = BIT(0),
+ MFC_OFFLOAD = BIT(1),
+};
+
+/**
+ * struct mr_mfc - common multicast routing entries
+ * @mnode: rhashtable list
+ * @mfc_parent: source interface (iif)
+ * @mfc_flags: entry flags
+ * @expires: unresolved entry expire time
+ * @unresolved: unresolved cached skbs
+ * @last_assert: time of last assert
+ * @minvif: minimum VIF id
+ * @maxvif: maximum VIF id
+ * @bytes: bytes that have passed for this entry
+ * @pkt: packets that have passed for this entry
+ * @wrong_if: number of wrong source interface hits
+ * @lastuse: time of last use of the group (traffic or update)
+ * @ttls: OIF TTL threshold array
+ * @refcount: reference count for this entry
+ * @list: global entry list
+ * @rcu: used for entry destruction
+ */
+struct mr_mfc {
+ struct rhlist_head mnode;
+ unsigned short mfc_parent;
+ int mfc_flags;
+
+ union {
+ struct {
+ unsigned long expires;
+ struct sk_buff_head unresolved;
+ } unres;
+ struct {
+ unsigned long last_assert;
+ int minvif;
+ int maxvif;
+ unsigned long bytes;
+ unsigned long pkt;
+ unsigned long wrong_if;
+ unsigned long lastuse;
+ unsigned char ttls[MAXVIFS];
+ refcount_t refcount;
+ } res;
+ } mfc_un;
+ struct list_head list;
+ struct rcu_head rcu;
+};
+
+struct mr_table;
+
+/**
+ * struct mr_table_ops - callbacks and info for protocol-specific ops
+ * @rht_params: parameters for accessing the MFC hash
+ * @cmparg_any: a hash key to be used for matching on (*,*) routes
+ */
+struct mr_table_ops {
+ const struct rhashtable_params *rht_params;
+ void *cmparg_any;
+};
+
+/**
+ * struct mr_table - a multicast routing table
+ * @list: entry within a list of multicast routing tables
+ * @net: net where this table belongs
+ * @ops: protocol specific operations
+ * @id: identifier of the table
+ * @mroute_sk: socket associated with the table
+ * @ipmr_expire_timer: timer for handling unresolved routes
+ * @mfc_unres_queue: list of unresolved MFC entries
+ * @vif_table: array containing all possible vifs
+ * @mfc_hash: Hash table of all resolved routes for easy lookup
+ * @mfc_cache_list: list of resovled routes for possible traversal
+ * @maxvif: Identifier of highest value vif currently in use
+ * @cache_resolve_queue_len: current size of unresolved queue
+ * @mroute_do_assert: Whether to inform userspace on wrong ingress
+ * @mroute_do_pim: Whether to receive IGMP PIMv1
+ * @mroute_reg_vif_num: PIM-device vif index
+ */
+struct mr_table {
+ struct list_head list;
+ possible_net_t net;
+ struct mr_table_ops ops;
+ u32 id;
+ struct sock __rcu *mroute_sk;
+ struct timer_list ipmr_expire_timer;
+ struct list_head mfc_unres_queue;
+ struct vif_device vif_table[MAXVIFS];
+ struct rhltable mfc_hash;
+ struct list_head mfc_cache_list;
+ int maxvif;
+ atomic_t cache_resolve_queue_len;
+ bool mroute_do_assert;
+ bool mroute_do_pim;
+ int mroute_reg_vif_num;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net));
+
+/* These actually return 'struct mr_mfc *', but to avoid need for explicit
+ * castings they simply return void.
+ */
+void *mr_mfc_find_parent(struct mr_table *mrt,
+ void *hasharg, int parent);
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi);
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg);
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm);
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock);
+#else
+static inline void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask)
+{
+}
+
+static inline void *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net))
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_find_parent(struct mr_table *mrt,
+ void *hasharg, int parent)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_find_any_parent(struct mr_table *mrt,
+ int vifi)
+{
+ return NULL;
+}
+
+static inline struct mr_mfc *mr_mfc_find_any(struct mr_table *mrt,
+ int vifi, void *hasharg)
+{
+ return NULL;
+}
+
+static inline int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm)
+{
+ return -EINVAL;
+}
+
+static inline int
+mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock)
+{
+ return -EINVAL;
+}
+#endif
+
+static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg)
+{
+ return mr_mfc_find_parent(mrt, hasharg, -1);
+}
+
+#ifdef CONFIG_PROC_FS
+struct mr_vif_iter {
+ struct seq_net_private p;
+ struct mr_table *mrt;
+ int ct;
+};
+
+struct mr_mfc_iter {
+ struct seq_net_private p;
+ struct mr_table *mrt;
+ struct list_head *cache;
+
+ /* Lock protecting the mr_table's unresolved queue */
+ spinlock_t *lock;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos);
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return *pos ? mr_vif_seq_idx(seq_file_net(seq),
+ seq->private, *pos - 1)
+ : SEQ_START_TOKEN;
+}
+
+/* These actually return 'struct mr_mfc *', but to avoid need for explicit
+ * castings they simply return void.
+ */
+void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos);
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos);
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+ struct mr_table *mrt, spinlock_t *lock)
+{
+ struct mr_mfc_iter *it = seq->private;
+
+ it->mrt = mrt;
+ it->cache = NULL;
+ it->lock = lock;
+
+ return *pos ? mr_mfc_seq_idx(seq_file_net(seq),
+ seq->private, *pos - 1)
+ : SEQ_START_TOKEN;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+ struct mr_mfc_iter *it = seq->private;
+ struct mr_table *mrt = it->mrt;
+
+ if (it->cache == &mrt->mfc_unres_queue)
+ spin_unlock_bh(it->lock);
+ else if (it->cache == &mrt->mfc_cache_list)
+ rcu_read_unlock();
+}
+#else
+static inline void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter,
+ loff_t pos)
+{
+ return NULL;
+}
+
+static inline void *mr_vif_seq_next(struct seq_file *seq,
+ void *v, loff_t *pos)
+{
+ return NULL;
+}
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ return NULL;
+}
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+ struct mr_table *mrt, spinlock_t *lock)
+{
+ return NULL;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+}
+#endif
+#endif
+#endif
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 5a0c3e53e7c2..6e38c699b753 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -994,6 +994,14 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev);
int genphy_c45_an_disable_aneg(struct phy_device *phydev);
int genphy_c45_read_mdix(struct phy_device *phydev);
+/* The gen10g_* functions are the old Clause 45 stub */
+int gen10g_config_aneg(struct phy_device *phydev);
+int gen10g_read_status(struct phy_device *phydev);
+int gen10g_no_soft_reset(struct phy_device *phydev);
+int gen10g_config_init(struct phy_device *phydev);
+int gen10g_suspend(struct phy_device *phydev);
+int gen10g_resume(struct phy_device *phydev);
+
static inline int phy_read_status(struct phy_device *phydev)
{
if (!phydev->drv)
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index e724d5a3dd80..ebce9e24906a 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -422,10 +422,11 @@ struct sfp_upstream_ops {
#if IS_ENABLED(CONFIG_SFP)
int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support);
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id);
void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support);
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes);
int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo);
int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
@@ -444,18 +445,19 @@ static inline int sfp_parse_port(struct sfp_bus *bus,
return PORT_OTHER;
}
-static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id)
-{
- return PHY_INTERFACE_MODE_NA;
-}
-
static inline void sfp_parse_support(struct sfp_bus *bus,
const struct sfp_eeprom_id *id,
unsigned long *support)
{
}
+static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *link_modes)
+{
+ return PHY_INTERFACE_MODE_NA;
+}
+
static inline int sfp_get_module_info(struct sfp_bus *bus,
struct ethtool_modinfo *modinfo)
{
diff --git a/include/net/Space.h b/include/net/Space.h
index 336da258885a..9cce0d80d37a 100644
--- a/include/net/Space.h
+++ b/include/net/Space.h
@@ -20,7 +20,6 @@ struct net_device *cs89x0_probe(int unit);
struct net_device *mvme147lance_probe(int unit);
struct net_device *tc515_probe(int unit);
struct net_device *lance_probe(int unit);
-struct net_device *mac89x0_probe(int unit);
struct net_device *cops_probe(int unit);
struct net_device *ltpc_probe(void);
diff --git a/include/net/ethoc.h b/include/net/ethoc.h
index bb7f467da7fc..29ba069a1d93 100644
--- a/include/net/ethoc.h
+++ b/include/net/ethoc.h
@@ -21,4 +21,3 @@ struct ethoc_platform_data {
};
#endif /* !LINUX_NET_ETHOC_H */
-
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index b3d216249240..1c9e17c11953 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -27,7 +27,7 @@ struct fib_rule {
u8 action;
u8 l3mdev;
u8 proto;
- /* 1 byte hole, try to use */
+ u8 ip_proto;
u32 target;
__be64 tun_id;
struct fib_rule __rcu *ctarget;
@@ -40,6 +40,8 @@ struct fib_rule {
char iifname[IFNAMSIZ];
char oifname[IFNAMSIZ];
struct fib_kuid_range uid_range;
+ struct fib_rule_port_range sport_range;
+ struct fib_rule_port_range dport_range;
struct rcu_head rcu;
};
@@ -110,7 +112,11 @@ struct fib_rule_notifier_info {
[FRA_GOTO] = { .type = NLA_U32 }, \
[FRA_L3MDEV] = { .type = NLA_U8 }, \
[FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \
- [FRA_PROTOCOL] = { .type = NLA_U8 }
+ [FRA_PROTOCOL] = { .type = NLA_U8 }, \
+ [FRA_IP_PROTO] = { .type = NLA_U8 }, \
+ [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \
+ [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
+
static inline void fib_rule_get(struct fib_rule *rule)
{
@@ -144,6 +150,38 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
return frh->table;
}
+static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range)
+{
+ return range->start != 0 && range->end != 0;
+}
+
+static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a,
+ __be16 port)
+{
+ return ntohs(port) >= a->start &&
+ ntohs(port) <= a->end;
+}
+
+static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a)
+{
+ return a->start != 0 && a->end != 0 && a->end < 0xffff &&
+ a->start <= a->end;
+}
+
+static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a,
+ struct fib_rule_port_range *b)
+{
+ return a->start == b->start &&
+ a->end == b->end;
+}
+
+static inline bool fib_rule_requires_fldissect(struct fib_rule *rule)
+{
+ return rule->ip_proto ||
+ fib_rule_port_range_set(&rule->sport_range) ||
+ fib_rule_port_range_set(&rule->dport_range);
+}
+
struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *,
struct net *);
void fib_rules_unregister(struct fib_rules_ops *);
diff --git a/include/net/flow.h b/include/net/flow.h
index f1624fd5b1d0..64e7ee9cb980 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -125,7 +125,7 @@ static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos,
fl4->daddr = daddr;
fl4->saddr = saddr;
}
-
+
struct flowi6 {
struct flowi_common __fl_common;
diff --git a/include/net/gre.h b/include/net/gre.h
index f90585decbce..797142eee9cd 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -37,6 +37,9 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err, __be16 proto, int nhs);
+bool is_gretap_dev(const struct net_device *dev);
+bool is_ip6gretap_dev(const struct net_device *dev);
+
static inline int gre_calc_hlen(__be16 o_flags)
{
int addend = 4;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c1a93ce35e62..b68fea022a82 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -49,9 +49,9 @@ struct inet_connection_sock_af_ops {
u16 net_header_len;
u16 net_frag_header_len;
u16 sockaddr_len;
- int (*setsockopt)(struct sock *sk, int level, int optname,
+ int (*setsockopt)(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen);
- int (*getsockopt)(struct sock *sk, int level, int optname,
+ int (*getsockopt)(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
#ifdef CONFIG_COMPAT
int (*compat_setsockopt)(struct sock *sk,
@@ -67,7 +67,7 @@ struct inet_connection_sock_af_ops {
/** inet_connection_sock - INET connection oriented sock
*
- * @icsk_accept_queue: FIFO of established children
+ * @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
* @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
@@ -122,7 +122,7 @@ struct inet_connection_sock {
unsigned long timeout; /* Currently scheduled timeout */
__u32 lrcvtime; /* timestamp of last received data packet */
__u16 last_seg_size; /* Size of last incoming segment */
- __u16 rcv_mss; /* MSS used for delayed ACK decisions */
+ __u16 rcv_mss; /* MSS used for delayed ACK decisions */
} icsk_ack;
struct {
int enabled;
@@ -201,7 +201,7 @@ extern const char inet_csk_timer_bug_msg[];
static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
{
struct inet_connection_sock *icsk = inet_csk(sk);
-
+
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
icsk->icsk_pending = 0;
#ifdef INET_CSK_CLEAR_TIMERS
diff --git a/include/net/ip.h b/include/net/ip.h
index 746abff9ce51..fe63ba95d12b 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -186,15 +186,15 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
void ip4_datagram_release_cb(struct sock *sk);
struct ip_reply_arg {
- struct kvec iov[1];
+ struct kvec iov[1];
int flags;
__wsum csum;
int csumoffset; /* u16 offset of csum in iov[0].iov_base */
- /* -1 if not needed */
+ /* -1 if not needed */
int bound_dev_if;
u8 tos;
kuid_t uid;
-};
+};
#define IP_REPLY_ARG_NOSRCCHECK 1
@@ -577,13 +577,13 @@ int ip_frag_mem(struct net *net);
/*
* Functions provided by ip_forward.c
*/
-
+
int ip_forward(struct sk_buff *skb);
-
+
/*
* Functions provided by ip_options.c
*/
-
+
void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
__be32 daddr, struct rtable *rt, int is_frag);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 34ec321d6a03..8d906a35b534 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -415,6 +415,24 @@ void fib6_rules_cleanup(void);
bool fib6_rule_default(const struct fib_rule *rule);
int fib6_rules_dump(struct net *net, struct notifier_block *nb);
unsigned int fib6_rules_seq_read(struct net *net);
+
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi6 *fl6,
+ struct flow_keys *flkeys)
+{
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+ if (!net->ipv6.fib6_rules_require_fldissect)
+ return false;
+
+ skb_flow_dissect_flow_keys(skb, flkeys, flag);
+ fl6->fl6_sport = flkeys->ports.src;
+ fl6->fl6_dport = flkeys->ports.dst;
+ fl6->flowi6_proto = flkeys->basic.ip_proto;
+
+ return true;
+}
#else
static inline int fib6_rules_init(void)
{
@@ -436,5 +454,12 @@ static inline unsigned int fib6_rules_seq_read(struct net *net)
{
return 0;
}
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi6 *fl6,
+ struct flow_keys *flkeys)
+{
+ return false;
+}
#endif
#endif
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 27d23a65f3cd..da2bde5fda8f 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -127,7 +127,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
const struct in6_addr *saddr, int oif, int flags);
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
+ struct flow_keys *hkeys);
struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
@@ -266,4 +267,5 @@ static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
!lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
}
+
#endif
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index f80524396c06..8812582a94d5 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -157,7 +157,7 @@ struct fib_result_nl {
unsigned char nh_sel;
unsigned char type;
unsigned char scope;
- int err;
+ int err;
};
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -293,6 +293,13 @@ static inline unsigned int fib4_rules_seq_read(struct net *net)
return 0;
}
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi4 *fl4,
+ struct flow_keys *flkeys)
+{
+ return false;
+}
#else /* CONFIG_IP_MULTIPLE_TABLES */
int __net_init fib4_rules_init(struct net *net);
void __net_exit fib4_rules_exit(struct net *net);
@@ -341,6 +348,24 @@ bool fib4_rule_default(const struct fib_rule *rule);
int fib4_rules_dump(struct net *net, struct notifier_block *nb);
unsigned int fib4_rules_seq_read(struct net *net);
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+ struct sk_buff *skb,
+ struct flowi4 *fl4,
+ struct flow_keys *flkeys)
+{
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+ if (!net->ipv4.fib_rules_require_fldissect)
+ return false;
+
+ skb_flow_dissect_flow_keys(skb, flkeys, flag);
+ fl4->fl4_sport = flkeys->ports.src;
+ fl4->fl4_dport = flkeys->ports.dst;
+ fl4->flowi4_proto = flkeys->basic.ip_proto;
+
+ return true;
+}
+
#endif /* CONFIG_IP_MULTIPLE_TABLES */
/* Exported by fib_frontend.c */
@@ -371,7 +396,7 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
- const struct sk_buff *skb);
+ const struct sk_buff *skb, struct flow_keys *flkeys);
#endif
void fib_select_multipath(struct fib_result *res, int hash);
void fib_select_path(struct net *net, struct fib_result *res,
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 1f16773cfd76..cbe5addb9293 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -254,6 +254,22 @@ static inline __be32 tunnel_id_to_key32(__be64 tun_id)
#ifdef CONFIG_INET
+static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
+ int proto,
+ __be32 daddr, __be32 saddr,
+ __be32 key, __u8 tos, int oif,
+ __u32 mark)
+{
+ memset(fl4, 0, sizeof(*fl4));
+ fl4->flowi4_oif = oif;
+ fl4->daddr = daddr;
+ fl4->saddr = saddr;
+ fl4->flowi4_tos = tos;
+ fl4->flowi4_proto = proto;
+ fl4->fl4_gre_key = key;
+ fl4->flowi4_mark = mark;
+}
+
int ip_tunnel_init(struct net_device *dev);
void ip_tunnel_uninit(struct net_device *dev);
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7a98cd583c73..cabd3cdd4015 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -105,8 +105,8 @@
#define IPV6_ADDR_ANY 0x0000U
-#define IPV6_ADDR_UNICAST 0x0001U
-#define IPV6_ADDR_MULTICAST 0x0002U
+#define IPV6_ADDR_UNICAST 0x0001U
+#define IPV6_ADDR_MULTICAST 0x0002U
#define IPV6_ADDR_LOOPBACK 0x0010U
#define IPV6_ADDR_LINKLOCAL 0x0020U
@@ -447,7 +447,7 @@ ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m,
#endif
}
-static inline void ipv6_addr_prefix(struct in6_addr *pfx,
+static inline void ipv6_addr_prefix(struct in6_addr *pfx,
const struct in6_addr *addr,
int plen)
{
@@ -496,7 +496,7 @@ static inline void __ipv6_addr_set_half(__be32 *addr,
addr[1] = wl;
}
-static inline void ipv6_addr_set(struct in6_addr *addr,
+static inline void ipv6_addr_set(struct in6_addr *addr,
__be32 w1, __be32 w2,
__be32 w3, __be32 w4)
{
@@ -732,7 +732,7 @@ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int
}
/*
- * we should *never* get to this point since that
+ * we should *never* get to this point since that
* would mean the addrs are equal
*
* However, we do get to it 8) And exacly, when
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 44668c29701a..3a970e429ab6 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -52,6 +52,7 @@ struct netns_ipv4 {
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rules_ops *rules_ops;
bool fib_has_custom_rules;
+ unsigned int fib_rules_require_fldissect;
struct fib_table __rcu *fib_main;
struct fib_table __rcu *fib_default;
#endif
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 987cc4569cb8..e286fda09fcf 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -71,7 +71,8 @@ struct netns_ipv6 {
unsigned int ip6_rt_gc_expire;
unsigned long ip6_rt_last_gc;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
- bool fib6_has_custom_rules;
+ unsigned int fib6_rules_require_fldissect;
+ bool fib6_has_custom_rules;
struct rt6_info *ip6_prohibit_entry;
struct rt6_info *ip6_blk_hole_entry;
struct fib6_table *fib6_local_tbl;
@@ -84,7 +85,7 @@ struct netns_ipv6 {
struct sock *mc_autojoin_sk;
#ifdef CONFIG_IPV6_MROUTE
#ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
- struct mr6_table *mrt6;
+ struct mr_table *mrt6;
#else
struct list_head mr6_tables;
struct fib_rules_ops *mr6_rules_ops;
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 87406252f0a3..e828d31be5da 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -806,6 +806,7 @@ enum tc_prio_command {
TC_PRIO_REPLACE,
TC_PRIO_DESTROY,
TC_PRIO_STATS,
+ TC_PRIO_GRAFT,
};
struct tc_prio_qopt_offload_params {
@@ -818,6 +819,11 @@ struct tc_prio_qopt_offload_params {
struct gnet_stats_queue *qstats;
};
+struct tc_prio_qopt_offload_graft_params {
+ u8 band;
+ u32 child_handle;
+};
+
struct tc_prio_qopt_offload {
enum tc_prio_command command;
u32 handle;
@@ -825,6 +831,8 @@ struct tc_prio_qopt_offload {
union {
struct tc_prio_qopt_offload_params replace_params;
struct tc_qopt_offload_stats stats;
+ struct tc_prio_qopt_offload_graft_params graft_params;
};
};
+
#endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e2ab13687fb9..d4907b584b38 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -540,7 +540,7 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb)
return false;
}
-/* Reset all TX qdiscs greater then index of a device. */
+/* Reset all TX qdiscs greater than index of a device. */
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
{
struct Qdisc *qdisc;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 92b06c6e7732..9c9b3768b350 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -511,8 +511,6 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
#endif
/* tcp_output.c */
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
- int min_tso_segs);
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
int nonagle);
int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
@@ -981,8 +979,8 @@ struct tcp_congestion_ops {
u32 (*undo_cwnd)(struct sock *sk);
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
- /* suggest number of segments for each skb to transmit (optional) */
- u32 (*tso_segs_goal)(struct sock *sk);
+ /* override sysctl_tcp_min_tso_segs */
+ u32 (*min_tso_segs)(struct sock *sk);
/* returns the multiplier used in tcp_sndbuf_expand (optional) */
u32 (*sndbuf_expand)(struct sock *sk);
/* call when packets are delivered to update cwnd and pacing rate,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 7d2077665c0b..aa027ba1d032 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1267,12 +1267,12 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
static inline void xfrm_sk_free_policy(struct sock *sk) {}
static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; }
-static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
-static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
-{
- return 1;
-}
+{
+ return 1;
+}
static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
{
return 1;
@@ -1356,7 +1356,7 @@ __xfrm6_state_addr_check(const struct xfrm_state *x,
{
if (ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) &&
(ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) ||
- ipv6_addr_any((struct in6_addr *)saddr) ||
+ ipv6_addr_any((struct in6_addr *)saddr) ||
ipv6_addr_any((struct in6_addr *)&x->props.saddr)))
return 1;
return 0;
@@ -1666,7 +1666,7 @@ int xfrm_user_policy(struct sock *sk, int optname,
static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
return -ENOPROTOOPT;
-}
+}
static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
{
diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index 28812eda4209..dc64cfaf13da 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -20,13 +20,11 @@ struct sock_extended_err {
#define SO_EE_ORIGIN_ICMP6 3
#define SO_EE_ORIGIN_TXSTATUS 4
#define SO_EE_ORIGIN_ZEROCOPY 5
-#define SO_EE_ORIGIN_ZCOOKIE 6
#define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
#define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1))
#define SO_EE_CODE_ZEROCOPY_COPIED 1
-#define SO_EE_ORIGIN_MAX_ZCOOKIES 8
/**
* struct scm_timestamping - timestamps exposed through cmsg
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 77d90ae38114..232df14e1287 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -35,6 +35,11 @@ struct fib_rule_uid_range {
__u32 end;
};
+struct fib_rule_port_range {
+ __u16 start;
+ __u16 end;
+};
+
enum {
FRA_UNSPEC,
FRA_DST, /* destination address */
@@ -59,6 +64,9 @@ enum {
FRA_L3MDEV, /* iif or oif is l3mdev goto its table */
FRA_UID_RANGE, /* UID range */
FRA_PROTOCOL, /* Originator of the rule */
+ FRA_IP_PROTO, /* ip proto */
+ FRA_SPORT_RANGE, /* sport */
+ FRA_DPORT_RANGE, /* dport */
__FRA_MAX
};
diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index 12e3bca32cad..a66b213de3d7 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -104,6 +104,7 @@
#define RDS_CMSG_MASKED_ATOMIC_CSWP 9
#define RDS_CMSG_RXPATH_LATENCY 11
#define RDS_CMSG_ZCOPY_COOKIE 12
+#define RDS_CMSG_ZCOPY_COMPLETION 13
#define RDS_INFO_FIRST 10000
#define RDS_INFO_COUNTERS 10000
@@ -317,6 +318,12 @@ struct rds_rdma_notify {
#define RDS_RDMA_DROPPED 3
#define RDS_RDMA_OTHER_ERROR 4
+#define RDS_MAX_ZCOOKIES 8
+struct rds_zcopy_cookies {
+ __u32 num;
+ __u32 cookies[RDS_MAX_ZCOOKIES];
+};
+
/*
* Common set of flags for all RDMA related structs
*/
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5fb69a85d967..3c74b163eaeb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -508,10 +508,6 @@ err:
static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
-#define CALLEE_SAVED_REGS 5
-static const int callee_saved[CALLEE_SAVED_REGS] = {
- BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9
-};
static void __mark_reg_not_init(struct bpf_reg_state *reg);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index bad01b14a4ad..bd0ed39f65fb 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -729,6 +729,7 @@ static struct pernet_operations vlan_net_ops = {
.exit = vlan_exit_net,
.id = &vlan_net_id,
.size = sizeof(struct vlan_net),
+ .async = true,
};
static int __init vlan_proto_init(void)
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 6bf06e756df2..7770481a6506 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -188,6 +188,7 @@ static void __net_exit br_net_exit(struct net *net)
static struct pernet_operations br_net_ops = {
.exit = br_net_exit,
+ .async = true,
};
static const struct stp_proto br_stp_proto = {
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 27f1d4f2114a..484f54150525 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -967,6 +967,7 @@ static struct pernet_operations brnf_net_ops __read_mostly = {
.exit = brnf_exit_net,
.id = &brnf_net_id,
.size = sizeof(struct brnf_net),
+ .async = true,
};
static struct notifier_block brnf_notifier __read_mostly = {
diff --git a/net/can/bcm.c b/net/can/bcm.c
index ac5e5e34fee3..26730d39e048 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1717,6 +1717,7 @@ static void canbcm_pernet_exit(struct net *net)
static struct pernet_operations canbcm_pernet_ops __read_mostly = {
.init = canbcm_pernet_init,
.exit = canbcm_pernet_exit,
+ .async = true,
};
static int __init bcm_module_init(void)
diff --git a/net/core/dev.c b/net/core/dev.c
index 5bdcc5a161fe..40fb3aed5df2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2378,7 +2378,7 @@ EXPORT_SYMBOL(netdev_set_num_tc);
/*
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
- * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
*/
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index a6aea805a0a2..f6f04fc0f629 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -33,6 +33,10 @@ bool fib_rule_matchall(const struct fib_rule *rule)
if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
!uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
return false;
+ if (fib_rule_port_range_set(&rule->sport_range))
+ return false;
+ if (fib_rule_port_range_set(&rule->dport_range))
+ return false;
return true;
}
EXPORT_SYMBOL_GPL(fib_rule_matchall);
@@ -221,6 +225,26 @@ static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
}
+static int nla_get_port_range(struct nlattr *pattr,
+ struct fib_rule_port_range *port_range)
+{
+ const struct fib_rule_port_range *pr = nla_data(pattr);
+
+ if (!fib_rule_port_range_valid(pr))
+ return -EINVAL;
+
+ port_range->start = pr->start;
+ port_range->end = pr->end;
+
+ return 0;
+}
+
+static int nla_put_port_range(struct sk_buff *skb, int attrtype,
+ struct fib_rule_port_range *range)
+{
+ return nla_put(skb, attrtype, sizeof(*range), range);
+}
+
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
struct flowi *fl, int flags,
struct fib_lookup_arg *arg)
@@ -425,6 +449,17 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
!uid_eq(r->uid_range.end, rule->uid_range.end))
continue;
+ if (r->ip_proto != rule->ip_proto)
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->sport_range,
+ &rule->sport_range))
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->dport_range,
+ &rule->dport_range))
+ continue;
+
if (!ops->compare(r, frh, tb))
continue;
return 1;
@@ -569,6 +604,23 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
rule->uid_range = fib_kuid_range_unset;
}
+ if (tb[FRA_IP_PROTO])
+ rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
+
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &rule->sport_range);
+ if (err)
+ goto errout_free;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &rule->dport_range);
+ if (err)
+ goto errout_free;
+ }
+
if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
rule_exists(ops, frh, tb, rule)) {
err = -EEXIST;
@@ -634,6 +686,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ struct fib_rule_port_range sprange = {0, 0};
+ struct fib_rule_port_range dprange = {0, 0};
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *r;
struct nlattr *tb[FRA_MAX+1];
@@ -667,6 +721,20 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
range = fib_kuid_range_unset;
}
+ if (tb[FRA_SPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+ &sprange);
+ if (err)
+ goto errout;
+ }
+
+ if (tb[FRA_DPORT_RANGE]) {
+ err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+ &dprange);
+ if (err)
+ goto errout;
+ }
+
list_for_each_entry(rule, &ops->rules_list, list) {
if (tb[FRA_PROTOCOL] &&
(rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
@@ -712,6 +780,18 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
!uid_eq(rule->uid_range.end, range.end)))
continue;
+ if (tb[FRA_IP_PROTO] &&
+ (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO])))
+ continue;
+
+ if (fib_rule_port_range_set(&sprange) &&
+ !fib_rule_port_range_compare(&rule->sport_range, &sprange))
+ continue;
+
+ if (fib_rule_port_range_set(&dprange) &&
+ !fib_rule_port_range_compare(&rule->dport_range, &dprange))
+ continue;
+
if (!ops->compare(rule, frh, tb))
continue;
@@ -790,7 +870,10 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ nla_total_size(4) /* FRA_FWMASK */
+ nla_total_size_64bit(8) /* FRA_TUN_ID */
+ nla_total_size(sizeof(struct fib_kuid_range))
- + nla_total_size(1); /* FRA_PROTOCOL */
+ + nla_total_size(1) /* FRA_PROTOCOL */
+ + nla_total_size(1) /* FRA_IP_PROTO */
+ + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
+ + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
@@ -855,7 +938,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
(rule->l3mdev &&
nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
(uid_range_set(&rule->uid_range) &&
- nla_put_uid_range(skb, &rule->uid_range)))
+ nla_put_uid_range(skb, &rule->uid_range)) ||
+ (fib_rule_port_range_set(&rule->sport_range) &&
+ nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
+ (fib_rule_port_range_set(&rule->dport_range) &&
+ nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
+ (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
goto nla_put_failure;
if (rule->suppress_ifgroup != -1) {
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 27a55236ad64..690e78c6af45 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -362,7 +362,7 @@ static void dec_net_namespaces(struct ucounts *ucounts)
dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}
-static struct kmem_cache *net_cachep;
+static struct kmem_cache *net_cachep __ro_after_init;
static struct workqueue_struct *netns_wq;
static struct net *net_alloc(void)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1a7485a2cdfa..96d36b81a3a5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -77,8 +77,8 @@
#include <linux/capability.h>
#include <linux/user_namespace.h>
-struct kmem_cache *skbuff_head_cache __read_mostly;
-static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+struct kmem_cache *skbuff_head_cache __ro_after_init;
+static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 974765b7d92a..e4f305320519 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -104,6 +104,7 @@ static void lowpan_setup(struct net_device *ldev)
/* We need an ipv6hdr as minimum len when calling xmit */
ldev->hard_header_len = sizeof(struct ipv6hdr);
ldev->flags = IFF_BROADCAST | IFF_MULTICAST;
+ ldev->priv_flags |= IFF_NO_QUEUE;
ldev->netdev_ops = &lowpan_netdev_ops;
ldev->header_ops = &lowpan_header_ops;
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index cb7176cd4cd6..9104943c15ba 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -345,6 +345,7 @@ static void __net_exit cfg802154_pernet_exit(struct net *net)
static struct pernet_operations cfg802154_pernet_ops = {
.exit = cfg802154_pernet_exit,
+ .async = true,
};
static int __init wpan_phy_class_init(void)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index f48fe6fc7e8c..80dad301361d 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -212,9 +212,14 @@ config NET_IPGRE_BROADCAST
Network), but can be distributed all over the Internet. If you want
to do that, say Y here and to "IP multicast routing" below.
+config IP_MROUTE_COMMON
+ bool
+ depends on IP_MROUTE || IPV6_MROUTE
+
config IP_MROUTE
bool "IP: multicast routing"
depends on IP_MULTICAST
+ select IP_MROUTE_COMMON
help
This is used if you want your machine to act as a router for IP
packets that have several destination addresses. It is needed on the
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 47a0a6649a9d..a07b7dd06def 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
+obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
obj-$(CONFIG_NET_FOU) += fou.o
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 35d646a62ad4..737d11bc8838 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -182,6 +182,17 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tos && (r->tos != fl4->flowi4_tos))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport))
+ return 0;
+
return 1;
}
@@ -244,6 +255,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
#endif
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect++;
+
rule4->src_len = frh->src_len;
rule4->srcmask = inet_make_mask(rule4->src_len);
rule4->dst_len = frh->dst_len;
@@ -272,6 +286,10 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
+
+ if (net->ipv4.fib_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv4.fib_rules_require_fldissect--;
errout:
return err;
}
@@ -389,6 +407,7 @@ int __net_init fib4_rules_init(struct net *net)
goto fail;
net->ipv4.rules_ops = ops;
net->ipv4.fib_has_custom_rules = false;
+ net->ipv4.fib_rules_require_fldissect = 0;
return 0;
fail:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index cd46d7666598..181b0d8d589c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -171,7 +171,7 @@ static void free_nh_exceptions(struct fib_nh *nh)
fnhe = rcu_dereference_protected(hash[i].chain, 1);
while (fnhe) {
struct fib_nh_exception *next;
-
+
next = rcu_dereference_protected(fnhe->fnhe_next, 1);
rt_fibinfo_free(&fnhe->fnhe_rth_input);
@@ -1770,7 +1770,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi->fib_nhs > 1) {
- int h = fib_multipath_hash(res->fi, fl4, skb);
+ int h = fib_multipath_hash(res->fi, fl4, skb, NULL);
fib_select_multipath(res, h);
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5530cd6fdbc7..62243a8abf92 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -50,6 +50,7 @@
#define VERSION "0.409"
+#include <linux/cache.h>
#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
@@ -191,8 +192,8 @@ static size_t tnode_free_size;
*/
static const int sync_pages = 128;
-static struct kmem_cache *fn_alias_kmem __read_mostly;
-static struct kmem_cache *trie_leaf_kmem __read_mostly;
+static struct kmem_cache *fn_alias_kmem __ro_after_init;
+static struct kmem_cache *trie_leaf_kmem __ro_after_init;
static inline struct tnode *tn_info(struct key_vector *kv)
{
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 914d56928578..1f04bd91fc2e 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -6,6 +6,7 @@
* Authors: Andrey V. Savochkin <saw@msu.ru>
*/
+#include <linux/cache.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/slab.h>
@@ -51,7 +52,7 @@
* daddr: unchangeable
*/
-static struct kmem_cache *peer_cachep __read_mostly;
+static struct kmem_cache *peer_cachep __ro_after_init;
void inet_peer_base_init(struct inet_peer_base *bp)
{
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 45d97e9b2759..0fe1d69b5df4 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1044,6 +1044,7 @@ static struct pernet_operations ipgre_net_ops = {
.exit_batch = ipgre_exit_batch_net,
.id = &ipgre_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1322,6 +1323,12 @@ static void ipgre_tap_setup(struct net_device *dev)
ip_tunnel_setup(dev, gre_tap_net_id);
}
+bool is_gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_gretap_dev);
+
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
@@ -1623,6 +1630,7 @@ static struct pernet_operations ipgre_tap_net_ops = {
.exit_batch = ipgre_tap_exit_batch_net,
.id = &gre_tap_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int __net_init erspan_init_net(struct net *net)
@@ -1641,6 +1649,7 @@ static struct pernet_operations erspan_net_ops = {
.exit_batch = erspan_exit_batch_net,
.id = &erspan_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int __init ipgre_init(void)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index d786a8441bce..b2117d89bc83 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -290,22 +290,6 @@ failed:
return ERR_PTR(err);
}
-static inline void init_tunnel_flow(struct flowi4 *fl4,
- int proto,
- __be32 daddr, __be32 saddr,
- __be32 key, __u8 tos, int oif,
- __u32 mark)
-{
- memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_oif = oif;
- fl4->daddr = daddr;
- fl4->saddr = saddr;
- fl4->flowi4_tos = tos;
- fl4->flowi4_proto = proto;
- fl4->fl4_gre_key = key;
- fl4->flowi4_mark = mark;
-}
-
static int ip_tunnel_bind_dev(struct net_device *dev)
{
struct net_device *tdev = NULL;
@@ -322,10 +306,10 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
struct flowi4 fl4;
struct rtable *rt;
- init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
- iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), tunnel->parms.link,
- tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
+ iph->saddr, tunnel->parms.o_key,
+ RT_TOS(iph->tos), tunnel->parms.link,
+ tunnel->fwmark);
rt = ip_route_output_key(tunnel->net, &fl4);
if (!IS_ERR(rt)) {
@@ -581,8 +565,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
else if (skb->protocol == htons(ETH_P_IPV6))
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
- init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
- RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+ RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
goto tx_error;
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -711,14 +695,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
if (tunnel->fwmark) {
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark);
+ ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos),
+ tunnel->parms.link, tunnel->fwmark);
}
else {
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- skb->mark);
+ ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos),
+ tunnel->parms.link, skb->mark);
}
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 51b1669334fe..b10bf563afd9 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -454,6 +454,7 @@ static struct pernet_operations vti_net_ops = {
.exit_batch = vti_exit_batch_net,
.id = &vti_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c891235b4966..9c5a4d164f09 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -669,6 +669,7 @@ static struct pernet_operations ipip_net_ops = {
.exit_batch = ipip_exit_batch_net,
.id = &ipip_net_id,
.size = sizeof(struct ip_tunnel_net),
+ .async = true,
};
static int __init ipip_init(void)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 7c7ac9d32e77..d752a70855d8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -28,9 +28,9 @@
#include <linux/uaccess.h>
#include <linux/types.h>
+#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
@@ -52,7 +52,6 @@
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
-#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
@@ -96,7 +95,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
* In this case data path is free of exclusive locks at all.
*/
-static struct kmem_cache *mrt_cachep __read_mostly;
+static struct kmem_cache *mrt_cachep __ro_after_init;
static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);
@@ -106,8 +105,6 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
int cmd);
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
@@ -118,6 +115,23 @@ static void ipmr_expire_process(struct timer_list *t);
#define ipmr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv4.mr_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv4.mr_tables)
+ return NULL;
+ return ret;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -285,6 +299,14 @@ EXPORT_SYMBOL(ipmr_rule_default);
#define ipmr_for_each_table(mrt, net) \
for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv4.mrt;
+ return NULL;
+}
+
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
return net->ipv4.mrt;
@@ -344,7 +366,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
}
static const struct rhashtable_params ipmr_rht_params = {
- .head_offset = offsetof(struct mfc_cache, mnode),
+ .head_offset = offsetof(struct mr_mfc, mnode),
.key_offset = offsetof(struct mfc_cache, cmparg),
.key_len = sizeof(struct mfc_cache_cmp_arg),
.nelem_hint = 3,
@@ -353,6 +375,24 @@ static const struct rhashtable_params ipmr_rht_params = {
.automatic_shrinking = true,
};
+static void ipmr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+ list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
+#endif
+}
+
+static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
+ .mfc_mcastgrp = htonl(INADDR_ANY),
+ .mfc_origin = htonl(INADDR_ANY),
+};
+
+static struct mr_table_ops ipmr_mr_table_ops = {
+ .rht_params = &ipmr_rht_params,
+ .cmparg_any = &ipmr_mr_table_ops_cmparg_any,
+};
+
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -365,23 +405,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
if (mrt)
return mrt;
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return ERR_PTR(-ENOMEM);
- write_pnet(&mrt->net, net);
- mrt->id = id;
-
- rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
- INIT_LIST_HEAD(&mrt->mfc_cache_list);
- INIT_LIST_HEAD(&mrt->mfc_unres_queue);
-
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
-
- mrt->mroute_reg_vif_num = -1;
-#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
- list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
-#endif
- return mrt;
+ return mr_table_alloc(net, id, &ipmr_mr_table_ops,
+ ipmr_expire_process, ipmr_new_table_set);
}
static void ipmr_free_table(struct mr_table *mrt)
@@ -760,14 +785,14 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
static void ipmr_cache_free_rcu(struct rcu_head *head)
{
- struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
- kmem_cache_free(mrt_cachep, c);
+ kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
}
void ipmr_cache_free(struct mfc_cache *c)
{
- call_rcu(&c->rcu, ipmr_cache_free_rcu);
+ call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
}
EXPORT_SYMBOL(ipmr_cache_free);
@@ -782,7 +807,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
@@ -806,9 +831,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
static void ipmr_expire_process(struct timer_list *t)
{
struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
- unsigned long now;
+ struct mr_mfc *c, *next;
unsigned long expires;
- struct mfc_cache *c, *next;
+ unsigned long now;
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
@@ -830,8 +855,8 @@ static void ipmr_expire_process(struct timer_list *t)
}
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
}
if (!list_empty(&mrt->mfc_unres_queue))
@@ -842,7 +867,7 @@ out:
}
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
+static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
@@ -944,6 +969,10 @@ static int vif_add(struct net *net, struct mr_table *mrt,
ip_rt_multicast_event(in_dev);
/* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit,
+ vifc->vifc_threshold,
+ vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
+ (VIFF_TUNNEL | VIFF_REGISTER));
attr.orig_dev = dev;
if (!switchdev_port_attr_get(dev, &attr)) {
@@ -952,20 +981,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
} else {
v->dev_parent_id.id_len = 0;
}
- v->rate_limit = vifc->vifc_rate_limit;
+
v->local = vifc->vifc_lcl_addr.s_addr;
v->remote = vifc->vifc_rmt_addr.s_addr;
- v->flags = vifc->vifc_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
- v->link = dev_get_iflink(dev);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
@@ -988,33 +1006,8 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- return c;
-
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
- int vifi)
-{
- struct mfc_cache_cmp_arg arg = {
- .mfc_mcastgrp = htonl(INADDR_ANY),
- .mfc_origin = htonl(INADDR_ANY)
- };
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
-
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
@@ -1025,25 +1018,10 @@ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = htonl(INADDR_ANY)
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c, *proxy;
if (mcastgrp == htonl(INADDR_ANY))
- goto skip;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode) {
- if (c->mfc_un.res.ttls[vifi] < 255)
- return c;
-
- /* It's ok if the vifi is part of the static tree */
- proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
- if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
- return c;
- }
-
-skip:
- return ipmr_cache_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any_parent(mrt, vifi);
+ return mr_mfc_find_any(mrt, vifi, &arg);
}
/* Look for a (S,G,iif) entry if parent != -1 */
@@ -1055,15 +1033,8 @@ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
.mfc_mcastgrp = mcastgrp,
.mfc_origin = origin,
};
- struct rhlist_head *tmp, *list;
- struct mfc_cache *c;
-
- list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
- rhl_for_each_entry_rcu(c, tmp, list, mnode)
- if (parent == -1 || parent == c->mfc_parent)
- return c;
- return NULL;
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
/* Allocate a multicast cache entry */
@@ -1072,9 +1043,9 @@ static struct mfc_cache *ipmr_cache_alloc(void)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (c) {
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXVIFS;
- refcount_set(&c->mfc_un.res.refcount, 1);
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXVIFS;
+ refcount_set(&c->_c.mfc_un.res.refcount, 1);
}
return c;
}
@@ -1084,8 +1055,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (c) {
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10*HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
}
return c;
}
@@ -1098,12 +1069,13 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
struct nlmsgerr *e;
/* Play the pending entries through our router */
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct iphdr));
- if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) -
(u8 *)nlh;
} else {
@@ -1211,7 +1183,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
int err;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (c->mfc_mcastgrp == iph->daddr &&
c->mfc_origin == iph->saddr) {
found = true;
@@ -1230,12 +1202,13 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
}
/* Fill in the new cache entry */
- c->mfc_parent = -1;
+ c->_c.mfc_parent = -1;
c->mfc_origin = iph->saddr;
c->mfc_mcastgrp = iph->daddr;
/* Reflect first query at mrouted. */
err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
+
if (err < 0) {
/* If the report failed throw the cache entry
out - Brad Parker
@@ -1248,15 +1221,16 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
- mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
+ mod_timer(&mrt->ipmr_expire_timer,
+ c->_c.mfc_un.unres.expires);
}
/* See if we can append the packet */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
@@ -1264,7 +1238,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
skb->dev = dev;
skb->skb_iif = dev->ifindex;
}
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1286,8 +1260,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
rcu_read_unlock();
if (!c)
return -ENOENT;
- rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
- list_del_rcu(&c->list);
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
+ list_del_rcu(&c->_c.list);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
ipmr_cache_put(c);
@@ -1299,6 +1273,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
struct mfcctl *mfc, int mrtsock, int parent)
{
struct mfc_cache *uc, *c;
+ struct mr_mfc *_uc;
bool found;
int ret;
@@ -1312,10 +1287,10 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
rcu_read_unlock();
if (c) {
write_lock_bh(&mrt_lock);
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
mrt->id);
@@ -1333,28 +1308,29 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
c->mfc_origin = mfc->mfcc_origin.s_addr;
c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
- c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+ c->_c.mfc_parent = mfc->mfcc_parent;
+ ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
+ ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
ipmr_rht_params);
if (ret) {
pr_err("ipmr: rhtable insert error %d\n", ret);
ipmr_cache_free(c);
return ret;
}
- list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
/* Check to see if we resolved a queued list. If so we
* need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc_cache *)_uc;
if (uc->mfc_origin == c->mfc_origin &&
uc->mfc_mcastgrp == c->mfc_mcastgrp) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
@@ -1377,7 +1353,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
struct net *net = read_pnet(&mrt->net);
- struct mfc_cache *c, *tmp;
+ struct mr_mfc *c, *tmp;
+ struct mfc_cache *cache;
LIST_HEAD(list);
int i;
@@ -1395,18 +1372,20 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
continue;
rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
list_del_rcu(&c->list);
- call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+ cache = (struct mfc_cache *)c;
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
mrt->id);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_put(c);
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ ipmr_cache_put(cache);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_destroy_unres(mrt, c);
+ cache = (struct mfc_cache *)c;
+ mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+ ipmr_destroy_unres(mrt, cache);
}
spin_unlock_bh(&mfc_unres_lock);
}
@@ -1698,9 +1677,9 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1772,9 +1751,9 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1998,26 +1977,26 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
/* "local" means that we should preserve one skb (for local delivery) */
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
struct net_device *dev, struct sk_buff *skb,
- struct mfc_cache *cache, int local)
+ struct mfc_cache *c, int local)
{
int true_vifi = ipmr_find_vif(mrt, dev);
int psend = -1;
int vif, ct;
- vif = cache->mfc_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
+ if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
struct mfc_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incomming
* interface is part of the static tree.
*/
- cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
goto forward;
}
@@ -2038,7 +2017,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
goto dont_forward;
}
- cache->mfc_un.res.wrong_if++;
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2047,10 +2026,11 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
* large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
}
goto dont_forward;
@@ -2061,33 +2041,33 @@ forward:
mrt->vif_table[vif].bytes_in += skb->len;
/* Forward the frame */
- if (cache->mfc_origin == htonl(INADDR_ANY) &&
- cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
+ if (c->mfc_origin == htonl(INADDR_ANY) &&
+ c->mfc_mcastgrp == htonl(INADDR_ANY)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mfc_parent &&
+ true_vifi != c->_c.mfc_parent &&
ip_hdr(skb)->ttl >
- cache->mfc_un.res.ttls[cache->mfc_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mfc_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1;
- ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((cache->mfc_origin != htonl(INADDR_ANY) ||
+ if ((c->mfc_origin != htonl(INADDR_ANY) ||
ct != true_vifi) &&
- ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
+ ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi,
- skb2, cache, psend);
+ skb2, c, psend);
}
psend = ct;
}
@@ -2099,9 +2079,9 @@ last_forward:
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi, skb2,
- cache, psend);
+ c, psend);
} else {
- ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
return;
}
}
@@ -2299,62 +2279,6 @@ drop:
}
#endif
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- struct mfc_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mfc_parent >= MAXVIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (VIF_EXISTS(mrt, c->mfc_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
-
- if (c->mfc_flags & MFC_OFFLOAD)
- rtm->rtm_flags |= RTNH_F_OFFLOAD;
-
- if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ipmr_get_route(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr,
struct rtmsg *rtm, u32 portid)
@@ -2412,7 +2336,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
}
read_lock(&mrt_lock);
- err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
rcu_read_unlock();
return err;
@@ -2440,7 +2364,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2449,7 +2373,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
goto nla_put_failure;
- err = __ipmr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
@@ -2462,6 +2386,14 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c, int cmd,
+ int flags)
+{
+ return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
+ cmd, flags);
+}
+
static size_t mroute_msgsize(bool unresolved, int maxvif)
{
size_t len =
@@ -2490,7 +2422,8 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
+ skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
+ mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
@@ -2634,62 +2567,8 @@ errout_free:
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr_table *mrt;
- struct mfc_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_e = cb->args[1];
-
- rcu_read_lock();
- ipmr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
- if (e < s_e)
- goto next_entry;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = 0;
- s_e = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = 0;
- s_e = 0;
-next_table:
- t++;
- }
-done:
- rcu_read_unlock();
-
- cb->args[1] = e;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
+ _ipmr_fill_mroute, &mfc_unres_lock);
}
static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
@@ -2946,31 +2825,11 @@ out:
/* The /proc interfaces to multicast routing :
* /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
*/
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- int ct;
-};
-
-static struct vif_device *ipmr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
-{
- struct mr_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
-}
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
@@ -2981,26 +2840,7 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ipmr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!VIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -3011,7 +2851,7 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
@@ -3019,7 +2859,8 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
"Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
} else {
const struct vif_device *vif = v;
- const char *name = vif->dev ? vif->dev->name : "none";
+ const char *name = vif->dev ?
+ vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
@@ -3033,7 +2874,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_vif_seq_ops = {
.start = ipmr_vif_seq_start,
- .next = ipmr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ipmr_vif_seq_stop,
.show = ipmr_vif_seq_show,
};
@@ -3041,7 +2882,7 @@ static const struct seq_operations ipmr_vif_seq_ops = {
static int ipmr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ipmr_vif_fops = {
@@ -3051,40 +2892,8 @@ static const struct file_operations ipmr_vif_fops = {
.release = seq_release_net,
};
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr_table *mrt;
- struct list_head *cache;
-};
-
-static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
-{
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc;
-
- rcu_read_lock();
- it->cache = &mrt->mfc_cache_list;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
- if (pos-- == 0)
- return mfc;
- rcu_read_unlock();
-
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
-
- it->cache = NULL;
- return NULL;
-}
-
-
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
@@ -3092,54 +2901,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr_table *mrt = it->mrt;
- struct mfc_cache *mfc = v;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc_cache, list);
-
- if (it->cache == &mrt->mfc_unres_queue)
- goto end_of_list;
-
- /* exhausted cache_array, show unresolved */
- rcu_read_unlock();
- it->cache = &mrt->mfc_unres_queue;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc_cache, list);
-
-end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc_cache_list)
- rcu_read_unlock();
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -3151,26 +2913,26 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Group Origin Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
+ const struct mr_mfc_iter *it = seq->private;
const struct mr_table *mrt = it->mrt;
seq_printf(seq, "%08X %08X %-3hd",
(__force u32) mfc->mfc_mcastgrp,
(__force u32) mfc->mfc_origin,
- mfc->mfc_parent);
+ mfc->_c.mfc_parent);
if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
if (VIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
" %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ n, mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
@@ -3185,15 +2947,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ipmr_mfc_fops = {
@@ -3229,7 +2991,7 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb)
ipmr_for_each_table(mrt, net) {
struct vif_device *v = &mrt->vif_table[0];
- struct mfc_cache *mfc;
+ struct mr_mfc *mfc;
int vifi;
/* Notifiy on table VIF entries */
@@ -3246,7 +3008,8 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb)
/* Notify on table MFC entries */
list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
call_ipmr_mfc_entry_notifier(nb, net,
- FIB_EVENT_ENTRY_ADD, mfc,
+ FIB_EVENT_ENTRY_ADD,
+ (struct mfc_cache *)mfc,
mrt->id);
}
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
new file mode 100644
index 000000000000..8ba55bfda817
--- /dev/null
+++ b/net/ipv4/ipmr_base.c
@@ -0,0 +1,323 @@
+/* Linux multicast routing support
+ * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
+ */
+
+#include <linux/mroute_base.h>
+
+/* Sets everything common except 'dev', since that is done under locking */
+void vif_device_init(struct vif_device *v,
+ struct net_device *dev,
+ unsigned long rate_limit,
+ unsigned char threshold,
+ unsigned short flags,
+ unsigned short get_iflink_mask)
+{
+ v->dev = NULL;
+ v->bytes_in = 0;
+ v->bytes_out = 0;
+ v->pkt_in = 0;
+ v->pkt_out = 0;
+ v->rate_limit = rate_limit;
+ v->flags = flags;
+ v->threshold = threshold;
+ if (v->flags & get_iflink_mask)
+ v->link = dev_get_iflink(dev);
+ else
+ v->link = dev->ifindex;
+}
+EXPORT_SYMBOL(vif_device_init);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+ struct mr_table_ops *ops,
+ void (*expire_func)(struct timer_list *t),
+ void (*table_set)(struct mr_table *mrt,
+ struct net *net))
+{
+ struct mr_table *mrt;
+
+ mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+ if (!mrt)
+ return NULL;
+ mrt->id = id;
+ write_pnet(&mrt->net, net);
+
+ mrt->ops = *ops;
+ rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
+ INIT_LIST_HEAD(&mrt->mfc_cache_list);
+ INIT_LIST_HEAD(&mrt->mfc_unres_queue);
+
+ timer_setup(&mrt->ipmr_expire_timer, expire_func, 0);
+
+ mrt->mroute_reg_vif_num = -1;
+ table_set(mrt, net);
+ return mrt;
+}
+EXPORT_SYMBOL(mr_table_alloc);
+
+void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c;
+
+ list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (parent == -1 || parent == c->mfc_parent)
+ return c;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_parent);
+
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c;
+
+ list = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any,
+ *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode)
+ if (c->mfc_un.res.ttls[vifi] < 255)
+ return c;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_any_parent);
+
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
+{
+ struct rhlist_head *tmp, *list;
+ struct mr_mfc *c, *proxy;
+
+ list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+ rhl_for_each_entry_rcu(c, tmp, list, mnode) {
+ if (c->mfc_un.res.ttls[vifi] < 255)
+ return c;
+
+ /* It's ok if the vifi is part of the static tree */
+ proxy = mr_mfc_find_any_parent(mrt, c->mfc_parent);
+ if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
+ return c;
+ }
+
+ return mr_mfc_find_any_parent(mrt, vifi);
+}
+EXPORT_SYMBOL(mr_mfc_find_any);
+
+#ifdef CONFIG_PROC_FS
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos)
+{
+ struct mr_table *mrt = iter->mrt;
+
+ for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+ if (!VIF_EXISTS(mrt, iter->ct))
+ continue;
+ if (pos-- == 0)
+ return &mrt->vif_table[iter->ct];
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_idx);
+
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct mr_vif_iter *iter = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr_table *mrt = iter->mrt;
+
+ ++*pos;
+ if (v == SEQ_START_TOKEN)
+ return mr_vif_seq_idx(net, iter, 0);
+
+ while (++iter->ct < mrt->maxvif) {
+ if (!VIF_EXISTS(mrt, iter->ct))
+ continue;
+ return &mrt->vif_table[iter->ct];
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_next);
+
+void *mr_mfc_seq_idx(struct net *net,
+ struct mr_mfc_iter *it, loff_t pos)
+{
+ struct mr_table *mrt = it->mrt;
+ struct mr_mfc *mfc;
+
+ rcu_read_lock();
+ it->cache = &mrt->mfc_cache_list;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+ if (pos-- == 0)
+ return mfc;
+ rcu_read_unlock();
+
+ spin_lock_bh(it->lock);
+ it->cache = &mrt->mfc_unres_queue;
+ list_for_each_entry(mfc, it->cache, list)
+ if (pos-- == 0)
+ return mfc;
+ spin_unlock_bh(it->lock);
+
+ it->cache = NULL;
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_idx);
+
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct mr_mfc_iter *it = seq->private;
+ struct net *net = seq_file_net(seq);
+ struct mr_table *mrt = it->mrt;
+ struct mr_mfc *c = v;
+
+ ++*pos;
+
+ if (v == SEQ_START_TOKEN)
+ return mr_mfc_seq_idx(net, seq->private, 0);
+
+ if (c->list.next != it->cache)
+ return list_entry(c->list.next, struct mr_mfc, list);
+
+ if (it->cache == &mrt->mfc_unres_queue)
+ goto end_of_list;
+
+ /* exhausted cache_array, show unresolved */
+ rcu_read_unlock();
+ it->cache = &mrt->mfc_unres_queue;
+
+ spin_lock_bh(it->lock);
+ if (!list_empty(it->cache))
+ return list_first_entry(it->cache, struct mr_mfc, list);
+
+end_of_list:
+ spin_unlock_bh(it->lock);
+ it->cache = NULL;
+
+ return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_next);
+#endif
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ struct mr_mfc *c, struct rtmsg *rtm)
+{
+ struct rta_mfc_stats mfcs;
+ struct nlattr *mp_attr;
+ struct rtnexthop *nhp;
+ unsigned long lastuse;
+ int ct;
+
+ /* If cache is unresolved, don't try to parse IIF and OIF */
+ if (c->mfc_parent >= MAXVIFS) {
+ rtm->rtm_flags |= RTNH_F_UNRESOLVED;
+ return -ENOENT;
+ }
+
+ if (VIF_EXISTS(mrt, c->mfc_parent) &&
+ nla_put_u32(skb, RTA_IIF,
+ mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
+ return -EMSGSIZE;
+
+ if (c->mfc_flags & MFC_OFFLOAD)
+ rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
+ mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+ if (!mp_attr)
+ return -EMSGSIZE;
+
+ for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+ if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+ struct vif_device *vif;
+
+ nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
+ if (!nhp) {
+ nla_nest_cancel(skb, mp_attr);
+ return -EMSGSIZE;
+ }
+
+ nhp->rtnh_flags = 0;
+ nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+ vif = &mrt->vif_table[ct];
+ nhp->rtnh_ifindex = vif->dev->ifindex;
+ nhp->rtnh_len = sizeof(*nhp);
+ }
+ }
+
+ nla_nest_end(skb, mp_attr);
+
+ lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+ lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
+ mfcs.mfcs_packets = c->mfc_un.res.pkt;
+ mfcs.mfcs_bytes = c->mfc_un.res.bytes;
+ mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+ if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
+ nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
+ RTA_PAD))
+ return -EMSGSIZE;
+
+ rtm->rtm_type = RTN_MULTICAST;
+ return 1;
+}
+EXPORT_SYMBOL(mr_fill_mroute);
+
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mr_table *(*iter)(struct net *net,
+ struct mr_table *mrt),
+ int (*fill)(struct mr_table *mrt,
+ struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags),
+ spinlock_t *lock)
+{
+ unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1];
+ struct net *net = sock_net(skb->sk);
+ struct mr_table *mrt;
+ struct mr_mfc *mfc;
+
+ rcu_read_lock();
+ for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
+ if (t < s_t)
+ goto next_table;
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+ if (e < s_e)
+ goto next_entry;
+ if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, mfc,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0)
+ goto done;
+next_entry:
+ e++;
+ }
+ e = 0;
+ s_e = 0;
+
+ spin_lock_bh(lock);
+ list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
+ if (e < s_e)
+ goto next_entry2;
+ if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, mfc,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0) {
+ spin_unlock_bh(lock);
+ goto done;
+ }
+next_entry2:
+ e++;
+ }
+ spin_unlock_bh(lock);
+ e = 0;
+ s_e = 0;
+next_table:
+ t++;
+ }
+done:
+ rcu_read_unlock();
+
+ cb->args[1] = e;
+ cb->args[0] = t;
+
+ return skb->len;
+}
+EXPORT_SYMBOL(mr_rtm_dumproute);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 4b02ab39ebc5..08b3e48f44fc 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -840,6 +840,7 @@ static struct pernet_operations clusterip_net_ops = {
.exit = clusterip_net_exit,
.id = &clusterip_net_id,
.size = sizeof(struct clusterip_net),
+ .async = true,
};
static int __init clusterip_tg_init(void)
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index a0d3ad60a411..57244b62a4fc 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -118,6 +118,7 @@ static void __net_exit defrag4_net_exit(struct net *net)
static struct pernet_operations defrag4_net_ops = {
.exit = defrag4_net_exit,
+ .async = true,
};
static int __init nf_defrag_init(void)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index fdabc70283b6..d97e83b2dd33 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -556,4 +556,3 @@ int __init ip_misc_proc_init(void)
{
return register_pernet_subsys(&ip_proc_ops);
}
-
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 26eefa2eaa44..3bb686dac273 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1783,7 +1783,7 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
/* if skb is set it will be used and fl4 can be NULL */
int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
- const struct sk_buff *skb)
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
struct net *net = fi->fib_net;
struct flow_keys hash_keys;
@@ -1810,14 +1810,23 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
if (skb->l4_hash)
return skb_get_hash_raw(skb) >> 1;
memset(&hash_keys, 0, sizeof(hash_keys));
- skb_flow_dissect_flow_keys(skb, &keys, flag);
- hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
- hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
- hash_keys.ports.src = keys.ports.src;
- hash_keys.ports.dst = keys.ports.dst;
- hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ if (flkeys) {
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+ } else {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+ hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+ hash_keys.ports.src = keys.ports.src;
+ hash_keys.ports.dst = keys.ports.dst;
+ hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ }
} else {
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -1838,11 +1847,12 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
static int ip_mkroute_input(struct sk_buff *skb,
struct fib_result *res,
struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+ __be32 daddr, __be32 saddr, u32 tos,
+ struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1) {
- int h = fib_multipath_hash(res->fi, NULL, skb);
+ int h = fib_multipath_hash(res->fi, NULL, skb, hkeys);
fib_select_multipath(res, h);
}
@@ -1868,13 +1878,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct fib_result *res)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct flow_keys *flkeys = NULL, _flkeys;
+ struct net *net = dev_net(dev);
struct ip_tunnel_info *tun_info;
- struct flowi4 fl4;
+ int err = -EINVAL;
unsigned int flags = 0;
u32 itag = 0;
struct rtable *rth;
- int err = -EINVAL;
- struct net *net = dev_net(dev);
+ struct flowi4 fl4;
bool do_cache;
/* IP on this device is disabled. */
@@ -1933,6 +1944,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.daddr = daddr;
fl4.saddr = saddr;
fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+ if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+ flkeys = &_flkeys;
+
err = fib_lookup(net, &fl4, res, 0);
if (err != 0) {
if (!IN_DEV_FORWARD(in_dev))
@@ -1958,7 +1973,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (res->type != RTN_UNICAST)
goto martian_destination;
- err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+ err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
out: return err;
brd_input:
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index a471f696e13c..c92014cb1e16 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -97,10 +97,9 @@ struct bbr {
packet_conservation:1, /* use packet conservation? */
restore_cwnd:1, /* decided to revert cwnd to old value */
round_start:1, /* start of packet-timed tx->ack round? */
- tso_segs_goal:7, /* segments we want in each skb we send */
idle_restart:1, /* restarting after idle? */
probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */
- unused:5,
+ unused:12,
lt_is_sampling:1, /* taking long-term ("LT") samples now? */
lt_rtt_cnt:7, /* round trips in long-term interval */
lt_use_bw:1; /* use lt_bw as our bw estimate? */
@@ -261,23 +260,25 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
sk->sk_pacing_rate = rate;
}
-/* Return count of segments we want in the skbs we send, or 0 for default. */
-static u32 bbr_tso_segs_goal(struct sock *sk)
+/* override sysctl_tcp_min_tso_segs */
+static u32 bbr_min_tso_segs(struct sock *sk)
{
- struct bbr *bbr = inet_csk_ca(sk);
-
- return bbr->tso_segs_goal;
+ return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
}
-static void bbr_set_tso_segs_goal(struct sock *sk)
+static u32 bbr_tso_segs_goal(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bbr *bbr = inet_csk_ca(sk);
- u32 min_segs;
+ u32 segs, bytes;
+
+ /* Sort of tcp_tso_autosize() but ignoring
+ * driver provided sk_gso_max_size.
+ */
+ bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+ GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+ segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
- min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
- bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
- 0x7FU);
+ return min(segs, 0x7FU);
}
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -348,7 +349,7 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
/* Allow enough full-sized skbs in flight to utilize end systems. */
- cwnd += 3 * bbr->tso_segs_goal;
+ cwnd += 3 * bbr_tso_segs_goal(sk);
/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
cwnd = (cwnd + 1) & ~1U;
@@ -824,7 +825,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs)
bw = bbr_bw(sk);
bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
- bbr_set_tso_segs_goal(sk);
bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}
@@ -834,7 +834,6 @@ static void bbr_init(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
bbr->prior_cwnd = 0;
- bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
bbr->rtt_cnt = 0;
bbr->next_rtt_delivered = 0;
bbr->prev_ca_state = TCP_CA_Open;
@@ -936,7 +935,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.undo_cwnd = bbr_undo_cwnd,
.cwnd_event = bbr_cwnd_event,
.ssthresh = bbr_ssthresh,
- .tso_segs_goal = bbr_tso_segs_goal,
+ .min_tso_segs = bbr_min_tso_segs,
.get_info = bbr_get_info,
.set_state = bbr_set_state,
};
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 49d043de3476..383cac0ff0ec 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1703,8 +1703,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
/* Return how many segs we'd like on a TSO packet,
* to send one TSO packet per ms
*/
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
- int min_tso_segs)
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+ int min_tso_segs)
{
u32 bytes, segs;
@@ -1720,7 +1720,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
return segs;
}
-EXPORT_SYMBOL(tcp_tso_autosize);
/* Return the number of segments we want in the skb we are transmitting.
* See if congestion control module wants to decide; otherwise, autosize.
@@ -1728,11 +1727,13 @@ EXPORT_SYMBOL(tcp_tso_autosize);
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
- u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+ u32 min_tso, tso_segs;
- if (!tso_segs)
- tso_segs = tcp_tso_autosize(sk, mss_now,
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+ min_tso = ca_ops->min_tso_segs ?
+ ca_ops->min_tso_segs(sk) :
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+
+ tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
}
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index ec35eaa5c029..c0630013c1ae 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -90,7 +90,7 @@ EXPORT_SYMBOL(xfrm4_tunnel_deregister);
for (handler = rcu_dereference(head); \
handler != NULL; \
handler = rcu_dereference(handler->next)) \
-
+
static int tunnel4_rcv(struct sk_buff *skb)
{
struct xfrm_tunnel *handler;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 796ac4115485..0c752dc3f93b 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -379,4 +379,3 @@ void __init xfrm4_init(void)
xfrm4_protocol_init();
register_pernet_subsys(&xfrm4_net_ops);
}
-
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ea71e4b0ab7a..6794ddf0547c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -278,6 +278,7 @@ config IPV6_SUBTREES
config IPV6_MROUTE
bool "IPv6: multicast routing"
depends on IPV6
+ select IP_MROUTE_COMMON
---help---
Experimental support for IPv6 multicast forwarding.
If unsure, say N.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4facfe0b1888..b5fd116c046a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1459,6 +1459,21 @@ static bool ipv6_use_optimistic_addr(struct net *net,
#endif
}
+static bool ipv6_allow_optimistic_dad(struct net *net,
+ struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (!idev)
+ return false;
+ if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ return false;
+
+ return true;
+#else
+ return false;
+#endif
+}
+
static int ipv6_get_saddr_eval(struct net *net,
struct ipv6_saddr_score *score,
struct ipv6_saddr_dst *dst,
@@ -1968,6 +1983,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
spin_lock_bh(&ifp->lock);
addrconf_del_dad_work(ifp);
ifp->flags |= IFA_F_TENTATIVE;
+ if (dad_failed)
+ ifp->flags &= ~IFA_F_OPTIMISTIC;
spin_unlock_bh(&ifp->lock);
if (dad_failed)
ipv6_ifa_notify(0, ifp);
@@ -4501,6 +4518,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
(ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
return -EINVAL;
+ if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
timeout = addrconf_timeout_fixup(valid_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
expires = jiffies_to_clock_t(timeout * HZ);
@@ -4574,6 +4594,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct in6_addr *pfx, *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
+ struct inet6_dev *idev;
u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
u32 ifa_flags;
int err;
@@ -4607,7 +4628,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
/* We ignore other flags so far. */
ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
- IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
+ IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+ idev = ipv6_find_idev(dev);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+
+ if (!ipv6_allow_optimistic_dad(net, idev))
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
+ if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) {
+ NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
+ return -EINVAL;
+ }
ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
if (!ifa) {
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 8e085cc05aeb..d7d0abc7fd0e 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -552,4 +552,3 @@ void ac6_proc_exit(struct net *net)
remove_proc_entry("anycast6", net->proc_net);
}
#endif
-
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 11025f8d124b..b643f5ce6c80 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
return nexthdr;
}
EXPORT_SYMBOL(ipv6_find_hdr);
-
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 95a2c9e8699a..04e5f523e50f 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -223,6 +223,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
+ return 0;
+
return 1;
}
@@ -258,12 +269,26 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule6->dst.plen = frh->dst_len;
rule6->tclass = frh->tos;
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect++;
+
net->ipv6.fib6_has_custom_rules = true;
err = 0;
errout:
return err;
}
+static int fib6_rule_delete(struct fib_rule *rule)
+{
+ struct net *net = rule->fr_net;
+
+ if (net->ipv6.fib6_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect--;
+
+ return 0;
+}
+
static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
struct nlattr **tb)
{
@@ -323,6 +348,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.match = fib6_rule_match,
.suppress = fib6_rule_suppress,
.configure = fib6_rule_configure,
+ .delete = fib6_rule_delete,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
.nlmsg_payload = fib6_rule_nlmsg_payload,
@@ -350,6 +376,7 @@ static int __net_init fib6_rules_net_init(struct net *net)
goto out_fib6_rules_ops;
net->ipv6.fib6_rules_ops = ops;
+ net->ipv6.fib6_rules_require_fldissect = 0;
out:
return err;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 4fa4f1b150a4..b0778d323b6e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(&fl6, skb, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 44c39c5f0638..e438699f000f 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -613,6 +613,7 @@ static struct pernet_operations ila_net_ops = {
.exit = ila_exit_net,
.id = &ila_net_id,
.size = sizeof(struct ila_net),
+ .async = true,
};
static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3c353125546d..4f150a394387 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1517,6 +1517,7 @@ static struct pernet_operations ip6gre_net_ops = {
.exit_batch = ip6gre_exit_batch_net,
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
+ .async = true,
};
static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1784,6 +1785,12 @@ static void ip6gre_tap_setup(struct net_device *dev)
netif_keep_dst(dev);
}
+bool is_ip6gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &ip6gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
+
static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
struct ip_tunnel_encap *ipencap)
{
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 997c7f19ad62..a6eb0e699b15 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -71,7 +71,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
- ((mroute6_socket(net, skb) &&
+ ((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr))) {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4b15fe928278..869e2e6750f7 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -2250,6 +2250,7 @@ static struct pernet_operations ip6_tnl_net_ops = {
.exit_batch = ip6_tnl_exit_batch_net,
.id = &ip6_tnl_net_id,
.size = sizeof(struct ip6_tnl_net),
+ .async = true,
};
/**
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index fa3ae1cb50d3..c617ea17faa8 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1148,6 +1148,7 @@ static struct pernet_operations vti6_net_ops = {
.exit_batch = vti6_exit_batch_net,
.id = &vti6_net_id,
.size = sizeof(struct vti6_net),
+ .async = true,
};
static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 295eb5ecaee5..2a38f9de45d3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -20,7 +20,6 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
@@ -32,11 +31,9 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
@@ -54,30 +51,12 @@
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
-struct mr6_table {
- struct list_head list;
- possible_net_t net;
- u32 id;
- struct sock *mroute6_sk;
- struct timer_list ipmr_expire_timer;
- struct list_head mfc6_unres_queue;
- struct list_head mfc6_cache_array[MFC6_LINES];
- struct mif_device vif6_table[MAXMIFS];
- int maxvif;
- atomic_t cache_resolve_queue_len;
- bool mroute_do_assert;
- bool mroute_do_pim;
-#ifdef CONFIG_IPV6_PIMSM_V2
- int mroute_reg_vif_num;
-#endif
-};
-
struct ip6mr_rule {
struct fib_rule common;
};
struct ip6mr_result {
- struct mr6_table *mrt;
+ struct mr_table *mrt;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
@@ -86,11 +65,7 @@ struct ip6mr_result {
static DEFINE_RWLOCK(mrt_lock);
-/*
- * Multicast router control variables
- */
-
-#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
+/* Multicast router control variables */
/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -105,30 +80,45 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
static struct kmem_cache *mrt_cachep __read_mostly;
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
-static void ip6mr_free_table(struct mr6_table *mrt);
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr_table *mrt);
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm);
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd);
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv6.mr6_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv6.mr6_tables)
+ return NULL;
+ return ret;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
- struct mr6_table *mrt;
+ struct mr_table *mrt;
ip6mr_for_each_table(mrt, net) {
if (mrt->id == id)
@@ -138,7 +128,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
int err;
struct ip6mr_result res;
@@ -159,7 +149,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
struct ip6mr_result *res = arg->result;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
switch (rule->action) {
case FR_ACT_TO_TBL:
@@ -227,7 +217,7 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
static int __net_init ip6mr_rules_init(struct net *net)
{
struct fib_rules_ops *ops;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
int err;
ops = fib_rules_register(&ip6mr_rules_ops_template, net);
@@ -258,7 +248,7 @@ err1:
static void __net_exit ip6mr_rules_exit(struct net *net)
{
- struct mr6_table *mrt, *next;
+ struct mr_table *mrt, *next;
rtnl_lock();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
@@ -272,13 +262,21 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
#define ip6mr_for_each_table(mrt, net) \
for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv6.mrt6;
+ return NULL;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
return net->ipv6.mrt6;
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
*mrt = net->ipv6.mrt6;
return 0;
@@ -299,112 +297,75 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
}
#endif
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
{
- struct mr6_table *mrt;
- unsigned int i;
+ const struct mfc6_cache_cmp_arg *cmparg = arg->key;
+ struct mfc6_cache *c = (struct mfc6_cache *)ptr;
- mrt = ip6mr_get_table(net, id);
- if (mrt)
- return mrt;
-
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return NULL;
- mrt->id = id;
- write_pnet(&mrt->net, net);
-
- /* Forwarding cache */
- for (i = 0; i < MFC6_LINES; i++)
- INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
-
- INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+ return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
+ !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
+}
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
+static const struct rhashtable_params ip6mr_rht_params = {
+ .head_offset = offsetof(struct mr_mfc, mnode),
+ .key_offset = offsetof(struct mfc6_cache, cmparg),
+ .key_len = sizeof(struct mfc6_cache_cmp_arg),
+ .nelem_hint = 3,
+ .locks_mul = 1,
+ .obj_cmpfn = ip6mr_hash_cmp,
+ .automatic_shrinking = true,
+};
-#ifdef CONFIG_IPV6_PIMSM_V2
- mrt->mroute_reg_vif_num = -1;
-#endif
+static void ip6mr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
- return mrt;
}
-static void ip6mr_free_table(struct mr6_table *mrt)
-{
- del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt, true);
- kfree(mrt);
-}
-
-#ifdef CONFIG_PROC_FS
-
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- struct list_head *cache;
- int ct;
+static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
+ .mf6c_origin = IN6ADDR_ANY_INIT,
+ .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};
+static struct mr_table_ops ip6mr_mr_table_ops = {
+ .rht_params = &ip6mr_rht_params,
+ .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
+};
-static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
- struct mr6_table *mrt = it->mrt;
- struct mfc6_cache *mfc;
-
- read_lock(&mrt_lock);
- for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- }
- read_unlock(&mrt_lock);
+ struct mr_table *mrt;
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc6_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
+ mrt = ip6mr_get_table(net, id);
+ if (mrt)
+ return mrt;
- it->cache = NULL;
- return NULL;
+ return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
+ ipmr_expire_process, ip6mr_new_table_set);
}
-/*
- * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
- */
-
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- int ct;
-};
-
-static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
+static void ip6mr_free_table(struct mr_table *mrt)
{
- struct mr6_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ del_timer_sync(&mrt->ipmr_expire_timer);
+ mroute_clean_tables(mrt, true);
+ rhltable_destroy(&mrt->mfc_hash);
+ kfree(mrt);
}
+#ifdef CONFIG_PROC_FS
+/* The /proc interfaces to multicast routing
+ * /proc/ip6_mr_cache /proc/ip6_mr_vif
+ */
+
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
@@ -413,26 +374,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ip6mr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -443,19 +385,19 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
- struct mr6_table *mrt = iter->mrt;
+ struct mr_vif_iter *iter = seq->private;
+ struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
"Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
} else {
- const struct mif_device *vif = v;
+ const struct vif_device *vif = v;
const char *name = vif->dev ? vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
- vif - mrt->vif6_table,
+ vif - mrt->vif_table,
name, vif->bytes_in, vif->pkt_in,
vif->bytes_out, vif->pkt_out,
vif->flags);
@@ -465,7 +407,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ip6mr_vif_seq_ops = {
.start = ip6mr_vif_seq_start,
- .next = ip6mr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ip6mr_vif_seq_stop,
.show = ip6mr_vif_seq_show,
};
@@ -473,7 +415,7 @@ static const struct seq_operations ip6mr_vif_seq_ops = {
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ip6mr_vif_fops = {
@@ -485,72 +427,14 @@ static const struct file_operations ip6mr_vif_fops = {
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct mfc6_cache *mfc = v;
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = it->mrt;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc6_cache, list);
-
- if (it->cache == &mrt->mfc6_unres_queue)
- goto end_of_list;
-
- BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
-
- while (++it->ct < MFC6_LINES) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- if (list_empty(it->cache))
- continue;
- return list_first_entry(it->cache, struct mfc6_cache, list);
- }
-
- /* exhausted cache_array, show unresolved */
- read_unlock(&mrt_lock);
- it->cache = &mrt->mfc6_unres_queue;
- it->ct = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc6_cache, list);
-
- end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc6_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc6_cache_array[it->ct])
- read_unlock(&mrt_lock);
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -564,25 +448,25 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc6_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
+ const struct mr_mfc_iter *it = seq->private;
+ struct mr_table *mrt = it->mrt;
seq_printf(seq, "%pI6 %pI6 %-3hd",
&mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
- mfc->mf6c_parent);
+ mfc->_c.mfc_parent);
- if (it->cache != &mrt->mfc6_unres_queue) {
+ if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
- if (MIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
+ if (VIF_EXISTS(mrt, n) &&
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
- " %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ " %2d:%-3d", n,
+ mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
@@ -597,15 +481,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ip6mr_mfc_fops = {
@@ -624,7 +508,7 @@ static int pim6_rcv(struct sk_buff *skb)
struct ipv6hdr *encap;
struct net_device *reg_dev = NULL;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -658,7 +542,7 @@ static int pim6_rcv(struct sk_buff *skb)
read_lock(&mrt_lock);
if (reg_vif_num >= 0)
- reg_dev = mrt->vif6_table[reg_vif_num].dev;
+ reg_dev = mrt->vif_table[reg_vif_num].dev;
if (reg_dev)
dev_hold(reg_dev);
read_unlock(&mrt_lock);
@@ -693,7 +577,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
@@ -736,7 +620,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;
}
-static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
struct net_device *dev;
char name[IFNAMSIZ];
@@ -773,17 +657,17 @@ failure:
* Delete a VIF entry
*/
-static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
struct list_head *head)
{
- struct mif_device *v;
+ struct vif_device *v;
struct net_device *dev;
struct inet6_dev *in6_dev;
if (vifi < 0 || vifi >= mrt->maxvif)
return -EADDRNOTAVAIL;
- v = &mrt->vif6_table[vifi];
+ v = &mrt->vif_table[vifi];
write_lock_bh(&mrt_lock);
dev = v->dev;
@@ -802,7 +686,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
if (vifi + 1 == mrt->maxvif) {
int tmp;
for (tmp = vifi - 1; tmp >= 0; tmp--) {
- if (MIF_EXISTS(mrt, tmp))
+ if (VIF_EXISTS(mrt, tmp))
break;
}
mrt->maxvif = tmp + 1;
@@ -827,23 +711,30 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
return 0;
}
+static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
+{
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
+
+ kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
+}
+
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
- kmem_cache_free(mrt_cachep, c);
+ call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}
/* Destroy an unresolved cache entry, killing queued skbs
and reporting error to netlink readers.
*/
-static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
@@ -862,13 +753,13 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
/* Timer process for all the unresolved queue. */
-static void ipmr_do_expire_process(struct mr6_table *mrt)
+static void ipmr_do_expire_process(struct mr_table *mrt)
{
unsigned long now = jiffies;
unsigned long expires = 10 * HZ;
- struct mfc6_cache *c, *next;
+ struct mr_mfc *c, *next;
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
if (time_after(c->mfc_un.unres.expires, now)) {
/* not yet... */
unsigned long interval = c->mfc_un.unres.expires - now;
@@ -878,24 +769,24 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
}
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
static void ipmr_expire_process(struct timer_list *t)
{
- struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
+ struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
return;
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
ipmr_do_expire_process(mrt);
spin_unlock(&mfc_unres_lock);
@@ -903,7 +794,8 @@ static void ipmr_expire_process(struct timer_list *t)
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
+static void ip6mr_update_thresholds(struct mr_table *mrt,
+ struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
@@ -913,7 +805,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
for (vifi = 0; vifi < mrt->maxvif; vifi++) {
- if (MIF_EXISTS(mrt, vifi) &&
+ if (VIF_EXISTS(mrt, vifi) &&
ttls[vifi] && ttls[vifi] < 255) {
cache->mfc_un.res.ttls[vifi] = ttls[vifi];
if (cache->mfc_un.res.minvif > vifi)
@@ -925,17 +817,17 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
cache->mfc_un.res.lastuse = jiffies;
}
-static int mif6_add(struct net *net, struct mr6_table *mrt,
+static int mif6_add(struct net *net, struct mr_table *mrt,
struct mif6ctl *vifc, int mrtsock)
{
int vifi = vifc->mif6c_mifi;
- struct mif_device *v = &mrt->vif6_table[vifi];
+ struct vif_device *v = &mrt->vif_table[vifi];
struct net_device *dev;
struct inet6_dev *in6_dev;
int err;
/* Is vif busy ? */
- if (MIF_EXISTS(mrt, vifi))
+ if (VIF_EXISTS(mrt, vifi))
return -EADDRINUSE;
switch (vifc->mif6c_flags) {
@@ -980,21 +872,10 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
dev->ifindex, &in6_dev->cnf);
}
- /*
- * Fill in the VIF structures
- */
- v->rate_limit = vifc->vifc_rate_limit;
- v->flags = vifc->mif6c_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & MIFF_REGISTER)
- v->link = dev_get_iflink(dev);
+ /* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
+ vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
+ MIFF_REGISTER);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
@@ -1009,75 +890,56 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
return 0;
}
-static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
const struct in6_addr *origin,
const struct in6_addr *mcastgrp)
{
- int line = MFC6_HASH(mcastgrp, origin);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
- return c;
- }
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
- mifi_t mifi)
-{
- int line = MFC6_HASH(&in6addr_any, &in6addr_any);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_any(&c->mf6c_mcastgrp) &&
- (c->mfc_un.res.ttls[mifi] < 255))
- return c;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
-static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
struct in6_addr *mcastgrp,
mifi_t mifi)
{
- int line = MFC6_HASH(mcastgrp, &in6addr_any);
- struct mfc6_cache *c, *proxy;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = in6addr_any,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
if (ipv6_addr_any(mcastgrp))
- goto skip;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
- if (c->mfc_un.res.ttls[mifi] < 255)
- return c;
-
- /* It's ok if the mifi is part of the static tree */
- proxy = ip6mr_cache_find_any_parent(mrt,
- c->mf6c_parent);
- if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
- return c;
- }
+ return mr_mfc_find_any_parent(mrt, mifi);
+ return mr_mfc_find_any(mrt, mifi, &arg);
+}
-skip:
- return ip6mr_cache_find_any_parent(mrt, mifi);
+/* Look for a (S,G,iif) entry if parent != -1 */
+static struct mfc6_cache *
+ip6mr_cache_find_parent(struct mr_table *mrt,
+ const struct in6_addr *origin,
+ const struct in6_addr *mcastgrp,
+ int parent)
+{
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
+
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
-/*
- * Allocate a multicast cache entry
- */
+/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (!c)
return NULL;
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXMIFS;
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXMIFS;
return c;
}
@@ -1086,8 +948,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (!c)
return NULL;
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10 * HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
return c;
}
@@ -1095,7 +957,7 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
* A cache entry has gone into a resolved state from queued
*/
-static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
struct mfc6_cache *uc, struct mfc6_cache *c)
{
struct sk_buff *skb;
@@ -1104,12 +966,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Play the pending entries through our router
*/
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
- if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
} else {
nlh->nlmsg_type = NLMSG_ERROR;
@@ -1129,9 +992,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Called under mrt_lock.
*/
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert)
{
+ struct sock *mroute6_sk;
struct sk_buff *skb;
struct mrt6msg *msg;
int ret;
@@ -1201,17 +1065,19 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (!mrt->mroute6_sk) {
+ rcu_read_lock();
+ mroute6_sk = rcu_dereference(mrt->mroute_sk);
+ if (!mroute6_sk) {
+ rcu_read_unlock();
kfree_skb(skb);
return -EINVAL;
}
mrt6msg_netlink_event(mrt, skb);
- /*
- * Deliver to user space multicast routing algorithms
- */
- ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
+ /* Deliver to user space multicast routing algorithms */
+ ret = sock_queue_rcv_skb(mroute6_sk, skb);
+ rcu_read_unlock();
if (ret < 0) {
net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
kfree_skb(skb);
@@ -1220,19 +1086,16 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
return ret;
}
-/*
- * Queue a packet for resolution. It gets locked cache entry!
- */
-
-static int
-ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
+/* Queue a packet for resolution. It gets locked cache entry! */
+static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
+ struct sk_buff *skb)
{
+ struct mfc6_cache *c;
bool found = false;
int err;
- struct mfc6_cache *c;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
found = true;
@@ -1253,10 +1116,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
return -ENOBUFS;
}
- /*
- * Fill in the new cache entry
- */
- c->mf6c_parent = -1;
+ /* Fill in the new cache entry */
+ c->_c.mfc_parent = -1;
c->mf6c_origin = ipv6_hdr(skb)->saddr;
c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
@@ -1276,20 +1137,18 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc6_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
ipmr_do_expire_process(mrt);
}
- /*
- * See if we can append the packet
- */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ /* See if we can append the packet */
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1301,29 +1160,24 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
* MFC6 cache manipulation by user space
*/
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
int parent)
{
- int line;
- struct mfc6_cache *c, *next;
-
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+ struct mfc6_cache *c;
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == c->mf6c_parent)) {
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (!c)
+ return -ENOENT;
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
+ list_del_rcu(&c->_c.list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- return 0;
- }
- }
- return -ENOENT;
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
+ ip6mr_cache_free(c);
+ return 0;
}
static int ip6mr_device_event(struct notifier_block *this,
@@ -1331,15 +1185,15 @@ static int ip6mr_device_event(struct notifier_block *this,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
- struct mif_device *v;
+ struct mr_table *mrt;
+ struct vif_device *v;
int ct;
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
ip6mr_for_each_table(mrt, net) {
- v = &mrt->vif6_table[0];
+ v = &mrt->vif_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
if (v->dev == dev)
mif6_delete(mrt, ct, 1, NULL);
@@ -1453,14 +1307,14 @@ void ip6_mr_cleanup(void)
kmem_cache_destroy(mrt_cachep);
}
-static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
struct mf6cctl *mfc, int mrtsock, int parent)
{
- bool found = false;
- int line;
- struct mfc6_cache *uc, *c;
unsigned char ttls[MAXMIFS];
- int i;
+ struct mfc6_cache *uc, *c;
+ struct mr_mfc *_uc;
+ bool found;
+ int i, err;
if (mfc->mf6cc_parent >= MAXMIFS)
return -ENFILE;
@@ -1469,27 +1323,19 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
for (i = 0; i < MAXMIFS; i++) {
if (IF_ISSET(i, &mfc->mf6cc_ifset))
ttls[i] = 1;
-
- }
-
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == mfc->mf6cc_parent)) {
- found = true;
- break;
- }
}
- if (found) {
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (c) {
write_lock_bh(&mrt_lock);
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
@@ -1505,31 +1351,36 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- write_lock_bh(&mrt_lock);
- list_add(&c->list, &mrt->mfc6_cache_array[line]);
- write_unlock_bh(&mrt_lock);
+ err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
+ ip6mr_rht_params);
+ if (err) {
+ pr_err("ip6mr: rhtable insert error %d\n", err);
+ ip6mr_cache_free(c);
+ return err;
+ }
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
- /*
- * Check to see if we resolved a queued list. If so we
- * need to send on the frames and tidy up.
+ /* Check to see if we resolved a queued list. If so we
+ * need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc6_cache *)_uc;
if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
}
}
- if (list_empty(&mrt->mfc6_unres_queue))
+ if (list_empty(&mrt->mfc_unres_queue))
del_timer(&mrt->ipmr_expire_timer);
spin_unlock_bh(&mfc_unres_lock);
@@ -1545,61 +1396,54 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr6_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
- int i;
+ struct mr_mfc *c, *tmp;
LIST_HEAD(list);
- struct mfc6_cache *c, *next;
+ int i;
- /*
- * Shut down all active vif entries
- */
+ /* Shut down all active vif entries */
for (i = 0; i < mrt->maxvif; i++) {
- if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+ if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
continue;
mif6_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
- /*
- * Wipe the cache
- */
- for (i = 0; i < MFC6_LINES; i++) {
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
- if (!all && (c->mfc_flags & MFC_STATIC))
- continue;
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
-
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- }
+ /* Wipe the cache */
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+ if (!all && (c->mfc_flags & MFC_STATIC))
+ continue;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+ list_del_rcu(&c->list);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ ip6mr_cache_free((struct mfc6_cache *)c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+ RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
spin_unlock_bh(&mfc_unres_lock);
}
}
-static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
+static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
int err = 0;
struct net *net = sock_net(sk);
rtnl_lock();
write_lock_bh(&mrt_lock);
- if (likely(mrt->mroute6_sk == NULL)) {
- mrt->mroute6_sk = sk;
- net->ipv6.devconf_all->mc_forwarding++;
- } else {
+ if (rtnl_dereference(mrt->mroute_sk)) {
err = -EADDRINUSE;
+ } else {
+ rcu_assign_pointer(mrt->mroute_sk, sk);
+ net->ipv6.devconf_all->mc_forwarding++;
}
write_unlock_bh(&mrt_lock);
@@ -1617,7 +1461,7 @@ int ip6mr_sk_done(struct sock *sk)
{
int err = -EACCES;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1625,9 +1469,9 @@ int ip6mr_sk_done(struct sock *sk)
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
- if (sk == mrt->mroute6_sk) {
+ if (sk == rtnl_dereference(mrt->mroute_sk)) {
write_lock_bh(&mrt_lock);
- mrt->mroute6_sk = NULL;
+ RCU_INIT_POINTER(mrt->mroute_sk, NULL);
net->ipv6.devconf_all->mc_forwarding--;
write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1641,13 +1485,14 @@ int ip6mr_sk_done(struct sock *sk)
}
}
rtnl_unlock();
+ synchronize_rcu();
return err;
}
-struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_oif = skb->dev->ifindex,
@@ -1657,8 +1502,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
return NULL;
- return mrt->mroute6_sk;
+ return rcu_access_pointer(mrt->mroute_sk);
}
+EXPORT_SYMBOL(mroute6_is_socket);
/*
* Socket options and virtual interface manipulation. The whole
@@ -1674,7 +1520,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
struct mf6cctl mfc;
mifi_t mifi;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1685,7 +1531,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
return -ENOENT;
if (optname != MRT6_INIT) {
- if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (sk != rcu_access_pointer(mrt->mroute_sk) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EACCES;
}
@@ -1707,7 +1554,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (vif.mif6c_mifi >= MAXMIFS)
return -ENFILE;
rtnl_lock();
- ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
+ ret = mif6_add(net, mrt, &vif,
+ sk == rtnl_dereference(mrt->mroute_sk));
rtnl_unlock();
return ret;
@@ -1742,7 +1590,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
ret = ip6mr_mfc_delete(mrt, &mfc, parent);
else
ret = ip6mr_mfc_add(net, mrt, &mfc,
- sk == mrt->mroute6_sk, parent);
+ sk ==
+ rtnl_dereference(mrt->mroute_sk),
+ parent);
rtnl_unlock();
return ret;
@@ -1794,7 +1644,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
if (v != RT_TABLE_DEFAULT && v >= 100000000)
return -EINVAL;
- if (sk == mrt->mroute6_sk)
+ if (sk == rcu_access_pointer(mrt->mroute_sk))
return -EBUSY;
rtnl_lock();
@@ -1825,7 +1675,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
int olr;
int val;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1873,10 +1723,10 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
struct sioc_sg_req6 sr;
struct sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1889,8 +1739,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1907,19 +1757,19 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -1947,10 +1797,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
struct compat_sioc_sg_req6 sr;
struct compat_sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1963,8 +1813,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1981,19 +1831,19 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -2014,11 +1864,11 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
* Processing handlers for ip6mr_forward
*/
-static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
struct ipv6hdr *ipv6h;
- struct mif_device *vif = &mrt->vif6_table[vifi];
+ struct vif_device *vif = &mrt->vif_table[vifi];
struct net_device *dev;
struct dst_entry *dst;
struct flowi6 fl6;
@@ -2088,46 +1938,50 @@ out_free:
return 0;
}
-static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
+static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
int ct;
for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
- if (mrt->vif6_table[ct].dev == dev)
+ if (mrt->vif_table[ct].dev == dev)
break;
}
return ct;
}
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache)
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *c)
{
int psend = -1;
int vif, ct;
int true_vifi = ip6mr_find_vif(mrt, skb->dev);
- vif = cache->mf6c_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+ if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
- cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+ rcu_read_lock();
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
+ rcu_read_unlock();
goto forward;
+ }
+ rcu_read_unlock();
}
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (mrt->vif6_table[vif].dev != skb->dev) {
- cache->mfc_un.res.wrong_if++;
+ if (mrt->vif_table[vif].dev != skb->dev) {
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2136,52 +1990,55 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
}
goto dont_forward;
}
forward:
- mrt->vif6_table[vif].pkt_in++;
- mrt->vif6_table[vif].bytes_in += skb->len;
+ mrt->vif_table[vif].pkt_in++;
+ mrt->vif_table[vif].bytes_in += skb->len;
/*
* Forward the frame
*/
- if (ipv6_addr_any(&cache->mf6c_origin) &&
- ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+ if (ipv6_addr_any(&c->mf6c_origin) &&
+ ipv6_addr_any(&c->mf6c_mcastgrp)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mf6c_parent &&
+ true_vifi != c->_c.mfc_parent &&
ipv6_hdr(skb)->hop_limit >
- cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mf6c_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
- ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+ if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
+ ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ip6mr_forward2(net, mrt, skb2, cache, psend);
+ ip6mr_forward2(net, mrt, skb2,
+ c, psend);
}
psend = ct;
}
}
last_forward:
if (psend != -1) {
- ip6mr_forward2(net, mrt, skb, cache, psend);
+ ip6mr_forward2(net, mrt, skb, c, psend);
return;
}
@@ -2198,7 +2055,7 @@ int ip6_mr_input(struct sk_buff *skb)
{
struct mfc6_cache *cache;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -2248,66 +2105,11 @@ int ip6_mr_input(struct sk_buff *skb)
return 0;
}
-
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mf6c_parent >= MAXMIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (MIF_EXISTS(mrt, c->mf6c_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
- mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
- if (!mp_attr)
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
- if (!nhp) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
u32 portid)
{
int err;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct mfc6_cache *cache;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
@@ -2368,15 +2170,12 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
return err;
}
- if (rtm->rtm_flags & RTM_F_NOTIFY)
- cache->mfc_flags |= MFC_NOTIFY;
-
- err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
return err;
}
-static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
int flags)
{
@@ -2398,7 +2197,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2407,7 +2206,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
goto nla_put_failure;
- err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
@@ -2420,6 +2219,14 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags)
+{
+ return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
+ cmd, flags);
+}
+
static int mr6_msgsize(bool unresolved, int maxvif)
{
size_t len =
@@ -2441,14 +2248,14 @@ static int mr6_msgsize(bool unresolved, int maxvif)
return len;
}
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+ skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
@@ -2483,7 +2290,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
return len;
}
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
struct net *net = read_pnet(&mrt->net);
struct nlmsghdr *nlh;
@@ -2533,65 +2340,6 @@ errout:
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr6_table *mrt;
- struct mfc6_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int h = 0, s_h;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_h = cb->args[1];
- s_e = cb->args[2];
-
- read_lock(&mrt_lock);
- ip6mr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- if (t > s_t)
- s_h = 0;
- for (h = s_h; h < MFC6_LINES; h++) {
- list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
- if (e < s_e)
- goto next_entry;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = s_e = 0;
- }
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = s_e = 0;
- s_h = 0;
-next_table:
- t++;
- }
-done:
- read_unlock(&mrt_lock);
-
- cb->args[2] = e;
- cb->args[1] = h;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
+ _ip6mr_fill_mroute, &mfc_unres_lock);
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 24535169663d..4d780c7f0130 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1415,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
EXPORT_SYMBOL(compat_ipv6_getsockopt);
#endif
-
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index c87b48359e8f..32f98bc06900 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -103,6 +103,7 @@ static void __net_exit defrag6_net_exit(struct net *net)
static struct pernet_operations defrag6_net_ops = {
.exit = defrag6_net_exit,
+ .async = true,
};
static int __init nf_defrag_init(void)
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index b8858c546f41..1678cf037688 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -355,4 +355,3 @@ void ipv6_misc_proc_exit(void)
{
unregister_pernet_subsys(&ipv6_proc_ops);
}
-
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index aa709b644945..e2bb40824c85 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -460,7 +460,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
* case it will always be non-zero. Otherwise now is the time to do it.
*/
if (!fl6->mp_hash)
- fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+ fl6->mp_hash = rt6_multipath_hash(fl6, NULL, NULL);
if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
return match;
@@ -1786,10 +1786,12 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
static void ip6_multipath_l3_keys(const struct sk_buff *skb,
- struct flow_keys *keys)
+ struct flow_keys *keys,
+ struct flow_keys *flkeys)
{
const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
const struct ipv6hdr *key_iph = outer_iph;
+ struct flow_keys *_flkeys = flkeys;
const struct ipv6hdr *inner_iph;
const struct icmp6hdr *icmph;
struct ipv6hdr _inner_iph;
@@ -1811,22 +1813,31 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
goto out;
key_iph = inner_iph;
+ _flkeys = NULL;
out:
memset(keys, 0, sizeof(*keys));
keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
- keys->addrs.v6addrs.src = key_iph->saddr;
- keys->addrs.v6addrs.dst = key_iph->daddr;
- keys->tags.flow_label = ip6_flowinfo(key_iph);
- keys->basic.ip_proto = key_iph->nexthdr;
+ if (_flkeys) {
+ keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
+ keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
+ keys->tags.flow_label = _flkeys->tags.flow_label;
+ keys->basic.ip_proto = _flkeys->basic.ip_proto;
+ } else {
+ keys->addrs.v6addrs.src = key_iph->saddr;
+ keys->addrs.v6addrs.dst = key_iph->daddr;
+ keys->tags.flow_label = ip6_flowinfo(key_iph);
+ keys->basic.ip_proto = key_iph->nexthdr;
+ }
}
/* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
+ struct flow_keys *flkeys)
{
struct flow_keys hash_keys;
if (skb) {
- ip6_multipath_l3_keys(skb, &hash_keys);
+ ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
return flow_hash_from_keys(&hash_keys) >> 1;
}
@@ -1847,12 +1858,17 @@ void ip6_route_input(struct sk_buff *skb)
.flowi6_mark = skb->mark,
.flowi6_proto = iph->nexthdr,
};
+ struct flow_keys *flkeys = NULL, _flkeys;
tun_info = skb_tunnel_info(skb);
if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+
+ if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
+ flkeys = &_flkeys;
+
if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys);
skb_dst_drop(skb);
skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3a1775a62973..182db078f01e 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1878,6 +1878,7 @@ static struct pernet_operations sit_net_ops = {
.exit_batch = sit_exit_batch_net,
.id = &sit_net_id,
.size = sizeof(struct sit_net),
+ .async = true,
};
static void __exit sit_cleanup(void)
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index b15075a5c227..16f434791763 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -196,4 +196,3 @@ void xfrm6_state_fini(void)
{
xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
}
-
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index f85f0d7480ac..a9673619e0e9 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -353,6 +353,7 @@ static struct pernet_operations xfrm6_tunnel_net_ops = {
.exit = xfrm6_tunnel_net_exit,
.id = &xfrm6_tunnel_net_id,
.size = sizeof(struct xfrm6_tunnel_net),
+ .async = true,
};
static int __init xfrm6_tunnel_init(void)
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index 9d5649e4e8b7..2c1c8b3e4452 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -433,6 +433,7 @@ static void kcm_proc_exit_net(struct net *net)
static struct pernet_operations kcm_net_ops = {
.init = kcm_proc_init_net,
.exit = kcm_proc_exit_net,
+ .async = true,
};
int __init kcm_proc_init(void)
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 435594648dac..a6cd0712e063 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -2015,6 +2015,7 @@ static struct pernet_operations kcm_net_ops = {
.exit = kcm_exit_net,
.id = &kcm_net_id,
.size = sizeof(struct kcm_net),
+ .async = true,
};
static int __init kcm_init(void)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 7e2e7188e7f4..3ac08ab26207 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3863,6 +3863,7 @@ static struct pernet_operations pfkey_net_ops = {
.exit = pfkey_net_exit,
.id = &pfkey_net_id,
.size = sizeof(struct netns_pfkey),
+ .async = true,
};
static void __exit ipsec_pfkey_exit(void)
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 99a03c72db4f..0c4f49a6a0cb 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1770,6 +1770,7 @@ static struct pernet_operations pppol2tp_net_ops = {
.init = pppol2tp_init_net,
.exit = pppol2tp_exit_net,
.id = &pppol2tp_net_id,
+ .async = true,
};
/*****************************************************************************
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index d625179de485..6a340c94c4b8 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -604,6 +604,7 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
static struct pernet_operations ip_vs_lblc_ops = {
.init = __ip_vs_lblc_init,
.exit = __ip_vs_lblc_exit,
+ .async = true,
};
static int __init ip_vs_lblc_init(void)
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 84c57b62a588..0627881128da 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -789,6 +789,7 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
static struct pernet_operations ip_vs_lblcr_ops = {
.init = __ip_vs_lblcr_init,
.exit = __ip_vs_lblcr_exit,
+ .async = true,
};
static int __init ip_vs_lblcr_init(void)
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 92139a087260..64b875e452ca 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -398,6 +398,7 @@ static struct pernet_operations synproxy_net_ops = {
.exit = synproxy_net_exit,
.id = &synproxy_net_id,
.size = sizeof(struct synproxy_net),
+ .async = true,
};
static int __init synproxy_core_init(void)
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 66f5aca62a08..db2fe0911740 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -1345,6 +1345,7 @@ static struct pernet_operations hashlimit_net_ops = {
.exit = hashlimit_net_exit,
.id = &hashlimit_net_id,
.size = sizeof(struct hashlimit_net),
+ .async = true,
};
static int __init hashlimit_mt_init(void)
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 6d232d18faff..19efdb757944 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -687,6 +687,7 @@ static struct pernet_operations recent_net_ops = {
.exit = recent_net_exit,
.id = &recent_net_id,
.size = sizeof(struct recent_net),
+ .async = true,
};
static struct xt_match recent_mt_reg[] __read_mostly = {
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 77787512fc32..9454e8393793 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -342,6 +342,7 @@ static struct pernet_operations phonet_net_ops = {
.exit = phonet_exit_net,
.id = &phonet_net_id,
.size = sizeof(struct phonet_net),
+ .async = true,
};
/* Initialize Phonet devices list */
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index a937f18896ae..f7126108a811 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -77,6 +77,7 @@ static int rds_release(struct socket *sock)
rds_send_drop_to(rs, NULL);
rds_rdma_drop_keys(rs);
rds_notify_queue_get(rs, NULL);
+ __skb_queue_purge(&rs->rs_zcookie_queue);
spin_lock_bh(&rds_sock_lock);
list_del_init(&rs->rs_item);
@@ -144,7 +145,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
* - to signal that a previously congested destination may have become
* uncongested
* - A notification has been queued to the socket (this can be a congestion
- * update, or a RDMA completion).
+ * update, or a RDMA completion, or a MSG_ZEROCOPY completion).
*
* EPOLLOUT is asserted if there is room on the send queue. This does not mean
* however, that the next sendmsg() call will succeed. If the application tries
@@ -178,7 +179,8 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
spin_unlock(&rs->rs_lock);
}
if (!list_empty(&rs->rs_recv_queue) ||
- !list_empty(&rs->rs_notify_queue))
+ !list_empty(&rs->rs_notify_queue) ||
+ !skb_queue_empty(&rs->rs_zcookie_queue))
mask |= (EPOLLIN | EPOLLRDNORM);
if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
mask |= (EPOLLOUT | EPOLLWRNORM);
@@ -513,6 +515,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
INIT_LIST_HEAD(&rs->rs_recv_queue);
INIT_LIST_HEAD(&rs->rs_notify_queue);
INIT_LIST_HEAD(&rs->rs_cong_list);
+ skb_queue_head_init(&rs->rs_zcookie_queue);
spin_lock_init(&rs->rs_rdma_lock);
rs->rs_rdma_keys = RB_ROOT;
rs->rs_rx_traces = 0;
diff --git a/net/rds/message.c b/net/rds/message.c
index 651834513481..116cf87ccb89 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -58,32 +58,26 @@ EXPORT_SYMBOL_GPL(rds_message_addref);
static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie)
{
- struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
- int ncookies;
- u32 *ptr;
+ struct rds_zcopy_cookies *ck = (struct rds_zcopy_cookies *)skb->cb;
+ int ncookies = ck->num;
- if (serr->ee.ee_origin != SO_EE_ORIGIN_ZCOOKIE)
+ if (ncookies == RDS_MAX_ZCOOKIES)
return false;
- ncookies = serr->ee.ee_data;
- if (ncookies == SO_EE_ORIGIN_MAX_ZCOOKIES)
- return false;
- ptr = skb_put(skb, sizeof(u32));
- *ptr = cookie;
- serr->ee.ee_data = ++ncookies;
+ ck->cookies[ncookies] = cookie;
+ ck->num = ++ncookies;
return true;
}
static void rds_rm_zerocopy_callback(struct rds_sock *rs,
struct rds_znotifier *znotif)
{
- struct sock *sk = rds_rs_to_sk(rs);
struct sk_buff *skb, *tail;
- struct sock_exterr_skb *serr;
unsigned long flags;
struct sk_buff_head *q;
u32 cookie = znotif->z_cookie;
+ struct rds_zcopy_cookies *ck;
- q = &sk->sk_error_queue;
+ q = &rs->rs_zcookie_queue;
spin_lock_irqsave(&q->lock, flags);
tail = skb_peek_tail(q);
@@ -91,22 +85,19 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
spin_unlock_irqrestore(&q->lock, flags);
mm_unaccount_pinned_pages(&znotif->z_mmp);
consume_skb(rds_skb_from_znotifier(znotif));
- sk->sk_error_report(sk);
+ /* caller invokes rds_wake_sk_sleep() */
return;
}
skb = rds_skb_from_znotifier(znotif);
- serr = SKB_EXT_ERR(skb);
- memset(&serr->ee, 0, sizeof(serr->ee));
- serr->ee.ee_errno = 0;
- serr->ee.ee_origin = SO_EE_ORIGIN_ZCOOKIE;
- serr->ee.ee_info = 0;
+ ck = (struct rds_zcopy_cookies *)skb->cb;
+ memset(ck, 0, sizeof(*ck));
WARN_ON(!skb_zcookie_add(skb, cookie));
__skb_queue_tail(q, skb);
spin_unlock_irqrestore(&q->lock, flags);
- sk->sk_error_report(sk);
+ /* caller invokes rds_wake_sk_sleep() */
mm_unaccount_pinned_pages(&znotif->z_mmp);
}
@@ -129,6 +120,7 @@ static void rds_message_purge(struct rds_message *rm)
if (rm->data.op_mmp_znotifier) {
zcopy = true;
rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
+ rds_wake_sk_sleep(rs);
rm->data.op_mmp_znotifier = NULL;
}
sock_put(rds_rs_to_sk(rs));
@@ -362,10 +354,12 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
int total_copied = 0;
struct sk_buff *skb;
- skb = alloc_skb(SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(u32),
- GFP_KERNEL);
+ skb = alloc_skb(0, GFP_KERNEL);
if (!skb)
return -ENOMEM;
+ BUILD_BUG_ON(sizeof(skb->cb) <
+ max_t(int, sizeof(struct rds_znotifier),
+ sizeof(struct rds_zcopy_cookies)));
rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
length)) {
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 31cd38852050..33b16353d8f3 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -603,6 +603,8 @@ struct rds_sock {
/* Socket receive path trace points*/
u8 rs_rx_traces;
u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
+
+ struct sk_buff_head rs_zcookie_queue;
};
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
diff --git a/net/rds/recv.c b/net/rds/recv.c
index b080961464df..d50747725221 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -577,6 +577,32 @@ out:
return ret;
}
+static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg)
+{
+ struct sk_buff *skb;
+ struct sk_buff_head *q = &rs->rs_zcookie_queue;
+ struct rds_zcopy_cookies *done;
+
+ if (!msg->msg_control)
+ return false;
+
+ if (!sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY) ||
+ msg->msg_controllen < CMSG_SPACE(sizeof(*done)))
+ return false;
+
+ skb = skb_dequeue(q);
+ if (!skb)
+ return false;
+ done = (struct rds_zcopy_cookies *)skb->cb;
+ if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done),
+ done)) {
+ skb_queue_head(q, skb);
+ return false;
+ }
+ consume_skb(skb);
+ return true;
+}
+
int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int msg_flags)
{
@@ -611,7 +637,9 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (!rds_next_incoming(rs, &inc)) {
if (nonblock) {
- ret = -EAGAIN;
+ bool reaped = rds_recvmsg_zcookie(rs, msg);
+
+ ret = reaped ? 0 : -EAGAIN;
break;
}
@@ -660,6 +688,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
ret = -EFAULT;
goto out;
}
+ rds_recvmsg_zcookie(rs, msg);
rds_stats_inc(s_recv_delivered);
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index cb3c5d403c88..da72e0cf2b1f 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -413,6 +413,7 @@ static struct pernet_operations bpf_net_ops = {
.exit_batch = bpf_exit_net,
.id = &bpf_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init bpf_init_module(void)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index e4b880fa51fe..371e5e4ab3e2 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -222,6 +222,7 @@ static struct pernet_operations connmark_net_ops = {
.exit_batch = connmark_exit_net,
.id = &connmark_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init connmark_init_module(void)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index d5c2e528d150..1fb1f1f6a555 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -677,6 +677,7 @@ static struct pernet_operations csum_net_ops = {
.exit_batch = csum_exit_net,
.id = &csum_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_DESCRIPTION("Checksum updating actions");
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index f072bcf33760..74563254e676 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -247,6 +247,7 @@ static struct pernet_operations gact_net_ops = {
.exit_batch = gact_exit_net,
.id = &gact_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index a5994cf0512b..555b1caeff72 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -870,6 +870,7 @@ static struct pernet_operations ife_net_ops = {
.exit_batch = ife_exit_net,
.id = &ife_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init ife_init_module(void)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 9784629090ad..10866717f88e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -349,6 +349,7 @@ static struct pernet_operations ipt_net_ops = {
.exit_batch = ipt_exit_net,
.id = &ipt_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
@@ -399,6 +400,7 @@ static struct pernet_operations xt_net_ops = {
.exit_batch = xt_exit_net,
.id = &xt_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index fd34015331ab..64c86579c3d9 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -353,6 +353,7 @@ static struct pernet_operations mirred_net_ops = {
.exit_batch = mirred_exit_net,
.id = &mirred_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002)");
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 4b5848b6c252..b1bc757f6491 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -323,6 +323,7 @@ static struct pernet_operations nat_net_ops = {
.exit_batch = nat_exit_net,
.id = &nat_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_DESCRIPTION("Stateless NAT actions");
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 094303c27c5e..5e8cc8f63acd 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -465,6 +465,7 @@ static struct pernet_operations pedit_net_ops = {
.exit_batch = pedit_exit_net,
.id = &pedit_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index ff55bd6c7db0..51fe4fe343f7 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -347,6 +347,7 @@ static struct pernet_operations police_net_ops = {
.exit_batch = police_exit_net,
.id = &police_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init police_init_module(void)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 9765145aaf40..238dfd27e995 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -248,6 +248,7 @@ static struct pernet_operations sample_net_ops = {
.exit_batch = sample_exit_net,
.id = &sample_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init sample_init_module(void)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 8244e221fe4f..91816d73f3f3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -216,6 +216,7 @@ static struct pernet_operations simp_net_ops = {
.exit_batch = simp_exit_net,
.id = &simp_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim(2005)");
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index ddf69fc01bdf..7971510fe61b 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -253,6 +253,7 @@ static struct pernet_operations skbedit_net_ops = {
.exit_batch = skbedit_exit_net,
.id = &skbedit_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index a406f191cb84..febec75f4f7a 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -278,6 +278,7 @@ static struct pernet_operations skbmod_net_ops = {
.exit_batch = skbmod_exit_net,
.id = &skbmod_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 41ff9d0e5c62..9169b7e78ada 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -337,6 +337,7 @@ static struct pernet_operations tunnel_key_net_ops = {
.exit_batch = tunnel_key_exit_net,
.id = &tunnel_key_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init tunnel_key_init_module(void)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 71411a255f04..c2ee7fd51cc9 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -313,6 +313,7 @@ static struct pernet_operations vlan_net_ops = {
.exit_batch = vlan_exit_net,
.id = &vlan_net_id,
.size = sizeof(struct tc_action_net),
+ .async = true,
};
static int __init vlan_init_module(void)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 9d1a8bbf8152..19f9f421d5b7 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1618,6 +1618,7 @@ static struct pernet_operations tcf_net_ops = {
.exit = tcf_net_exit,
.id = &tcf_net_id,
.size = sizeof(struct tcf_net),
+ .async = true,
};
static int __init tc_filter_init(void)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 27e672c12492..68f9d942bed4 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -739,6 +739,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
unsigned int len)
{
+ bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
@@ -760,8 +761,12 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
* If child was empty even before update then backlog
* counter is screwed and we skip notification because
* parent class is already passive.
+ *
+ * If the original child was offloaded then it is allowed
+ * to be seem as empty, so the parent is notified anyway.
*/
- notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
+ notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
+ !qdisc_is_offloaded);
/* TODO: perform the search on a per txq basis */
sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index efbf51f35778..222e53d3d27a 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -142,9 +142,8 @@ prio_reset(struct Qdisc *sch)
sch->q.qlen = 0;
}
-static int prio_offload(struct Qdisc *sch, bool enable)
+static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
{
- struct prio_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_prio_qopt_offload opt = {
.handle = sch->handle,
@@ -154,10 +153,10 @@ static int prio_offload(struct Qdisc *sch, bool enable)
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
- if (enable) {
+ if (qopt) {
opt.command = TC_PRIO_REPLACE;
- opt.replace_params.bands = q->bands;
- memcpy(&opt.replace_params.priomap, q->prio2band,
+ opt.replace_params.bands = qopt->bands;
+ memcpy(&opt.replace_params.priomap, qopt->priomap,
TC_PRIO_MAX + 1);
opt.replace_params.qstats = &sch->qstats;
} else {
@@ -174,7 +173,7 @@ prio_destroy(struct Qdisc *sch)
struct prio_sched_data *q = qdisc_priv(sch);
tcf_block_put(q->block);
- prio_offload(sch, false);
+ prio_offload(sch, NULL);
for (prio = 0; prio < q->bands; prio++)
qdisc_destroy(q->queues[prio]);
}
@@ -211,6 +210,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
}
}
+ prio_offload(sch, qopt);
sch_tree_lock(sch);
q->bands = qopt->bands;
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
@@ -230,7 +230,6 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
}
sch_tree_unlock(sch);
- prio_offload(sch, true);
return 0;
}
@@ -309,12 +308,44 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct prio_sched_data *q = qdisc_priv(sch);
+ struct tc_prio_qopt_offload graft_offload;
+ struct net_device *dev = qdisc_dev(sch);
unsigned long band = arg - 1;
+ bool any_qdisc_is_offloaded;
+ int err;
if (new == NULL)
new = &noop_qdisc;
*old = qdisc_replace(sch, new, &q->queues[band]);
+
+ if (!tc_can_offload(dev))
+ return 0;
+
+ graft_offload.handle = sch->handle;
+ graft_offload.parent = sch->parent;
+ graft_offload.graft_params.band = band;
+ graft_offload.graft_params.child_handle = new->handle;
+ graft_offload.command = TC_PRIO_GRAFT;
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
+ &graft_offload);
+
+ /* Don't report error if the graft is part of destroy operation. */
+ if (err && new != &noop_qdisc) {
+ /* Don't report error if the parent, the old child and the new
+ * one are not offloaded.
+ */
+ any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
+ any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED;
+ if (*old)
+ any_qdisc_is_offloaded |= (*old)->flags &
+ TCQ_F_OFFLOADED;
+
+ if (any_qdisc_is_offloaded)
+ NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
+ }
+
return 0;
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 38ae22b65e77..26684e086750 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -7,7 +7,6 @@
* applicable with RoCE-cards only
*
* Initial restrictions:
- * - non-blocking connect postponed
* - IPv6 support postponed
* - support for alternate links postponed
* - partial support for non-blocking sockets only
@@ -24,7 +23,6 @@
#include <linux/module.h>
#include <linux/socket.h>
-#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
@@ -273,46 +271,7 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
-/* determine subnet and mask of internal TCP socket */
-int smc_netinfo_by_tcpsk(struct socket *clcsock,
- __be32 *subnet, u8 *prefix_len)
-{
- struct dst_entry *dst = sk_dst_get(clcsock->sk);
- struct in_device *in_dev;
- struct sockaddr_in addr;
- int rc = -ENOENT;
-
- if (!dst) {
- rc = -ENOTCONN;
- goto out;
- }
- if (!dst->dev) {
- rc = -ENODEV;
- goto out_rel;
- }
-
- /* get address to which the internal TCP socket is bound */
- kernel_getsockname(clcsock, (struct sockaddr *)&addr);
- /* analyze IPv4 specific data of net_device belonging to TCP socket */
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(dst->dev);
- for_ifa(in_dev) {
- if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
- continue;
- *prefix_len = inet_mask_len(ifa->ifa_mask);
- *subnet = ifa->ifa_address & ifa->ifa_mask;
- rc = 0;
- break;
- } endfor_ifa(in_dev);
- rcu_read_unlock();
-
-out_rel:
- dst_release(dst);
-out:
- return rc;
-}
-
-static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
+static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
struct smc_link_group *lgr = smc->conn.lgr;
struct smc_link *link;
@@ -332,6 +291,9 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
return rc;
}
+ if (link->llc_confirm_rc)
+ return SMC_CLC_DECL_RMBE_EC;
+
rc = smc_ib_modify_qp_rts(link);
if (rc)
return SMC_CLC_DECL_INTERR;
@@ -346,11 +308,33 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link(link,
link->smcibdev->mac[link->ibport - 1],
- gid, SMC_LLC_RESP);
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
if (rc < 0)
return SMC_CLC_DECL_TCL;
- return rc;
+ /* receive ADD LINK request from server over RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(&link->llc_add,
+ SMC_LLC_WAIT_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ return rc;
+ }
+
+ /* send add link reject message, only one link supported for now */
+ rc = smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ link->state = SMC_LNK_ACTIVE;
+
+ return 0;
}
static void smc_conn_save_peer_info(struct smc_sock *smc,
@@ -372,19 +356,9 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->qp_mtu;
}
-static void smc_lgr_forget(struct smc_link_group *lgr)
-{
- spin_lock_bh(&smc_lgr_list.lock);
- /* do not use this link group for new connections */
- if (!list_empty(&lgr->list))
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
-}
-
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
- struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
struct smc_clc_msg_accept_confirm aclc;
int local_contact = SMC_FIRST_CONTACT;
struct smc_ib_device *smcibdev;
@@ -438,8 +412,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
srv_first_contact = aclc.hdr.flag;
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
- ibport, &aclc.lcl, srv_first_contact);
+ local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
+ srv_first_contact);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
@@ -498,8 +472,7 @@ static int smc_connect_rdma(struct smc_sock *smc)
if (local_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
- reason_code = smc_clnt_conf_first_link(
- smc, &smcibdev->gid[ibport - 1]);
+ reason_code = smc_clnt_conf_first_link(smc);
if (reason_code < 0) {
rc = reason_code;
goto out_err_unlock;
@@ -558,7 +531,6 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
goto out_err;
if (addr->sa_family != AF_INET)
goto out_err;
- smc->addr = addr; /* needed for nonblocking connect */
lock_sock(sk);
switch (sk->sk_state) {
@@ -748,9 +720,34 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE);
+ return rc;
}
- return rc;
+ if (link->llc_confirm_resp_rc)
+ return SMC_CLC_DECL_RMBE_EC;
+
+ /* send ADD LINK request to client over the RoCE fabric */
+ rc = smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_REQ);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ /* receive ADD LINK response from client over the RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
+ SMC_LLC_WAIT_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ return rc;
+ }
+
+ link->state = SMC_LNK_ACTIVE;
+
+ return 0;
}
/* setup for RDMA connection of server */
@@ -766,7 +763,6 @@ static void smc_listen_work(struct work_struct *work)
struct sock *newsmcsk = &new_smc->sk;
struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *smcibdev;
- struct sockaddr_in peeraddr;
u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
@@ -808,7 +804,7 @@ static void smc_listen_work(struct work_struct *work)
}
/* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
+ rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
if (rc) {
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
@@ -822,13 +818,10 @@ static void smc_listen_work(struct work_struct *work)
goto decline_rdma;
}
- /* get address of the peer connected to the internal TCP socket */
- kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr);
-
/* allocate connection / link group */
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
- smcibdev, ibport, &pclc->lcl, 0);
+ local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
+ 0);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 9518986c97b1..268cdf11533c 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -172,7 +172,6 @@ struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
struct smc_connection conn; /* smc connection */
- struct sockaddr *addr; /* inet connect address */
struct smc_sock *listen_smc; /* listen parent */
struct work_struct tcp_listen_work;/* handle tcp socket accepts */
struct work_struct smc_listen_work;/* prepare new accept socket */
@@ -263,10 +262,8 @@ static inline bool using_ipsec(struct smc_sock *smc)
struct smc_clc_msg_local;
-int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
- u8 *prefix_len);
void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact);
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 8ac51583a063..874c5a75d6dd 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -11,6 +11,7 @@
*/
#include <linux/in.h>
+#include <linux/inetdevice.h>
#include <linux/if_ether.h>
#include <linux/sched/signal.h>
@@ -22,6 +23,9 @@
#include "smc_clc.h"
#include "smc_ib.h"
+/* eye catcher "SMCR" EBCDIC for CLC messages */
+static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
+
/* check if received message has a correct header length and contains valid
* heading and trailing eyecatchers
*/
@@ -70,6 +74,45 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
return true;
}
+/* determine subnet and mask of internal TCP socket */
+int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
+ __be32 *subnet, u8 *prefix_len)
+{
+ struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ struct in_device *in_dev;
+ struct sockaddr_in addr;
+ int rc = -ENOENT;
+
+ if (!dst) {
+ rc = -ENOTCONN;
+ goto out;
+ }
+ if (!dst->dev) {
+ rc = -ENODEV;
+ goto out_rel;
+ }
+
+ /* get address to which the internal TCP socket is bound */
+ kernel_getsockname(clcsock, (struct sockaddr *)&addr);
+ /* analyze IPv4 specific data of net_device belonging to TCP socket */
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(dst->dev);
+ for_ifa(in_dev) {
+ if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
+ continue;
+ *prefix_len = inet_mask_len(ifa->ifa_mask);
+ *subnet = ifa->ifa_address & ifa->ifa_mask;
+ rc = 0;
+ break;
+ } endfor_ifa(in_dev);
+ rcu_read_unlock();
+
+out_rel:
+ dst_release(dst);
+out:
+ return rc;
+}
+
/* Wait for data on the tcp-socket, analyze received data
* Returns:
* 0 if success and it was not a decline that we received.
@@ -211,8 +254,8 @@ int smc_clc_send_proposal(struct smc_sock *smc,
memset(&pclc_prfx, 0, sizeof(pclc_prfx));
/* determine subnet and mask from internal TCP socket */
- rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
- &pclc_prfx.prefix_len);
+ rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
+ &pclc_prfx.prefix_len);
if (rc)
return SMC_CLC_DECL_CNFERR; /* configuration error */
pclc_prfx.ipv6_prefixes_cnt = 0;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index c145a0f36a68..20e048beac30 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -22,9 +22,6 @@
#define SMC_CLC_CONFIRM 0x03
#define SMC_CLC_DECLINE 0x04
-/* eye catcher "SMCR" EBCDIC for CLC messages */
-static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
-
#define SMC_CLC_V1 0x1 /* SMC version */
#define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */
#define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */
@@ -36,6 +33,7 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */
#define SMC_CLC_DECL_TCL 0x02040000 /* timeout w4 QP confirm */
#define SMC_CLC_DECL_SEND 0x07000000 /* sending problem */
+#define SMC_CLC_DECL_RMBE_EC 0x08000000 /* peer has eyecatcher in RMBE */
struct smc_clc_msg_hdr { /* header1 of clc messages */
u8 eyecatcher[4]; /* eye catcher */
@@ -124,9 +122,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
}
-struct smc_sock;
-struct smc_ib_device;
-
+int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
+ u8 *prefix_len);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2424c7100aaf..702ce5f85e97 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -144,7 +144,7 @@ free:
}
/* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
+static int smc_lgr_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
char *peer_systemid, unsigned short vlan_id)
{
@@ -161,7 +161,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
}
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
lgr->sync_err = false;
- lgr->daddr = peer_in_addr;
memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
lgr->vlan_id = vlan_id;
rwlock_init(&lgr->sndbufs_lock);
@@ -177,6 +176,7 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
lnk = &lgr->lnk[SMC_SINGLE_LINK];
/* initialize link */
+ lnk->state = SMC_LNK_ACTIVATING;
lnk->smcibdev = smcibdev;
lnk->ibport = ibport;
lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
@@ -198,6 +198,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
goto destroy_qp;
init_completion(&lnk->llc_confirm);
init_completion(&lnk->llc_confirm_resp);
+ init_completion(&lnk->llc_add);
+ init_completion(&lnk->llc_add_resp);
smc->conn.lgr = lgr;
rwlock_init(&lgr->conns_lock);
@@ -306,6 +308,15 @@ void smc_lgr_free(struct smc_link_group *lgr)
kfree(lgr);
}
+void smc_lgr_forget(struct smc_link_group *lgr)
+{
+ spin_lock_bh(&smc_lgr_list.lock);
+ /* do not use this link group for new connections */
+ if (!list_empty(&lgr->list))
+ list_del_init(&lgr->list);
+ spin_unlock_bh(&smc_lgr_list.lock);
+}
+
/* terminate linkgroup abnormally */
void smc_lgr_terminate(struct smc_link_group *lgr)
{
@@ -313,15 +324,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
struct smc_sock *smc;
struct rb_node *node;
- spin_lock_bh(&smc_lgr_list.lock);
- if (list_empty(&lgr->list)) {
- /* termination already triggered */
- spin_unlock_bh(&smc_lgr_list.lock);
- return;
- }
- /* do not use this link group for new connections */
- list_del_init(&lgr->list);
- spin_unlock_bh(&smc_lgr_list.lock);
+ smc_lgr_forget(lgr);
write_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
@@ -400,7 +403,7 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
}
/* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact)
{
@@ -457,7 +460,7 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
create:
if (local_contact == SMC_FIRST_CONTACT) {
- rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport,
+ rc = smc_lgr_create(smc, smcibdev, ibport,
lcl->id_for_peer, vlan_id);
if (rc)
goto out;
@@ -698,27 +701,55 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
return -ENOSPC;
}
-/* save rkey and dma_addr received from peer during clc handshake */
-int smc_rmb_rtoken_handling(struct smc_connection *conn,
- struct smc_clc_msg_accept_confirm *clc)
+/* add a new rtoken from peer */
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
- u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr);
- struct smc_link_group *lgr = conn->lgr;
- u32 rkey = ntohl(clc->rmb_rkey);
+ u64 dma_addr = be64_to_cpu(nw_vaddr);
+ u32 rkey = ntohl(nw_rkey);
int i;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
(lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
test_bit(i, lgr->rtokens_used_mask)) {
- conn->rtoken_idx = i;
+ /* already in list */
+ return i;
+ }
+ }
+ i = smc_rmb_reserve_rtoken_idx(lgr);
+ if (i < 0)
+ return i;
+ lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
+ lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
+ return i;
+}
+
+/* delete an rtoken */
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
+{
+ u32 rkey = ntohl(nw_rkey);
+ int i;
+
+ for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+ if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
+ test_bit(i, lgr->rtokens_used_mask)) {
+ lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
+ lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
+
+ clear_bit(i, lgr->rtokens_used_mask);
return 0;
}
}
- conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr);
+ return -ENOENT;
+}
+
+/* save rkey and dma_addr received from peer during clc handshake */
+int smc_rmb_rtoken_handling(struct smc_connection *conn,
+ struct smc_clc_msg_accept_confirm *clc)
+{
+ conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
+ clc->rmb_rkey);
if (conn->rtoken_idx < 0)
return conn->rtoken_idx;
- lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey;
- lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr;
return 0;
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index fe691bf9af91..07e2a393e6d9 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -32,6 +32,12 @@ enum smc_lgr_role { /* possible roles of a link group */
SMC_SERV /* server */
};
+enum smc_link_state { /* possible states of a link */
+ SMC_LNK_INACTIVE, /* link is inactive */
+ SMC_LNK_ACTIVATING, /* link is being activated */
+ SMC_LNK_ACTIVE /* link is active */
+};
+
#define SMC_WR_BUF_SIZE 48 /* size of work request buffer */
struct smc_wr_buf {
@@ -87,8 +93,14 @@ struct smc_link {
u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
u8 peer_gid[sizeof(union ib_gid)]; /* gid of peer*/
u8 link_id; /* unique # within link group */
+
+ enum smc_link_state state; /* state of link */
struct completion llc_confirm; /* wait for rx of conf link */
struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
+ int llc_confirm_rc; /* rc from confirm link msg */
+ int llc_confirm_resp_rc; /* rc from conf_resp msg */
+ struct completion llc_add; /* wait for rx of add link */
+ struct completion llc_add_resp; /* wait for rx of add link rsp*/
};
/* For now we just allow one parallel link per link group. The SMC protocol
@@ -124,7 +136,6 @@ struct smc_rtoken { /* address/key of remote RMB */
struct smc_link_group {
struct list_head list;
enum smc_lgr_role role; /* client or server */
- __be32 daddr; /* destination ip address */
struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */
char peer_systemid[SMC_SYSTEMID_LEN];
/* unique system_id of peer */
@@ -186,10 +197,13 @@ struct smc_sock;
struct smc_clc_msg_accept_confirm;
void smc_lgr_free(struct smc_link_group *lgr);
+void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_terminate(struct smc_link_group *lgr);
int smc_buf_create(struct smc_sock *smc);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
struct smc_clc_msg_accept_confirm *clc);
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 92fe4cc8c82c..54e8d6dc9201 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -4,9 +4,6 @@
*
* Link Layer Control (LLC)
*
- * For now, we only support the necessary "confirm link" functionality
- * which happens for the first RoCE link after successful CLC handshake.
- *
* Copyright IBM Corp. 2016
*
* Author(s): Klaus Wacker <Klaus.Wacker@de.ibm.com>
@@ -21,6 +18,122 @@
#include "smc_clc.h"
#include "smc_llc.h"
+#define SMC_LLC_DATA_LEN 40
+
+struct smc_llc_hdr {
+ struct smc_wr_rx_hdr common;
+ u8 length; /* 44 */
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 reserved:4,
+ add_link_rej_rsn:4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 add_link_rej_rsn:4,
+ reserved:4;
+#endif
+ u8 flags;
+};
+
+#define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03
+
+struct smc_llc_msg_confirm_link { /* type 0x01 */
+ struct smc_llc_hdr hd;
+ u8 sender_mac[ETH_ALEN];
+ u8 sender_gid[SMC_GID_SIZE];
+ u8 sender_qp_num[3];
+ u8 link_num;
+ u8 link_uid[SMC_LGR_ID_SIZE];
+ u8 max_links;
+ u8 reserved[9];
+};
+
+#define SMC_LLC_FLAG_ADD_LNK_REJ 0x40
+#define SMC_LLC_REJ_RSN_NO_ALT_PATH 1
+
+#define SMC_LLC_ADD_LNK_MAX_LINKS 2
+
+struct smc_llc_msg_add_link { /* type 0x02 */
+ struct smc_llc_hdr hd;
+ u8 sender_mac[ETH_ALEN];
+ u8 reserved2[2];
+ u8 sender_gid[SMC_GID_SIZE];
+ u8 sender_qp_num[3];
+ u8 link_num;
+ u8 flags2; /* QP mtu */
+ u8 initial_psn[3];
+ u8 reserved[8];
+};
+
+#define SMC_LLC_FLAG_DEL_LINK_ALL 0x40
+#define SMC_LLC_FLAG_DEL_LINK_ORDERLY 0x20
+
+struct smc_llc_msg_del_link { /* type 0x04 */
+ struct smc_llc_hdr hd;
+ u8 link_num;
+ __be32 reason;
+ u8 reserved[35];
+} __packed; /* format defined in RFC7609 */
+
+struct smc_llc_msg_test_link { /* type 0x07 */
+ struct smc_llc_hdr hd;
+ u8 user_data[16];
+ u8 reserved[24];
+};
+
+struct smc_rmb_rtoken {
+ union {
+ u8 num_rkeys; /* first rtoken byte of CONFIRM LINK msg */
+ /* is actually the num of rtokens, first */
+ /* rtoken is always for the current link */
+ u8 link_id; /* link id of the rtoken */
+ };
+ __be32 rmb_key;
+ __be64 rmb_vaddr;
+} __packed; /* format defined in RFC7609 */
+
+#define SMC_LLC_RKEYS_PER_MSG 3
+
+struct smc_llc_msg_confirm_rkey { /* type 0x06 */
+ struct smc_llc_hdr hd;
+ struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+ u8 reserved;
+};
+
+struct smc_llc_msg_confirm_rkey_cont { /* type 0x08 */
+ struct smc_llc_hdr hd;
+ u8 num_rkeys;
+ struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+};
+
+#define SMC_LLC_DEL_RKEY_MAX 8
+#define SMC_LLC_FLAG_RKEY_NEG 0x20
+
+struct smc_llc_msg_delete_rkey { /* type 0x09 */
+ struct smc_llc_hdr hd;
+ u8 num_rkeys;
+ u8 err_mask;
+ u8 reserved[2];
+ __be32 rkey[8];
+ u8 reserved2[4];
+};
+
+union smc_llc_msg {
+ struct smc_llc_msg_confirm_link confirm_link;
+ struct smc_llc_msg_add_link add_link;
+ struct smc_llc_msg_del_link delete_link;
+
+ struct smc_llc_msg_confirm_rkey confirm_rkey;
+ struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont;
+ struct smc_llc_msg_delete_rkey delete_rkey;
+
+ struct smc_llc_msg_test_link test_link;
+ struct {
+ struct smc_llc_hdr hdr;
+ u8 data[SMC_LLC_DATA_LEN];
+ } raw;
+};
+
+#define SMC_LLC_FLAG_RESP 0x80
+
/********************************** send *************************************/
struct smc_llc_tx_pend {
@@ -87,6 +200,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
memset(confllc, 0, sizeof(*confllc));
confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
+ confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
if (reqresp == SMC_LLC_RESP)
confllc->hd.flags |= SMC_LLC_FLAG_RESP;
memcpy(confllc->sender_mac, mac, ETH_ALEN);
@@ -94,7 +208,104 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
/* confllc->link_num = SMC_SINGLE_LINK; already done by memset above */
memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
- confllc->max_links = SMC_LINKS_PER_LGR_MAX;
+ confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send ADD LINK request or response */
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
+ union ib_gid *gid,
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_add_link *addllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ addllc = (struct smc_llc_msg_add_link *)wr_buf;
+ memset(addllc, 0, sizeof(*addllc));
+ addllc->hd.common.type = SMC_LLC_ADD_LINK;
+ addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+ if (reqresp == SMC_LLC_RESP) {
+ addllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ /* always reject more links for now */
+ addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
+ addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
+ }
+ memcpy(addllc->sender_mac, mac, ETH_ALEN);
+ memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send DELETE LINK request or response */
+int smc_llc_send_delete_link(struct smc_link *link,
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_del_link *delllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ delllc = (struct smc_llc_msg_del_link *)wr_buf;
+ memset(delllc, 0, sizeof(*delllc));
+ delllc->hd.common.type = SMC_LLC_DELETE_LINK;
+ delllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+ if (reqresp == SMC_LLC_RESP)
+ delllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ /* DEL_LINK_ALL because only 1 link supported */
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+ delllc->link_num = link->link_id;
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send LLC test link request or response */
+int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16],
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_llc_msg_test_link *testllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ testllc = (struct smc_llc_msg_test_link *)wr_buf;
+ memset(testllc, 0, sizeof(*testllc));
+ testllc->hd.common.type = SMC_LLC_TEST_LINK;
+ testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
+ if (reqresp == SMC_LLC_RESP)
+ testllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/* send a prepared message */
+static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
+{
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ memcpy(wr_buf, llcbuf, llclen);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
@@ -106,19 +317,156 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
struct smc_llc_msg_confirm_link *llc)
{
struct smc_link_group *lgr;
+ int conf_rc;
lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ /* RMBE eyecatchers are not supported */
+ if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
+ conf_rc = 0;
+ else
+ conf_rc = ENOTSUPP;
+
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- if (lgr->role == SMC_SERV)
+ if (lgr->role == SMC_SERV &&
+ link->state == SMC_LNK_ACTIVATING) {
+ link->llc_confirm_resp_rc = conf_rc;
complete(&link->llc_confirm_resp);
+ }
} else {
- if (lgr->role == SMC_CLNT) {
+ if (lgr->role == SMC_CLNT &&
+ link->state == SMC_LNK_ACTIVATING) {
+ link->llc_confirm_rc = conf_rc;
link->link_id = llc->link_num;
complete(&link->llc_confirm);
}
}
}
+static void smc_llc_rx_add_link(struct smc_link *link,
+ struct smc_llc_msg_add_link *llc)
+{
+ struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ if (link->state == SMC_LNK_ACTIVATING)
+ complete(&link->llc_add_resp);
+ } else {
+ if (link->state == SMC_LNK_ACTIVATING) {
+ complete(&link->llc_add);
+ return;
+ }
+
+ if (lgr->role == SMC_SERV) {
+ smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_REQ);
+
+ } else {
+ smc_llc_send_add_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_RESP);
+ }
+ }
+}
+
+static void smc_llc_rx_delete_link(struct smc_link *link,
+ struct smc_llc_msg_del_link *llc)
+{
+ struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ if (lgr->role == SMC_SERV)
+ smc_lgr_terminate(lgr);
+ } else {
+ if (lgr->role == SMC_SERV) {
+ smc_lgr_forget(lgr);
+ smc_llc_send_delete_link(link, SMC_LLC_REQ);
+ } else {
+ smc_llc_send_delete_link(link, SMC_LLC_RESP);
+ smc_lgr_terminate(lgr);
+ }
+ }
+}
+
+static void smc_llc_rx_test_link(struct smc_link *link,
+ struct smc_llc_msg_test_link *llc)
+{
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP);
+ }
+}
+
+static void smc_llc_rx_confirm_rkey(struct smc_link *link,
+ struct smc_llc_msg_confirm_rkey *llc)
+{
+ struct smc_link_group *lgr;
+ int rc;
+
+ lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ rc = smc_rtoken_add(lgr,
+ llc->rtoken[0].rmb_vaddr,
+ llc->rtoken[0].rmb_key);
+
+ /* ignore rtokens for other links, we have only one link */
+
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ if (rc < 0)
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
+static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
+ struct smc_llc_msg_confirm_rkey_cont *llc)
+{
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ /* ignore rtokens for other links, we have only one link */
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
+static void smc_llc_rx_delete_rkey(struct smc_link *link,
+ struct smc_llc_msg_delete_rkey *llc)
+{
+ struct smc_link_group *lgr;
+ u8 err_mask = 0;
+ int i, max;
+
+ lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ /* unused as long as we don't send this type of msg */
+ } else {
+ max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
+ for (i = 0; i < max; i++) {
+ if (smc_rtoken_delete(lgr, llc->rkey[i]))
+ err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
+ }
+
+ if (err_mask) {
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ llc->err_mask = err_mask;
+ }
+
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+ }
+}
+
static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
{
struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
@@ -128,8 +476,30 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
return; /* short message */
if (llc->raw.hdr.length != sizeof(*llc))
return; /* invalid message */
- if (llc->raw.hdr.common.type == SMC_LLC_CONFIRM_LINK)
+
+ switch (llc->raw.hdr.common.type) {
+ case SMC_LLC_TEST_LINK:
+ smc_llc_rx_test_link(link, &llc->test_link);
+ break;
+ case SMC_LLC_CONFIRM_LINK:
smc_llc_rx_confirm_link(link, &llc->confirm_link);
+ break;
+ case SMC_LLC_ADD_LINK:
+ smc_llc_rx_add_link(link, &llc->add_link);
+ break;
+ case SMC_LLC_DELETE_LINK:
+ smc_llc_rx_delete_link(link, &llc->delete_link);
+ break;
+ case SMC_LLC_CONFIRM_RKEY:
+ smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey);
+ break;
+ case SMC_LLC_CONFIRM_RKEY_CONT:
+ smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
+ break;
+ case SMC_LLC_DELETE_RKEY:
+ smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
+ break;
+ }
}
/***************************** init, exit, misc ******************************/
@@ -140,6 +510,30 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
.type = SMC_LLC_CONFIRM_LINK
},
{
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_TEST_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_ADD_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_LINK
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_RKEY
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_RKEY_CONT
+ },
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_DELETE_RKEY
+ },
+ {
.handler = NULL,
}
};
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 51b27ce90dbd..e4a7d5e234d5 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -18,6 +18,7 @@
#define SMC_LLC_FLAG_RESP 0x80
#define SMC_LLC_WAIT_FIRST_TIME (5 * HZ)
+#define SMC_LLC_WAIT_TIME (2 * HZ)
enum smc_llc_reqresp {
SMC_LLC_REQ,
@@ -26,39 +27,23 @@ enum smc_llc_reqresp {
enum smc_llc_msg_type {
SMC_LLC_CONFIRM_LINK = 0x01,
-};
-
-#define SMC_LLC_DATA_LEN 40
-
-struct smc_llc_hdr {
- struct smc_wr_rx_hdr common;
- u8 length; /* 44 */
- u8 reserved;
- u8 flags;
-};
-
-struct smc_llc_msg_confirm_link { /* type 0x01 */
- struct smc_llc_hdr hd;
- u8 sender_mac[ETH_ALEN];
- u8 sender_gid[SMC_GID_SIZE];
- u8 sender_qp_num[3];
- u8 link_num;
- u8 link_uid[SMC_LGR_ID_SIZE];
- u8 max_links;
- u8 reserved[9];
-};
-
-union smc_llc_msg {
- struct smc_llc_msg_confirm_link confirm_link;
- struct {
- struct smc_llc_hdr hdr;
- u8 data[SMC_LLC_DATA_LEN];
- } raw;
+ SMC_LLC_ADD_LINK = 0x02,
+ SMC_LLC_DELETE_LINK = 0x04,
+ SMC_LLC_CONFIRM_RKEY = 0x06,
+ SMC_LLC_TEST_LINK = 0x07,
+ SMC_LLC_CONFIRM_RKEY_CONT = 0x08,
+ SMC_LLC_DELETE_RKEY = 0x09,
};
/* transmit */
int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid,
enum smc_llc_reqresp reqresp);
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid,
+ enum smc_llc_reqresp reqresp);
+int smc_llc_send_delete_link(struct smc_link *link,
+ enum smc_llc_reqresp reqresp);
+int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16],
+ enum smc_llc_reqresp reqresp);
int smc_llc_init(void) __init;
#endif /* SMC_LLC_H */
diff --git a/net/socket.c b/net/socket.c
index ab58e57c09ca..d9a1ac233b35 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -233,7 +233,7 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
return __put_user(klen, ulen);
}
-static struct kmem_cache *sock_inode_cachep __read_mostly;
+static struct kmem_cache *sock_inode_cachep __ro_after_init;
static struct inode *sock_alloc_inode(struct super_block *sb)
{
@@ -2289,10 +2289,12 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
if (!sock)
return err;
- err = sock_error(sock->sk);
- if (err) {
- datagrams = err;
- goto out_put;
+ if (likely(!(flags & MSG_ERRQUEUE))) {
+ err = sock_error(sock->sk);
+ if (err) {
+ datagrams = err;
+ goto out_put;
+ }
}
entry = mmsg;
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index ec3fc8d88e87..2c2a587e0942 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -43,6 +43,7 @@ hostprogs-y += xdp_redirect_cpu
hostprogs-y += xdp_monitor
hostprogs-y += xdp_rxq_info
hostprogs-y += syscall_tp
+hostprogs-y += cpustat
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
@@ -93,6 +94,7 @@ xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
+cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -144,6 +146,7 @@ always += xdp_monitor_kern.o
always += xdp_rxq_info_kern.o
always += xdp2skb_meta_kern.o
always += syscall_tp_kern.o
+always += cpustat_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -188,6 +191,7 @@ HOSTLOADLIBES_xdp_redirect_cpu += -lelf
HOSTLOADLIBES_xdp_monitor += -lelf
HOSTLOADLIBES_xdp_rxq_info += -lelf
HOSTLOADLIBES_syscall_tp += -lelf
+HOSTLOADLIBES_cpustat += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
new file mode 100644
index 000000000000..68c84da065b1
--- /dev/null
+++ b/samples/bpf/cpustat_kern.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/*
+ * The CPU number, cstate number and pstate number are based
+ * on 96boards Hikey with octa CA53 CPUs.
+ *
+ * Every CPU have three idle states for cstate:
+ * WFI, CPU_OFF, CLUSTER_OFF
+ *
+ * Every CPU have 5 operating points:
+ * 208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
+ *
+ * This code is based on these assumption and other platforms
+ * need to adjust these definitions.
+ */
+#define MAX_CPU 8
+#define MAX_PSTATE_ENTRIES 5
+#define MAX_CSTATE_ENTRIES 3
+
+static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
+
+/*
+ * my_map structure is used to record cstate and pstate index and
+ * timestamp (Idx, Ts), when new event incoming we need to update
+ * combination for new state index and timestamp (Idx`, Ts`).
+ *
+ * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
+ * interval for the previous state: Duration(Idx) = Ts` - Ts.
+ *
+ * Every CPU has one below array for recording state index and
+ * timestamp, and record for cstate and pstate saperately:
+ *
+ * +--------------------------+
+ * | cstate timestamp |
+ * +--------------------------+
+ * | cstate index |
+ * +--------------------------+
+ * | pstate timestamp |
+ * +--------------------------+
+ * | pstate index |
+ * +--------------------------+
+ */
+#define MAP_OFF_CSTATE_TIME 0
+#define MAP_OFF_CSTATE_IDX 1
+#define MAP_OFF_PSTATE_TIME 2
+#define MAP_OFF_PSTATE_IDX 3
+#define MAP_OFF_NUM 4
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAP_OFF_NUM,
+};
+
+/* cstate_duration records duration time for every idle state per CPU */
+struct bpf_map_def SEC("maps") cstate_duration = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
+};
+
+/* pstate_duration records duration time for every operating point per CPU */
+struct bpf_map_def SEC("maps") pstate_duration = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
+};
+
+/*
+ * The trace events for cpu_idle and cpu_frequency are taken from:
+ * /sys/kernel/debug/tracing/events/power/cpu_idle/format
+ * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
+ *
+ * These two events have same format, so define one common structure.
+ */
+struct cpu_args {
+ u64 pad;
+ u32 state;
+ u32 cpu_id;
+};
+
+/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
+static u32 find_cpu_pstate_idx(u32 frequency)
+{
+ u32 i;
+
+ for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
+ if (frequency == cpu_opps[i])
+ return i;
+ }
+
+ return i;
+}
+
+SEC("tracepoint/power/cpu_idle")
+int bpf_prog1(struct cpu_args *ctx)
+{
+ u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+ u32 key, cpu, pstate_idx;
+ u64 *val;
+
+ if (ctx->cpu_id > MAX_CPU)
+ return 0;
+
+ cpu = ctx->cpu_id;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
+ cts = bpf_map_lookup_elem(&my_map, &key);
+ if (!cts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+ cstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!cstate)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+ pts = bpf_map_lookup_elem(&my_map, &key);
+ if (!pts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+ pstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!pstate)
+ return 0;
+
+ prev_state = *cstate;
+ *cstate = ctx->state;
+
+ if (!*cts) {
+ *cts = bpf_ktime_get_ns();
+ return 0;
+ }
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = cur_ts - *cts;
+ *cts = cur_ts;
+
+ /*
+ * When state doesn't equal to (u32)-1, the cpu will enter
+ * one idle state; for this case we need to record interval
+ * for the pstate.
+ *
+ * OPP2
+ * +---------------------+
+ * OPP1 | |
+ * ---------+ |
+ * | Idle state
+ * +---------------
+ *
+ * |<- pstate duration ->|
+ * ^ ^
+ * pts cur_ts
+ */
+ if (ctx->state != (u32)-1) {
+
+ /* record pstate after have first cpu_frequency event */
+ if (!*pts)
+ return 0;
+
+ delta = cur_ts - *pts;
+
+ pstate_idx = find_cpu_pstate_idx(*pstate);
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
+ return 0;
+
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+
+ /*
+ * When state equal to (u32)-1, the cpu just exits from one
+ * specific idle state; for this case we need to record
+ * interval for the pstate.
+ *
+ * OPP2
+ * -----------+
+ * | OPP1
+ * | +-----------
+ * | Idle state |
+ * +---------------------+
+ *
+ * |<- cstate duration ->|
+ * ^ ^
+ * cts cur_ts
+ */
+ } else {
+
+ key = cpu * MAX_CSTATE_ENTRIES + prev_state;
+ val = bpf_map_lookup_elem(&cstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+ }
+
+ /* Update timestamp for pstate as new start time */
+ if (*pts)
+ *pts = cur_ts;
+
+ return 0;
+}
+
+SEC("tracepoint/power/cpu_frequency")
+int bpf_prog2(struct cpu_args *ctx)
+{
+ u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+ u32 key, cpu, pstate_idx;
+ u64 *val;
+
+ cpu = ctx->cpu_id;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+ pts = bpf_map_lookup_elem(&my_map, &key);
+ if (!pts)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+ pstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!pstate)
+ return 0;
+
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+ cstate = bpf_map_lookup_elem(&my_map, &key);
+ if (!cstate)
+ return 0;
+
+ prev_state = *pstate;
+ *pstate = ctx->state;
+
+ if (!*pts) {
+ *pts = bpf_ktime_get_ns();
+ return 0;
+ }
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = cur_ts - *pts;
+ *pts = cur_ts;
+
+ /* When CPU is in idle, bail out to skip pstate statistics */
+ if (*cstate != (u32)(-1))
+ return 0;
+
+ /*
+ * The cpu changes to another different OPP (in below diagram
+ * change frequency from OPP3 to OPP1), need recording interval
+ * for previous frequency OPP3 and update timestamp as start
+ * time for new frequency OPP1.
+ *
+ * OPP3
+ * +---------------------+
+ * OPP2 | |
+ * ---------+ |
+ * | OPP1
+ * +---------------
+ *
+ * |<- pstate duration ->|
+ * ^ ^
+ * pts cur_ts
+ */
+ pstate_idx = find_cpu_pstate_idx(*pstate);
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
+ return 0;
+
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, delta);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
new file mode 100644
index 000000000000..2b4cd1ae57c5
--- /dev/null
+++ b/samples/bpf/cpustat_user.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sched.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <locale.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_CPU 8
+#define MAX_PSTATE_ENTRIES 5
+#define MAX_CSTATE_ENTRIES 3
+#define MAX_STARS 40
+
+#define CPUFREQ_MAX_SYSFS_PATH "/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"
+#define CPUFREQ_LOWEST_FREQ "208000"
+#define CPUFREQ_HIGHEST_FREQ "12000000"
+
+struct cpu_stat_data {
+ unsigned long cstate[MAX_CSTATE_ENTRIES];
+ unsigned long pstate[MAX_PSTATE_ENTRIES];
+};
+
+static struct cpu_stat_data stat_data[MAX_CPU];
+
+static void cpu_stat_print(void)
+{
+ int i, j;
+ char state_str[sizeof("cstate-9")];
+ struct cpu_stat_data *data;
+
+ /* Clear screen */
+ printf("\033[2J");
+
+ /* Header */
+ printf("\nCPU states statistics:\n");
+ printf("%-10s ", "state(ms)");
+
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+ sprintf(state_str, "cstate-%d", i);
+ printf("%-11s ", state_str);
+ }
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+ sprintf(state_str, "pstate-%d", i);
+ printf("%-11s ", state_str);
+ }
+
+ printf("\n");
+
+ for (j = 0; j < MAX_CPU; j++) {
+ data = &stat_data[j];
+
+ printf("CPU-%-6d ", j);
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++)
+ printf("%-11ld ", data->cstate[i] / 1000000);
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++)
+ printf("%-11ld ", data->pstate[i] / 1000000);
+
+ printf("\n");
+ }
+}
+
+static void cpu_stat_update(int cstate_fd, int pstate_fd)
+{
+ unsigned long key, value;
+ int c, i;
+
+ for (c = 0; c < MAX_CPU; c++) {
+ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+ key = c * MAX_CSTATE_ENTRIES + i;
+ bpf_map_lookup_elem(cstate_fd, &key, &value);
+ stat_data[c].cstate[i] = value;
+ }
+
+ for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+ key = c * MAX_PSTATE_ENTRIES + i;
+ bpf_map_lookup_elem(pstate_fd, &key, &value);
+ stat_data[c].pstate[i] = value;
+ }
+ }
+}
+
+/*
+ * This function is copied from 'idlestat' tool function
+ * idlestat_wake_all() in idlestate.c.
+ *
+ * It sets the self running task affinity to cpus one by one so can wake up
+ * the specific CPU to handle scheduling; this results in all cpus can be
+ * waken up once and produce ftrace event 'trace_cpu_idle'.
+ */
+static int cpu_stat_inject_cpu_idle_event(void)
+{
+ int rcpu, i, ret;
+ cpu_set_t cpumask;
+ cpu_set_t original_cpumask;
+
+ ret = sysconf(_SC_NPROCESSORS_CONF);
+ if (ret < 0)
+ return -1;
+
+ rcpu = sched_getcpu();
+ if (rcpu < 0)
+ return -1;
+
+ /* Keep track of the CPUs we will run on */
+ sched_getaffinity(0, sizeof(original_cpumask), &original_cpumask);
+
+ for (i = 0; i < ret; i++) {
+
+ /* Pointless to wake up ourself */
+ if (i == rcpu)
+ continue;
+
+ /* Pointless to wake CPUs we will not run on */
+ if (!CPU_ISSET(i, &original_cpumask))
+ continue;
+
+ CPU_ZERO(&cpumask);
+ CPU_SET(i, &cpumask);
+
+ sched_setaffinity(0, sizeof(cpumask), &cpumask);
+ }
+
+ /* Enable all the CPUs of the original mask */
+ sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask);
+ return 0;
+}
+
+/*
+ * It's possible to have no any frequency change for long time and cannot
+ * get ftrace event 'trace_cpu_frequency' for long period, this introduces
+ * big deviation for pstate statistics.
+ *
+ * To solve this issue, below code forces to set 'scaling_max_freq' to 208MHz
+ * for triggering ftrace event 'trace_cpu_frequency' and then recovery back to
+ * the maximum frequency value 1.2GHz.
+ */
+static int cpu_stat_inject_cpu_frequency_event(void)
+{
+ int len, fd;
+
+ fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY);
+ if (fd < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ return fd;
+ }
+
+ len = write(fd, CPUFREQ_LOWEST_FREQ, strlen(CPUFREQ_LOWEST_FREQ));
+ if (len < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ goto err;
+ }
+
+ len = write(fd, CPUFREQ_HIGHEST_FREQ, strlen(CPUFREQ_HIGHEST_FREQ));
+ if (len < 0) {
+ printf("failed to open scaling_max_freq, errno=%d\n", errno);
+ goto err;
+ }
+
+err:
+ close(fd);
+ return len;
+}
+
+static void int_exit(int sig)
+{
+ cpu_stat_inject_cpu_idle_event();
+ cpu_stat_inject_cpu_frequency_event();
+ cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_print();
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+ int ret;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ ret = cpu_stat_inject_cpu_idle_event();
+ if (ret < 0)
+ return 1;
+
+ ret = cpu_stat_inject_cpu_frequency_event();
+ if (ret < 0)
+ return 1;
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ while (1) {
+ cpu_stat_update(map_fd[1], map_fd[2]);
+ cpu_stat_print();
+ sleep(5);
+ }
+
+ return 0;
+}
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index d54e91eb6cbf..b701b5c21342 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -20,6 +20,7 @@
#include <string.h>
#include <unistd.h>
#include <libgen.h>
+#include <sys/resource.h>
#include "bpf_load.h"
#include "bpf_util.h"
@@ -75,6 +76,7 @@ static void usage(const char *prog)
int main(int argc, char **argv)
{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
const char *optstr = "SN";
char filename[256];
int ret, opt, key = 0;
@@ -98,6 +100,11 @@ int main(int argc, char **argv)
return 1;
}
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
ifindex_in = strtoul(argv[optind], NULL, 0);
ifindex_out = strtoul(argv[optind + 1], NULL, 0);
printf("input: %d output: %d\n", ifindex_in, ifindex_out);
diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile
index 73f1da4d116c..9bf2881bd11b 100644
--- a/samples/sockmap/Makefile
+++ b/samples/sockmap/Makefile
@@ -2,7 +2,7 @@
hostprogs-y := sockmap
# Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o
+LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 7c25c0c112bc..95a54a89a532 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -566,6 +566,7 @@ run:
else
fprintf(stderr, "unknown test\n");
out:
+ bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
close(s1);
close(s2);
close(p1);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 8644d864e3c1..b4d7b6242a40 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6743,6 +6743,7 @@ static void __net_exit selinux_nf_unregister(struct net *net)
static struct pernet_operations selinux_net_ops = {
.init = selinux_nf_register,
.exit = selinux_nf_unregister,
+ .async = true,
};
static int __init selinux_nf_ip_init(void)
diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c
index e36d17835d4f..3f29c03162ca 100644
--- a/security/smack/smack_netfilter.c
+++ b/security/smack/smack_netfilter.c
@@ -89,6 +89,7 @@ static void __net_exit smack_nf_unregister(struct net *net)
static struct pernet_operations smack_net_ops = {
.init = smack_nf_register,
.exit = smack_nf_unregister,
+ .async = true,
};
static int __init smack_nf_ip_init(void)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 5c43c187f27c..8567a858b789 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -35,12 +35,14 @@ TEST_GEN_PROGS_EXTENDED = test_libbpf_open
include ../lib.mk
-BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c
+BPFOBJ := $(OUTPUT)/libbpf.a
$(TEST_GEN_PROGS): $(BPFOBJ)
$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
+$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+
.PHONY: force
# force a rebuild of BPFOBJ when its dependencies are updated
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
index 95a370f3d378..5d73db416460 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -11,6 +11,8 @@
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -42,6 +44,7 @@ static int bpf_find_map(const char *test, struct bpf_object *obj,
int main(int argc, char **argv)
{
+ struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
const char *file = "test_tcpbpf_kern.o";
struct tcpbpf_globals g = {0};
int cg_fd, prog_fd, map_fd;
@@ -54,6 +57,9 @@ int main(int argc, char **argv)
int pid;
int rv;
+ if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
+ perror("Unable to lift memlock rlimit");
+
if (argc > 1 && strcmp(argv[1], "-d") == 0)
debug_flag = true;
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index c73592fa3d41..2164d218322e 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -57,6 +57,9 @@
#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0)
#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1)
+#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
+static bool unpriv_disabled = false;
+
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
@@ -11163,6 +11166,95 @@ static struct bpf_test tests[] = {
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
+ {
+ "jit: lsh, rsh, arsh by 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 0xff),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 1),
+ BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 1),
+ BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0xff, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x7f, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: mov32 for ldimm64, 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_LD_IMM64(BPF_REG_1, 0xfeffffffffffffffULL),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32),
+ BPF_LD_IMM64(BPF_REG_2, 0xfeffffffULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: mov32 for ldimm64, 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_1, 0x1ffffffffULL),
+ BPF_LD_IMM64(BPF_REG_2, 0xffffffffULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: various mul tests",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+ BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xefefefULL),
+ BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU64_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
+ BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
+ BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+
};
static int probe_filter_length(const struct bpf_insn *fp)
@@ -11317,7 +11409,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
goto fail_log;
}
if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) {
- printf("FAIL\nUnexpected error message!\n");
+ printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n",
+ expected_err, bpf_vlog);
goto fail_log;
}
}
@@ -11401,9 +11494,20 @@ out:
return ret;
}
+static void get_unpriv_disabled()
+{
+ char buf[2];
+ FILE *fd;
+
+ fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
+ if (fgets(buf, 2, fd) == buf && atoi(buf))
+ unpriv_disabled = true;
+ fclose(fd);
+}
+
static int do_test(bool unpriv, unsigned int from, unsigned int to)
{
- int i, passes = 0, errors = 0;
+ int i, passes = 0, errors = 0, skips = 0;
for (i = from; i < to; i++) {
struct bpf_test *test = &tests[i];
@@ -11411,7 +11515,10 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
/* Program types that are not supported by non-root we
* skip right away.
*/
- if (!test->prog_type) {
+ if (!test->prog_type && unpriv_disabled) {
+ printf("#%d/u %s SKIP\n", i, test->descr);
+ skips++;
+ } else if (!test->prog_type) {
if (!unpriv)
set_admin(false);
printf("#%d/u %s ", i, test->descr);
@@ -11420,13 +11527,17 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
set_admin(true);
}
- if (!unpriv) {
+ if (unpriv) {
+ printf("#%d/p %s SKIP\n", i, test->descr);
+ skips++;
+ } else {
printf("#%d/p %s ", i, test->descr);
do_test_single(test, false, &passes, &errors);
}
}
- printf("Summary: %d PASSED, %d FAILED\n", passes, errors);
+ printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes,
+ skips, errors);
return errors ? EXIT_FAILURE : EXIT_SUCCESS;
}
@@ -11454,6 +11565,13 @@ int main(int argc, char **argv)
}
}
+ get_unpriv_disabled();
+ if (unpriv && unpriv_disabled) {
+ printf("Cannot run as unprivileged user with sysctl %s.\n",
+ UNPRIV_SYSCTL);
+ return EXIT_FAILURE;
+ }
+
setrlimit(RLIMIT_MEMLOCK, unpriv ? &rlim : &rinf);
return do_test(unpriv, from, to);
}
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index d7c30d366935..229a038966e3 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,7 +5,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
new file mode 100644
index 000000000000..a793eef5b876
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/.gitignore
@@ -0,0 +1 @@
+forwarding.config
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
new file mode 100644
index 000000000000..4a0964c42860
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/README
@@ -0,0 +1,56 @@
+Motivation
+==========
+
+One of the nice things about network namespaces is that they allow one
+to easily create and test complex environments.
+
+Unfortunately, these namespaces can not be used with actual switching
+ASICs, as their ports can not be migrated to other network namespaces
+(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+L1-separation provided by namespaces.
+
+However, a similar kind of flexibility can be achieved by using VRFs and
+by looping the switch ports together. For example:
+
+ br0
+ +
+ vrf-h1 | vrf-h2
+ + +---+----+ +
+ | | | |
+ 192.0.2.1/24 + + + + 192.0.2.2/24
+ swp1 swp2 swp3 swp4
+ + + + +
+ | | | |
+ +--------+ +--------+
+
+The VRFs act as lightweight namespaces representing hosts connected to
+the switch.
+
+This approach for testing switch ASICs has several advantages over the
+traditional method that requires multiple physical machines, to name a
+few:
+
+1. Only the device under test (DUT) is being tested without noise from
+other system.
+
+2. Ability to easily provision complex topologies. Testing bridging
+between 4-ports LAGs or 8-way ECMP requires many physical links that are
+not always available. With the VRF-based approach one merely needs to
+loopback more ports.
+
+These tests are written with switch ASICs in mind, but they can be run
+on any Linux box using veth pairs to emulate physical loopbacks.
+
+Guidelines for Writing Tests
+============================
+
+o Where possible, reuse an existing topology for different tests instead
+ of recreating the same topology.
+o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and
+ RFC 5737, respectively.
+o Where possible, tests shall be written so that they can be reused by
+ multiple topologies and added to lib.sh.
+o Checks shall be added to lib.sh for any external dependencies.
+o Code shall be checked using ShellCheck [1] prior to submission.
+
+1. https://www.shellcheck.net/
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
new file mode 100755
index 000000000000..75d922438bc9
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -0,0 +1,88 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ # 10 Seconds ageing time.
+ ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+ mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 192.0.2.2
+ping6_test $h1 2001:db8:1::2
+learning_test "br0" $swp1 $h1 $h2
+flood_test $swp2 $h1 $h2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
new file mode 100644
index 000000000000..5cd2aed97958
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/config
@@ -0,0 +1,12 @@
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_VRF=m
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
new file mode 100644
index 000000000000..ab235c124f20
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+declare -A NETIFS
+
+NETIFS[p1]=veth0
+NETIFS[p2]=veth1
+NETIFS[p3]=veth2
+NETIFS[p4]=veth3
+NETIFS[p5]=veth4
+NETIFS[p6]=veth5
+NETIFS[p7]=veth6
+NETIFS[p8]=veth7
+
+##############################################################################
+# Defines
+
+# IPv4 ping utility name
+PING=ping
+# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
+PING6=ping6
+# Packet generator. Some distributions use 'mz'.
+MZ=mausezahn
+# Time to wait after interfaces participating in the test are all UP
+WAIT_TIME=5
+# Whether to pause on failure or not.
+PAUSE_ON_FAIL=no
+# Whether to pause on cleanup or not.
+PAUSE_ON_CLEANUP=no
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
new file mode 100644
index 000000000000..d0af52109360
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -0,0 +1,540 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+# Can be overridden by the configuration file.
+PING=${PING:=ping}
+PING6=${PING6:=ping6}
+MZ=${MZ:=mausezahn}
+WAIT_TIME=${WAIT_TIME:=5}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
+
+if [[ -f forwarding.config ]]; then
+ source forwarding.config
+fi
+
+##############################################################################
+# Sanity checks
+
+check_tc_version()
+{
+ tc -j &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing JSON support"
+ exit 1
+ fi
+
+ tc filter help 2>&1 | grep block &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing shared block support"
+ exit 1
+ fi
+}
+
+if [[ "$(id -u)" -ne 0 ]]; then
+ echo "SKIP: need root privileges"
+ exit 0
+fi
+
+if [[ "$CHECK_TC" = "yes" ]]; then
+ check_tc_version
+fi
+
+if [[ ! -x "$(command -v jq)" ]]; then
+ echo "SKIP: jq not installed"
+ exit 1
+fi
+
+if [[ ! -x "$(command -v $MZ)" ]]; then
+ echo "SKIP: $MZ not installed"
+ exit 0
+fi
+
+if [[ ! -v NUM_NETIFS ]]; then
+ echo "SKIP: importer does not define \"NUM_NETIFS\""
+ exit 0
+fi
+
+##############################################################################
+# Command line options handling
+
+count=0
+
+while [[ $# -gt 0 ]]; do
+ if [[ "$count" -eq "0" ]]; then
+ unset NETIFS
+ declare -A NETIFS
+ fi
+ count=$((count + 1))
+ NETIFS[p$count]="$1"
+ shift
+done
+
+##############################################################################
+# Network interfaces configuration
+
+for i in $(eval echo {1..$NUM_NETIFS}); do
+ ip link show dev ${NETIFS[p$i]} &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: could not find all required interfaces"
+ exit 0
+ fi
+done
+
+##############################################################################
+# Helpers
+
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
+check_err()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -ne 0 ]]; then
+ RET=$err
+ retmsg=$msg
+ fi
+}
+
+check_fail()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -eq 0 ]]; then
+ RET=1
+ retmsg=$msg
+ fi
+}
+
+log_test()
+{
+ local test_name=$1
+ local opt_str=$2
+
+ if [[ $# -eq 2 ]]; then
+ opt_str="($opt_str)"
+ fi
+
+ if [[ $RET -ne 0 ]]; then
+ EXIT_STATUS=1
+ printf "TEST: %-60s [FAIL]\n" "$test_name $opt_str"
+ if [[ ! -z "$retmsg" ]]; then
+ printf "\t%s\n" "$retmsg"
+ fi
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo "Hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ return 1
+ fi
+
+ printf "TEST: %-60s [PASS]\n" "$test_name $opt_str"
+ return 0
+}
+
+log_info()
+{
+ local msg=$1
+
+ echo "INFO: $msg"
+}
+
+setup_wait()
+{
+ for i in $(eval echo {1..$NUM_NETIFS}); do
+ while true; do
+ ip link show dev ${NETIFS[p$i]} up \
+ | grep 'state UP' &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ sleep 1
+ else
+ break
+ fi
+ done
+ done
+
+ # Make sure links are ready.
+ sleep $WAIT_TIME
+}
+
+pre_cleanup()
+{
+ if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
+ echo "Pausing before cleanup, hit any key to continue"
+ read
+ fi
+}
+
+vrf_prepare()
+{
+ ip -4 rule add pref 32765 table local
+ ip -4 rule del pref 0
+ ip -6 rule add pref 32765 table local
+ ip -6 rule del pref 0
+}
+
+vrf_cleanup()
+{
+ ip -6 rule add pref 0 table local
+ ip -6 rule del pref 32765
+ ip -4 rule add pref 0 table local
+ ip -4 rule del pref 32765
+}
+
+__last_tb_id=0
+declare -A __TB_IDS
+
+__vrf_td_id_assign()
+{
+ local vrf_name=$1
+
+ __last_tb_id=$((__last_tb_id + 1))
+ __TB_IDS[$vrf_name]=$__last_tb_id
+ return $__last_tb_id
+}
+
+__vrf_td_id_lookup()
+{
+ local vrf_name=$1
+
+ return ${__TB_IDS[$vrf_name]}
+}
+
+vrf_create()
+{
+ local vrf_name=$1
+ local tb_id
+
+ __vrf_td_id_assign $vrf_name
+ tb_id=$?
+
+ ip link add dev $vrf_name type vrf table $tb_id
+ ip -4 route add table $tb_id unreachable default metric 4278198272
+ ip -6 route add table $tb_id unreachable default metric 4278198272
+}
+
+vrf_destroy()
+{
+ local vrf_name=$1
+ local tb_id
+
+ __vrf_td_id_lookup $vrf_name
+ tb_id=$?
+
+ ip -6 route del table $tb_id unreachable default metric 4278198272
+ ip -4 route del table $tb_id unreachable default metric 4278198272
+ ip link del dev $vrf_name
+}
+
+__addr_add_del()
+{
+ local if_name=$1
+ local add_del=$2
+ local array
+
+ shift
+ shift
+ array=("${@}")
+
+ for addrstr in "${array[@]}"; do
+ ip address $add_del $addrstr dev $if_name
+ done
+}
+
+simple_if_init()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ vrf_create $vrf_name
+ ip link set dev $if_name master $vrf_name
+ ip link set dev $vrf_name up
+ ip link set dev $if_name up
+
+ __addr_add_del $if_name add "${array[@]}"
+}
+
+simple_if_fini()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ __addr_add_del $if_name del "${array[@]}"
+
+ ip link set dev $if_name down
+ vrf_destroy $vrf_name
+}
+
+master_name_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["master"]'
+}
+
+link_stats_tx_packets_get()
+{
+ local if_name=$1
+
+ ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
+}
+
+mac_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
+bridge_ageing_time_get()
+{
+ local bridge=$1
+ local ageing_time
+
+ # Need to divide by 100 to convert to seconds.
+ ageing_time=$(ip -j -d link show dev $bridge \
+ | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')
+ echo $((ageing_time / 100))
+}
+
+forwarding_enable()
+{
+ ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
+ ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+
+ sysctl -q -w net.ipv4.conf.all.forwarding=1
+ sysctl -q -w net.ipv6.conf.all.forwarding=1
+}
+
+forwarding_restore()
+{
+ sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
+ sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+}
+
+tc_offload_check()
+{
+ for i in $(eval echo {1..$NUM_NETIFS}); do
+ ethtool -k ${NETIFS[p$i]} \
+ | grep "hw-tc-offload: on" &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+##############################################################################
+# Tests
+
+ping_test()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ RET=0
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ check_err $?
+ log_test "ping"
+}
+
+ping6_test()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ RET=0
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ check_err $?
+ log_test "ping6"
+}
+
+learning_test()
+{
+ local bridge=$1
+ local br_port1=$2 # Connected to `host1_if`.
+ local host1_if=$3
+ local host2_if=$4
+ local mac=de:ad:be:ef:13:37
+ local ageing_time
+
+ RET=0
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ # Disable unknown unicast flooding on `br_port1` to make sure
+ # packets are only forwarded through the port after a matching
+ # FDB entry was installed.
+ bridge link set dev $br_port1 flood off
+
+ tc qdisc add dev $host1_if ingress
+ tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_fail $? "Packet reached second host when should not"
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_err $? "Did not find FDB record when should"
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err $? "Packet did not reach second host when should"
+
+ # Wait for 10 seconds after the ageing time to make sure FDB
+ # record was aged-out.
+ ageing_time=$(bridge_ageing_time_get $bridge)
+ sleep $((ageing_time + 10))
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ bridge link set dev $br_port1 learning off
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+ check_fail $? "Found FDB record when should not"
+
+ bridge link set dev $br_port1 learning on
+
+ tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host1_if ingress
+
+ bridge link set dev $br_port1 flood on
+
+ log_test "FDB learning"
+}
+
+flood_test_do()
+{
+ local should_flood=$1
+ local mac=$2
+ local ip=$3
+ local host1_if=$4
+ local host2_if=$5
+ local err=0
+
+ # Add an ACL on `host2_if` which will tell us whether the packet
+ # was flooded to it or not.
+ tc qdisc add dev $host2_if ingress
+ tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host2_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ if [[ $? -ne 0 && $should_flood == "true" || \
+ $? -eq 0 && $should_flood == "false" ]]; then
+ err=1
+ fi
+
+ tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host2_if ingress
+
+ return $err
+}
+
+flood_unicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=de:ad:be:ef:13:37
+ local ip=192.0.2.100
+
+ RET=0
+
+ bridge link set dev $br_port flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+ check_err $? "Packet flooded when should not"
+
+ bridge link set dev $br_port flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+ check_err $? "Packet was not flooded when should"
+
+ log_test "Unknown unicast flood"
+}
+
+flood_multicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=01:00:5e:00:00:01
+ local ip=239.0.0.1
+
+ RET=0
+
+ bridge link set dev $br_port mcast_flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+ check_err $? "Packet flooded when should not"
+
+ bridge link set dev $br_port mcast_flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+ check_err $? "Packet was not flooded when should"
+
+ log_test "Unregistered multicast flood"
+}
+
+flood_test()
+{
+ # `br_port` is connected to `host2_if`
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+
+ flood_unicast_test $br_port $host1_if $host2_if
+ flood_multicast_test $br_port $host1_if $host2_if
+}
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
new file mode 100755
index 000000000000..cc6a14abfa87
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ ip address add 192.0.2.1/24 dev $rp1
+ ip address add 2001:db8:1::1/64 dev $rp1
+
+ ip address add 198.51.100.1/24 dev $rp2
+ ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+ ip address del 2001:db8:2::1/64 dev $rp2
+ ip address del 198.51.100.1/24 dev $rp2
+
+ ip address del 2001:db8:1::1/64 dev $rp1
+ ip address del 192.0.2.1/24 dev $rp1
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
new file mode 100755
index 000000000000..55595305a604
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -0,0 +1,332 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+ vrf_create "vrf-r1"
+ ip link set dev $rp11 master vrf-r1
+ ip link set dev $rp12 master vrf-r1
+ ip link set dev $rp13 master vrf-r1
+
+ ip link set dev vrf-r1 up
+ ip link set dev $rp11 up
+ ip link set dev $rp12 up
+ ip link set dev $rp13 up
+
+ ip address add 192.0.2.1/24 dev $rp11
+ ip address add 2001:db8:1::1/64 dev $rp11
+
+ ip address add 169.254.2.12/24 dev $rp12
+ ip address add fe80:2::12/64 dev $rp12
+
+ ip address add 169.254.3.13/24 dev $rp13
+ ip address add fe80:3::13/64 dev $rp13
+
+ ip route add 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ ip route add 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-r1
+ ip route del 198.51.100.0/24 vrf vrf-r1
+
+ ip address del fe80:3::13/64 dev $rp13
+ ip address del 169.254.3.13/24 dev $rp13
+
+ ip address del fe80:2::12/64 dev $rp12
+ ip address del 169.254.2.12/24 dev $rp12
+
+ ip address del 2001:db8:1::1/64 dev $rp11
+ ip address del 192.0.2.1/24 dev $rp11
+
+ ip link set dev $rp13 down
+ ip link set dev $rp12 down
+ ip link set dev $rp11 down
+
+ vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+ vrf_create "vrf-r2"
+ ip link set dev $rp21 master vrf-r2
+ ip link set dev $rp22 master vrf-r2
+ ip link set dev $rp23 master vrf-r2
+
+ ip link set dev vrf-r2 up
+ ip link set dev $rp21 up
+ ip link set dev $rp22 up
+ ip link set dev $rp23 up
+
+ ip address add 198.51.100.1/24 dev $rp21
+ ip address add 2001:db8:2::1/64 dev $rp21
+
+ ip address add 169.254.2.22/24 dev $rp22
+ ip address add fe80:2::22/64 dev $rp22
+
+ ip address add 169.254.3.23/24 dev $rp23
+ ip address add fe80:3::23/64 dev $rp23
+
+ ip route add 192.0.2.0/24 vrf vrf-r2 \
+ nexthop via 169.254.2.12 dev $rp22 \
+ nexthop via 169.254.3.13 dev $rp23
+ ip route add 2001:db8:1::/64 vrf vrf-r2 \
+ nexthop via fe80:2::12 dev $rp22 \
+ nexthop via fe80:3::13 dev $rp23
+}
+
+router2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-r2
+ ip route del 192.0.2.0/24 vrf vrf-r2
+
+ ip address del fe80:3::23/64 dev $rp23
+ ip address del 169.254.3.23/24 dev $rp23
+
+ ip address del fe80:2::22/64 dev $rp22
+ ip address del 169.254.2.22/24 dev $rp22
+
+ ip address del 2001:db8:2::1/64 dev $rp21
+ ip address del 198.51.100.1/24 dev $rp21
+
+ ip link set dev $rp23 down
+ ip link set dev $rp22 down
+ ip link set dev $rp21 down
+
+ vrf_destroy "vrf-r2"
+}
+
+multipath_eval()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local packets_rp12=$4
+ local packets_rp13=$5
+ local weights_ratio packets_ratio diff
+
+ RET=0
+
+ if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
+ check_err 1 "Packet difference is 0"
+ log_test "Multipath"
+ log_info "Expected ratio $weights_ratio"
+ return
+ fi
+
+ if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+ weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
+ | bc -l)
+ packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
+ | bc -l)
+ else
+ weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" | \
+ bc -l)
+ packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" | \
+ bc -l)
+ fi
+
+ diff=$(echo $weights_ratio - $packets_ratio | bc -l)
+ diff=${diff#-}
+
+ test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
+ check_err $? "Too large discrepancy between expected and measured ratios"
+ log_test "$desc"
+ log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
+}
+
+multipath4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+ local hash_policy
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
+ hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
+ nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ # Restore settings.
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
+}
+
+multipath6_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+ done
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+multipath_test()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 multipath tests"
+ multipath6_test "ECMP" 1 1
+ multipath6_test "Weighted MP 2:1" 2 1
+ multipath6_test "Weighted MP 11:45" 11 45
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp11=${NETIFS[p2]}
+
+ rp12=${NETIFS[p3]}
+ rp22=${NETIFS[p4]}
+
+ rp13=${NETIFS[p5]}
+ rp23=${NETIFS[p6]}
+
+ rp21=${NETIFS[p7]}
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+multipath_test
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
new file mode 100755
index 000000000000..8ab5cf0a960b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 clsact
+
+ simple_if_init $swp2 192.0.2.1/24
+}
+
+switch_destroy()
+{
+ simple_if_fini $swp2 192.0.2.1/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+mirred_egress_redirect_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched without redirect rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match incoming redirected packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "mirred egress redirect ($tcflags)"
+}
+
+gact_drop_and_ok_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ skip_hw dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not dropped"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action ok
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see trapped packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "gact drop and ok ($tcflags)"
+}
+
+gact_trap_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_hw dst_ip 192.0.2.2 action drop
+ tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_fail $? "Saw packet without trap rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action trap
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not trapped"
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see trapped packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "trap ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swp1origmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp1 address $h2mac
+ ip link set $swp2 address $h1mac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+ ip link set $swp1 address $swp1origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+gact_drop_and_ok_test
+mirred_egress_redirect_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ gact_drop_and_ok_test
+ mirred_egress_redirect_test
+ gact_trap_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
new file mode 100755
index 000000000000..2fd15226974b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+unreachable_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_fail $? "matched on filter in unreachable chain"
+
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "unreachable chain ($tcflags)"
+}
+
+gact_goto_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $h2mac action goto chain 1
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct filter with goto chain action"
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_err $? "Did not match on correct filter in chain 1"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "gact goto chain ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+unreachable_chain_test
+gact_goto_chain_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ unreachable_chain_test
+ gact_goto_chain_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
new file mode 100644
index 000000000000..9d3b64a2a264
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+CHECK_TC="yes"
+
+tc_check_packets()
+{
+ local id=$1
+ local handle=$2
+ local count=$3
+ local ret
+
+ output="$(tc -j -s filter show $id)"
+ # workaround the jq bug which causes jq to return 0 in case input is ""
+ ret=$?
+ if [[ $ret -ne 0 ]]; then
+ return $ret
+ fi
+ echo $output | \
+ jq -e ".[] \
+ | select(.options.handle == $handle) \
+ | select(.options.actions[0].stats.packets == $count)" \
+ &> /dev/null
+ return $?
+}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
new file mode 100755
index 000000000000..032b882adfc0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -0,0 +1,196 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+match_dst_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "dst_mac match ($tcflags)"
+}
+
+match_src_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_mac $h1mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "src_mac match ($tcflags)"
+}
+
+match_dst_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 198.51.100.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "dst_ip match ($tcflags)"
+}
+
+match_src_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_ip 198.51.100.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_ip 192.0.2.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags src_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "src_ip match ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+match_dst_mac_test
+match_src_mac_test
+match_dst_ip_test
+match_src_ip_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ match_dst_mac_test
+ match_src_mac_test
+ match_dst_ip_test
+ match_src_ip_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
new file mode 100755
index 000000000000..077b98048ef4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.1/24
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.1/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact
+
+ simple_if_init $swp2 192.0.2.2/24
+ tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ simple_if_fini $swp2 192.0.2.2/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+shared_block_test()
+{
+ RET=0
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 1
+ check_err $? "Did not match first incoming packet on a block"
+
+ $MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 2
+ check_err $? "Did not match second incoming packet on a block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ log_test "shared block ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp2 address $swmac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+shared_block_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ shared_block_test
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 5cc2a53bb71c..406cc70c571d 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -344,27 +344,53 @@ static int do_setup_tx(int domain, int type, int protocol)
return fd;
}
-static int do_process_zerocopy_cookies(struct sock_extended_err *serr,
- uint32_t *ckbuf, size_t nbytes)
+static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
{
- int ncookies, i;
+ int i;
- if (serr->ee_errno != 0)
- error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
- ncookies = serr->ee_data;
- if (ncookies > SO_EE_ORIGIN_MAX_ZCOOKIES)
+ if (ck->num > RDS_MAX_ZCOOKIES)
error(1, 0, "Returned %d cookies, max expected %d\n",
- ncookies, SO_EE_ORIGIN_MAX_ZCOOKIES);
- if (nbytes != ncookies * sizeof(uint32_t))
- error(1, 0, "Expected %d cookies, got %ld\n",
- ncookies, nbytes/sizeof(uint32_t));
- for (i = 0; i < ncookies; i++)
+ ck->num, RDS_MAX_ZCOOKIES);
+ for (i = 0; i < ck->num; i++)
if (cfg_verbose >= 2)
- fprintf(stderr, "%d\n", ckbuf[i]);
- return ncookies;
+ fprintf(stderr, "%d\n", ck->cookies[i]);
+ return ck->num;
}
-static bool do_recv_completion(int fd)
+static bool do_recvmsg_completion(int fd)
+{
+ char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
+ struct rds_zcopy_cookies *ck;
+ struct cmsghdr *cmsg;
+ struct msghdr msg;
+ bool ret = false;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_control = cmsgbuf;
+ msg.msg_controllen = sizeof(cmsgbuf);
+
+ if (recvmsg(fd, &msg, MSG_DONTWAIT))
+ return ret;
+
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, errno, "recvmsg notification: truncated");
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_RDS &&
+ cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
+
+ ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
+ completions += do_process_zerocopy_cookies(ck);
+ ret = true;
+ break;
+ }
+ error(0, 0, "ignoring cmsg at level %d type %d\n",
+ cmsg->cmsg_level, cmsg->cmsg_type);
+ }
+ return ret;
+}
+
+static bool do_recv_completion(int fd, int domain)
{
struct sock_extended_err *serr;
struct msghdr msg = {};
@@ -372,17 +398,13 @@ static bool do_recv_completion(int fd)
uint32_t hi, lo, range;
int ret, zerocopy;
char control[100];
- uint32_t ckbuf[SO_EE_ORIGIN_MAX_ZCOOKIES];
- struct iovec iov;
+
+ if (domain == PF_RDS)
+ return do_recvmsg_completion(fd);
msg.msg_control = control;
msg.msg_controllen = sizeof(control);
- iov.iov_base = ckbuf;
- iov.iov_len = (SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(ckbuf[0]));
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
-
ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
if (ret == -1 && errno == EAGAIN)
return false;
@@ -402,10 +424,6 @@ static bool do_recv_completion(int fd)
serr = (void *) CMSG_DATA(cm);
- if (serr->ee_origin == SO_EE_ORIGIN_ZCOOKIE) {
- completions += do_process_zerocopy_cookies(serr, ckbuf, ret);
- return true;
- }
if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
if (serr->ee_errno != 0)
@@ -440,20 +458,20 @@ static bool do_recv_completion(int fd)
}
/* Read all outstanding messages on the errqueue */
-static void do_recv_completions(int fd)
+static void do_recv_completions(int fd, int domain)
{
- while (do_recv_completion(fd)) {}
+ while (do_recv_completion(fd, domain)) {}
}
/* Wait for all remaining completions on the errqueue */
-static void do_recv_remaining_completions(int fd)
+static void do_recv_remaining_completions(int fd, int domain)
{
int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
while (completions < expected_completions &&
gettimeofday_ms() < tstop) {
- if (do_poll(fd, POLLERR))
- do_recv_completions(fd);
+ if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
+ do_recv_completions(fd, domain);
}
if (completions < expected_completions)
@@ -534,13 +552,13 @@ static void do_tx(int domain, int type, int protocol)
while (!do_poll(fd, POLLOUT)) {
if (cfg_zerocopy)
- do_recv_completions(fd);
+ do_recv_completions(fd, domain);
}
} while (gettimeofday_ms() < tstop);
if (cfg_zerocopy)
- do_recv_remaining_completions(fd);
+ do_recv_remaining_completions(fd, domain);
if (close(fd))
error(1, errno, "close");
@@ -631,40 +649,6 @@ static void do_flush_datagram(int fd, int type)
bytes += cfg_payload_len;
}
-
-static void do_recvmsg(int fd)
-{
- int ret, off = 0;
- char *buf;
- struct iovec iov;
- struct msghdr msg;
- struct sockaddr_storage din;
-
- buf = calloc(cfg_payload_len, sizeof(char));
- iov.iov_base = buf;
- iov.iov_len = cfg_payload_len;
-
- memset(&msg, 0, sizeof(msg));
- msg.msg_name = &din;
- msg.msg_namelen = sizeof(din);
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
-
- ret = recvmsg(fd, &msg, MSG_TRUNC);
-
- if (ret == -1)
- error(1, errno, "recv");
- if (ret != cfg_payload_len)
- error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
-
- if (memcmp(buf + off, payload, ret))
- error(1, 0, "recv: data mismatch");
-
- free(buf);
- packets++;
- bytes += cfg_payload_len;
-}
-
static void do_rx(int domain, int type, int protocol)
{
uint64_t tstop;
@@ -676,8 +660,6 @@ static void do_rx(int domain, int type, int protocol)
do {
if (type == SOCK_STREAM)
do_flush_tcp(fd);
- else if (domain == PF_RDS)
- do_recvmsg(fd);
else
do_flush_datagram(fd, type);
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index b3754b9aa302..7b50775abaf6 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -15,6 +15,7 @@ import importlib
import json
import subprocess
import time
+import traceback
from collections import OrderedDict
from string import Template
@@ -23,6 +24,13 @@ from tdc_helper import *
import TdcPlugin
+
+class PluginMgrTestFail(Exception):
+ def __init__(self, stage, output, message):
+ self.stage = stage
+ self.output = output
+ self.message = message
+
class PluginMgr:
def __init__(self, argparser):
super().__init__()
@@ -135,7 +143,7 @@ def exec_cmd(args, pm, stage, command):
return proc, foutput
-def prepare_env(args, pm, stage, prefix, cmdlist):
+def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
"""
Execute the setup/teardown commands for a test case.
Optionally terminate test execution if the command fails.
@@ -164,7 +172,9 @@ def prepare_env(args, pm, stage, prefix, cmdlist):
print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr)
print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr)
print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr)
- raise Exception('"{}" did not complete successfully'.format(prefix))
+ raise PluginMgrTestFail(
+ stage, output,
+ '"{}" did not complete successfully'.format(prefix))
def run_one_test(pm, args, index, tidx):
result = True
@@ -194,8 +204,11 @@ def run_one_test(pm, args, index, tidx):
match_pattern = re.compile(
str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE)
(p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"])
- match_index = re.findall(match_pattern, procout)
- if len(match_index) != int(tidx["matchCount"]):
+ if procout:
+ match_index = re.findall(match_pattern, procout)
+ if len(match_index) != int(tidx["matchCount"]):
+ result = False
+ elif int(tidx["matchCount"]) != 0:
result = False
if not result:
@@ -204,9 +217,12 @@ def run_one_test(pm, args, index, tidx):
tap += tresult
if result == False:
- tap += procout
+ if procout:
+ tap += procout
+ else:
+ tap += 'No output!\n'
- prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'])
+ prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout)
pm.call_post_case()
index += 1
@@ -227,30 +243,70 @@ def test_runner(pm, args, filtered_tests):
index = 1
tap = str(index) + ".." + str(tcount) + "\n"
badtest = None
+ stage = None
+ emergency_exit = False
+ emergency_exit_message = ''
- pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
-
+ try:
+ pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
+ except Exception as ee:
+ ex_type, ex, ex_tb = sys.exc_info()
+ print('Exception {} {} (caught in pre_suite).'.
+ format(ex_type, ex))
+ # when the extra print statements are uncommented,
+ # the traceback does not appear between them
+ # (it appears way earlier in the tdc.py output)
+ # so don't bother ...
+ # print('--------------------(')
+ # print('traceback')
+ traceback.print_tb(ex_tb)
+ # print('--------------------)')
+ emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex)
+ emergency_exit = True
+ stage = 'pre-SUITE'
+
+ if emergency_exit:
+ pm.call_post_suite(index)
+ return emergency_exit_message
if args.verbose > 1:
- print('Run tests here')
+ print('give test rig 2 seconds to stabilize')
+ time.sleep(2)
for tidx in testlist:
if "flower" in tidx["category"] and args.device == None:
+ if args.verbose > 1:
+ print('Not executing test {} {} because DEV2 not defined'.
+ format(tidx['id'], tidx['name']))
continue
try:
badtest = tidx # in case it goes bad
tap += run_one_test(pm, args, index, tidx)
- except Exception as ee:
- print('Exception {} (caught in test_runner, running test {} {} {})'.
- format(ee, index, tidx['id'], tidx['name']))
+ except PluginMgrTestFail as pmtf:
+ ex_type, ex, ex_tb = sys.exc_info()
+ stage = pmtf.stage
+ message = pmtf.message
+ output = pmtf.output
+ print(message)
+ print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'.
+ format(ex_type, ex, index, tidx['id'], tidx['name'], stage))
+ print('---------------')
+ print('traceback')
+ traceback.print_tb(ex_tb)
+ print('---------------')
+ if stage == 'teardown':
+ print('accumulated output for this test:')
+ if pmtf.output:
+ print(pmtf.output)
+ print('---------------')
break
index += 1
# if we failed in setup or teardown,
- # fill in the remaining tests with not ok
+ # fill in the remaining tests with ok-skipped
count = index
tap += 'about to flush the tap output if tests need to be skipped\n'
if tcount + 1 != index:
for tidx in testlist[index - 1:]:
- msg = 'skipped - previous setup or teardown failed'
+ msg = 'skipped - previous {} failed'.format(stage)
tap += 'ok {} - {} # {} {} {}\n'.format(
count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
count += 1
@@ -347,9 +403,9 @@ def check_default_settings(args, remaining, pm):
global NAMES
if args.path != None:
- NAMES['TC'] = args.path
+ NAMES['TC'] = args.path
if args.device != None:
- NAMES['DEV2'] = args.device
+ NAMES['DEV2'] = args.device
if not os.path.isfile(NAMES['TC']):
print("The specified tc path " + NAMES['TC'] + " does not exist.")
exit(1)
@@ -389,7 +445,7 @@ def generate_case_ids(alltests):
for c in alltests:
if (c["id"] == ""):
while True:
- newid = str('%04x' % random.randrange(16**4))
+ newid = str('{:04x}'.format(random.randrange(16**4)))
if (does_id_exist(alltests, newid)):
continue
else:
diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py
index 707c6bfef689..52fa539dc662 100755
--- a/tools/testing/selftests/tc-testing/tdc_batch.py
+++ b/tools/testing/selftests/tc-testing/tdc_batch.py
@@ -49,13 +49,13 @@ index = 0
for i in range(0x100):
for j in range(0x100):
for k in range(0x100):
- mac = ("%02x:%02x:%02x" % (i, j, k))
+ mac = ("{:02x}:{:02x}:{:02x}".format(i, j, k))
src_mac = "e4:11:00:" + mac
dst_mac = "e4:12:00:" + mac
- cmd = ("filter add dev %s %s protocol ip parent ffff: flower %s "
- "src_mac %s dst_mac %s action drop %s" %
+ cmd = ("filter add dev {} {} protocol ip parent ffff: flower {} "
+ "src_mac {} dst_mac {} action drop {}".format
(device, prio, skip, src_mac, dst_mac, share_action))
- file.write("%s\n" % cmd)
+ file.write("{}\n".format(cmd))
index += 1
if index >= number:
file.close()