265 files changed, 16758 insertions, 3601 deletions
diff --git a/Documentation/devicetree/bindings/net/brcm,amac.txt b/Documentation/devicetree/bindings/net/brcm,amac.txt index ba5ecc1041a5..2fefa1a44afd 100644 --- a/Documentation/devicetree/bindings/net/brcm,amac.txt +++ b/Documentation/devicetree/bindings/net/brcm,amac.txt @@ -2,11 +2,17 @@ Broadcom AMAC Ethernet Controller Device Tree Bindings ------------------------------------------------------------- Required properties: - - compatible: "brcm,amac" or "brcm,nsp-amac" - - reg: Address and length of the GMAC registers, - Address and length of the GMAC IDM registers - - reg-names: Names of the registers. Must have both "amac_base" and - "idm_base" + - compatible: "brcm,amac" + "brcm,nsp-amac" + "brcm,ns2-amac" + - reg: Address and length of the register set for the device. It + contains the register ranges in the same order as + described by reg-names + - reg-names: Names of the registers. + "amac_base": Address and length of the GMAC registers + "idm_base": Address and length of the GMAC IDM registers + "nicpm_base": Address and length of the NIC Port Manager + registers (required for Northstar2) - interrupts: Interrupt number Optional properties: diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt index d4b7f2e49984..abfbeecbcf39 100644 --- a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt +++ b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt @@ -45,6 +45,12 @@ Required properties: depends on the hardware user manual. - port-mode-offset: is offset of port mode field for each port in dsaf. Its value depends on the hardware user manual. +- mc-mac-mask: mask applied to multicast MAC addresses, determining which + bits are matched: + 1 means the bit is matched exactly; the TCAM checks this bit of the + MAC address. + 0 means the bit is fuzzy-matched; the TCAM ignores this bit of the + MAC address. [1] Documentation/devicetree/bindings/net/phy.txt @@ -74,10 +80,12 @@ dsaf0: dsa@c7000000 { reg = 0; phy-handle = <&phy0>; serdes-syscon = <&serdes>; + mc-mac-mask = [ff f0 00 00 00 00]; }; port@1 { reg = 1; serdes-syscon = <&serdes>; + mc-mac-mask = [ff f0 00 00 00 00]; }; }; diff --git a/Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt b/Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt index 8516929c7251..065e8bdb957d 100644 --- a/Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt +++ b/Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt @@ -3,7 +3,7 @@ Properties for an MDIO bus multiplexer controlled by a memory-mapped device This is a special case of a MDIO bus multiplexer. A memory-mapped device, like an FPGA, is used to control which child bus is connected. The mdio-mux node must be a child of the memory-mapped device. The driver currently only -supports devices with eight-bit registers. +supports devices with 8-, 16- or 32-bit registers. Required properties in addition to the generic multiplexer properties: - reg : integer, contains the offset of the register that controls the bus multiplexer. The size field in the 'reg' property is the size of - register, and must therefore be 1. + the register, and must therefore be 1, 2, or 4. - mux-mask : integer, contains an eight-bit mask that specifies which bits in the register control the actual bus multiplexer.
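The widened mdio-mux-mmioreg binding above can be exercised with a device tree node along the following lines; this is a minimal sketch, and the node name, register offset, register size and mux-mask value are illustrative assumptions rather than values taken from the patch:

mdio-mux@f0004 {
	compatible = "mdio-mux-mmioreg", "mdio-mux";
	mdio-parent-bus = <&mdio0>;
	#address-cells = <1>;
	#size-cells = <0>;
	reg = <0xf0004 2>;	/* one 16-bit mux control register */
	mux-mask = <0x40>;	/* bit 6 selects the child bus */

	mdio@0 {		/* child bus selected by value 0x00 */
		reg = <0>;
		#address-cells = <1>;
		#size-cells = <0>;
	};

	mdio@40 {		/* child bus selected by value 0x40 */
		reg = <0x40>;
		#address-cells = <1>;
		#size-cells = <0>;
	};
};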
diff --git a/Documentation/devicetree/bindings/net/oxnas-dwmac.txt b/Documentation/devicetree/bindings/net/oxnas-dwmac.txt new file mode 100644 index 000000000000..df0534e2eda1 --- /dev/null +++ b/Documentation/devicetree/bindings/net/oxnas-dwmac.txt @@ -0,0 +1,39 @@ +* Oxford Semiconductor OXNAS DWMAC Ethernet controller +
+The device inherits all the properties of the dwmac/stmmac devices +described in the file stmmac.txt in the current directory, with the +following changes. + +Required properties on all platforms: + +- compatible: For the OX820 SoC, it should be: + - "oxsemi,ox820-dwmac" to select glue + - "snps,dwmac-3.512" to select IP version. + +- clocks: Should contain phandles to the following clocks +- clock-names: Should contain the following: + - "stmmaceth" for the host clock - see stmmac.txt + - "gmac" for the peripheral gate clock + +- oxsemi,sys-ctrl: a phandle to the system controller syscon node + +Example: + +etha: ethernet@40400000 { + compatible = "oxsemi,ox820-dwmac", "snps,dwmac-3.512"; + reg = <0x40400000 0x2000>; + interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_wake_irq"; + mac-address = [000000000000]; /* Filled in by U-Boot */ + phy-mode = "rgmii"; + + clocks = <&stdclk CLK_820_ETHA>, <&gmacclk>; + clock-names = "gmac", "stmmaceth"; + resets = <&reset RESET_MAC>; + + /* Regmap for sys registers */ + oxsemi,sys-ctrl = <&sys>; + + status = "disabled"; +}; diff --git a/Documentation/devicetree/bindings/net/phy.txt b/Documentation/devicetree/bindings/net/phy.txt index bc1c3c8bf8fa..4627da3d52c4 100644 --- a/Documentation/devicetree/bindings/net/phy.txt +++ b/Documentation/devicetree/bindings/net/phy.txt @@ -35,6 +35,10 @@ Optional Properties: - broken-turn-around: If set, indicates the PHY device does not correctly release the turn around line low at the end of a MDIO transaction. +- enet-phy-lane-swap: If set, indicates the PHY will swap the TX/RX lanes to + compensate for the board being designed with the lanes swapped. + + Example: ethernet-phy@0 { diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 3db8c67d2c8d..5af48dd7c5fc 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -967,6 +967,21 @@ igmp_qrv - INTEGER Default: 2 (as specified by RFC2236 8.1) Minimum: 1 (as specified by RFC6636 4.5) +force_igmp_version - INTEGER + 0 - (default) No enforcement of an IGMP version; fallback to IGMPv1/v2 + is allowed. The host returns to IGMPv3 mode once all IGMPv1/v2 + Querier Present timers expire. + 1 - Enforce IGMP version 1. An IGMPv1 report is also sent in reply to + a received IGMPv2/v3 query. + 2 - Enforce IGMP version 2. Falls back to IGMPv1 if an IGMPv1 query is + received, and replies with a report to IGMPv3 queries. + 3 - Enforce IGMP version 3. Reacts the same as the default value 0. + + Note: this is not the same as force_mld_version, because the IGMPv3 + (RFC 3376) Security Considerations, unlike those of MLDv2 (RFC 3810), + do not clearly state that messages of other versions may be ignored + completely. Keeping the default value 0 is therefore recommended.
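As a usage sketch for the force_igmp_version knob documented above (eth0 is an assumed interface name; both forms address the same per-interface setting):

# Force IGMPv2 on a single interface:
echo 2 > /proc/sys/net/ipv4/conf/eth0/force_igmp_version

# Or via sysctl, for all interfaces:
sysctl -w net.ipv4.conf.all.force_igmp_version=2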
+ conf/interface/* changes special settings per interface (where "interface" is the name of your network interface) diff --git a/Documentation/networking/seg6-sysctl.txt b/Documentation/networking/seg6-sysctl.txt new file mode 100644 index 000000000000..bdbde23b19cb --- /dev/null +++ b/Documentation/networking/seg6-sysctl.txt @@ -0,0 +1,18 @@ +/proc/sys/net/conf/<iface>/seg6_* variables: + +seg6_enabled - BOOL + Accept or drop SR-enabled IPv6 packets on this interface. + + Relevant packets are those with SRH present and DA = local. + + 0 - disabled (default) + not 0 - enabled + +seg6_require_hmac - INTEGER + Define HMAC policy for ingress SR-enabled packets on this interface. + + -1 - Ignore HMAC field + 0 - Accept SR packets without HMAC, validate SR packets with HMAC + 1 - Drop SR packets without HMAC, validate SR packets with HMAC + + Default is 0. diff --git a/arch/arm64/boot/dts/broadcom/ns2-svk.dts b/arch/arm64/boot/dts/broadcom/ns2-svk.dts index b09f3bc5c6c1..c4d544244b19 100644 --- a/arch/arm64/boot/dts/broadcom/ns2-svk.dts +++ b/arch/arm64/boot/dts/broadcom/ns2-svk.dts @@ -56,6 +56,10 @@ }; }; +&enet { + status = "ok"; +}; + &pci_phy0 { status = "ok"; }; @@ -174,6 +178,7 @@ &mdio_mux_iproc { mdio@10 { gphy0: eth-phy@10 { + enet-phy-lane-swap; reg = <0x10>; }; }; diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi b/arch/arm64/boot/dts/broadcom/ns2.dtsi index d95dc408629a..773ed593da4d 100644 --- a/arch/arm64/boot/dts/broadcom/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi @@ -191,6 +191,18 @@ #include "ns2-clock.dtsi" + enet: ethernet@61000000 { + compatible = "brcm,ns2-amac"; + reg = <0x61000000 0x1000>, + <0x61090000 0x1000>, + <0x61030000 0x100>; + reg-names = "amac_base", "idm_base", "nicpm_base"; + interrupts = <GIC_SPI 341 IRQ_TYPE_LEVEL_HIGH>; + phy-handle = <&gphy0>; + phy-mode = "rgmii"; + status = "disabled"; + }; + dma0: dma@61360000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x61360000 0x1000>; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 292ae8bbeae2..76ed57f1b678 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1857,7 +1857,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, ft = mlx5_create_auto_grouped_flow_table(ns, priority, num_entries, num_groups, - 0); + 0, 0); if (!IS_ERR(ft)) { prio->refcount = 0; @@ -1877,10 +1877,10 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); unsigned int spec_index; - u32 action; int err = 0; if (!is_valid_attr(flow_attr)) @@ -1905,12 +1905,12 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, } spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); - action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : + flow_act.action = dst ? 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; handler->rule = mlx5_add_flow_rules(ft, spec, - action, - MLX5_FS_DEFAULT_FLOW_TAG, - dst, 1); + &flow_act, + dst, 1); if (IS_ERR(handler->rule)) { err = PTR_ERR(handler->rule); diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile index 10ce820daa48..c36be318de1a 100644 --- a/drivers/net/dsa/mv88e6xxx/Makefile +++ b/drivers/net/dsa/mv88e6xxx/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o mv88e6xxx-objs := chip.o mv88e6xxx-objs += global1.o mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o +mv88e6xxx-objs += port.o diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 72b9dac29901..d6d9d66b81ce 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -37,6 +37,7 @@ #include "mv88e6xxx.h" #include "global1.h" #include "global2.h" +#include "port.h" static void assert_reg_lock(struct mv88e6xxx_chip *chip) { @@ -221,22 +222,6 @@ int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val) return 0; } -static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, - u16 *val) -{ - int addr = chip->info->port_base_addr + port; - - return mv88e6xxx_read(chip, addr, reg, val); -} - -static int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, - u16 val) -{ - int addr = chip->info->port_base_addr + port; - - return mv88e6xxx_write(chip, addr, reg, val); -} - static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy, int reg, u16 *val) { @@ -716,6 +701,47 @@ static bool mv88e6xxx_6352_family(struct mv88e6xxx_chip *chip) return chip->info->family == MV88E6XXX_FAMILY_6352; } +static int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port, + int link, int speed, int duplex, + phy_interface_t mode) +{ + int err; + + if (!chip->info->ops->port_set_link) + return 0; + + /* Port's MAC control must not be changed unless the link is down */ + err = chip->info->ops->port_set_link(chip, port, 0); + if (err) + return err; + + if (chip->info->ops->port_set_speed) { + err = chip->info->ops->port_set_speed(chip, port, speed); + if (err && err != -EOPNOTSUPP) + goto restore_link; + } + + if (chip->info->ops->port_set_duplex) { + err = chip->info->ops->port_set_duplex(chip, port, duplex); + if (err && err != -EOPNOTSUPP) + goto restore_link; + } + + if (chip->info->ops->port_set_rgmii_delay) { + err = chip->info->ops->port_set_rgmii_delay(chip, port, mode); + if (err && err != -EOPNOTSUPP) + goto restore_link; + } + + err = 0; +restore_link: + if (chip->info->ops->port_set_link(chip, port, link)) + netdev_err(chip->ds->ports[port].netdev, + "failed to restore MAC's link\n"); + + return err; +} + /* We expect the switch to perform auto negotiation if there is a real * phy. However, in the case of a fixed link phy, we force the port * settings from the fixed link settings. 
@@ -724,64 +750,18 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { struct mv88e6xxx_chip *chip = ds->priv; - u16 reg; int err; if (!phy_is_pseudo_fixed_link(phydev)) return; mutex_lock(&chip->reg_lock); - - err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); - if (err) - goto out; - - reg &= ~(PORT_PCS_CTRL_LINK_UP | - PORT_PCS_CTRL_FORCE_LINK | - PORT_PCS_CTRL_DUPLEX_FULL | - PORT_PCS_CTRL_FORCE_DUPLEX | - PORT_PCS_CTRL_UNFORCED); - - reg |= PORT_PCS_CTRL_FORCE_LINK; - if (phydev->link) - reg |= PORT_PCS_CTRL_LINK_UP; - - if (mv88e6xxx_6065_family(chip) && phydev->speed > SPEED_100) - goto out; - - switch (phydev->speed) { - case SPEED_1000: - reg |= PORT_PCS_CTRL_1000; - break; - case SPEED_100: - reg |= PORT_PCS_CTRL_100; - break; - case SPEED_10: - reg |= PORT_PCS_CTRL_10; - break; - default: - pr_info("Unknown speed"); - goto out; - } - - reg |= PORT_PCS_CTRL_FORCE_DUPLEX; - if (phydev->duplex == DUPLEX_FULL) - reg |= PORT_PCS_CTRL_DUPLEX_FULL; - - if ((mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip)) && - (port >= mv88e6xxx_num_ports(chip) - 2)) { - if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) - reg |= PORT_PCS_CTRL_RGMII_DELAY_RXCLK; - if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) - reg |= PORT_PCS_CTRL_RGMII_DELAY_TXCLK; - if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) - reg |= (PORT_PCS_CTRL_RGMII_DELAY_RXCLK | - PORT_PCS_CTRL_RGMII_DELAY_TXCLK); - } - mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); - -out: + err = mv88e6xxx_port_setup_mac(chip, port, phydev->link, phydev->speed, + phydev->duplex, phydev->interface); mutex_unlock(&chip->reg_lock); + + if (err && err != -EOPNOTSUPP) + netdev_err(ds->ports[port].netdev, "failed to configure MAC\n"); } static int _mv88e6xxx_stats_wait(struct mv88e6xxx_chip *chip) @@ -1230,54 +1210,16 @@ static int _mv88e6xxx_atu_remove(struct mv88e6xxx_chip *chip, u16 fid, return _mv88e6xxx_atu_move(chip, fid, port, 0x0f, static_too); } -static const char * const mv88e6xxx_port_state_names[] = { - [PORT_CONTROL_STATE_DISABLED] = "Disabled", - [PORT_CONTROL_STATE_BLOCKING] = "Blocking/Listening", - [PORT_CONTROL_STATE_LEARNING] = "Learning", - [PORT_CONTROL_STATE_FORWARDING] = "Forwarding", -}; - -static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, - u8 state) -{ - struct dsa_switch *ds = chip->ds; - u16 reg; - int err; - u8 oldstate; - - err = mv88e6xxx_port_read(chip, port, PORT_CONTROL, ®); - if (err) - return err; - - oldstate = reg & PORT_CONTROL_STATE_MASK; - - reg &= ~PORT_CONTROL_STATE_MASK; - reg |= state; - - err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); - if (err) - return err; - - netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n", - mv88e6xxx_port_state_names[state], - mv88e6xxx_port_state_names[oldstate]); - - return 0; -} - static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) { struct net_device *bridge = chip->ports[port].bridge_dev; - const u16 mask = (1 << mv88e6xxx_num_ports(chip)) - 1; struct dsa_switch *ds = chip->ds; u16 output_ports = 0; - u16 reg; - int err; int i; /* allow CPU port or DSA link(s) to send frames to every port */ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { - output_ports = mask; + output_ports = ~0; } else { for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { /* allow sending frames to every group member */ @@ -1293,14 +1235,7 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) /* prevent frames 
from going back out of the port they came in on */ output_ports &= ~BIT(port); - err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); - if (err) - return err; - - reg &= ~mask; - reg |= output_ports & mask; - - return mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); + return mv88e6xxx_port_set_vlan_map(chip, port, output_ports); } static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, @@ -1328,13 +1263,11 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, } mutex_lock(&chip->reg_lock); - err = _mv88e6xxx_port_state(chip, port, stp_state); + err = mv88e6xxx_port_set_state(chip, port, stp_state); mutex_unlock(&chip->reg_lock); if (err) - netdev_err(ds->ports[port].netdev, - "failed to update state to %s\n", - mv88e6xxx_port_state_names[stp_state]); + netdev_err(ds->ports[port].netdev, "failed to update state\n"); } static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port) @@ -1350,49 +1283,6 @@ static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port) netdev_err(ds->ports[port].netdev, "failed to flush ATU\n"); } -static int _mv88e6xxx_port_pvid(struct mv88e6xxx_chip *chip, int port, - u16 *new, u16 *old) -{ - struct dsa_switch *ds = chip->ds; - u16 pvid, reg; - int err; - - err = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, ®); - if (err) - return err; - - pvid = reg & PORT_DEFAULT_VLAN_MASK; - - if (new) { - reg &= ~PORT_DEFAULT_VLAN_MASK; - reg |= *new & PORT_DEFAULT_VLAN_MASK; - - err = mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, reg); - if (err) - return err; - - netdev_dbg(ds->ports[port].netdev, - "DefaultVID %d (was %d)\n", *new, pvid); - } - - if (old) - *old = pvid; - - return 0; -} - -static int _mv88e6xxx_port_pvid_get(struct mv88e6xxx_chip *chip, - int port, u16 *pvid) -{ - return _mv88e6xxx_port_pvid(chip, port, NULL, pvid); -} - -static int _mv88e6xxx_port_pvid_set(struct mv88e6xxx_chip *chip, - int port, u16 pvid) -{ - return _mv88e6xxx_port_pvid(chip, port, &pvid, NULL); -} - static int _mv88e6xxx_vtu_wait(struct mv88e6xxx_chip *chip) { return mv88e6xxx_g1_wait(chip, GLOBAL_VTU_OP, GLOBAL_VTU_OP_BUSY); @@ -1572,7 +1462,7 @@ static int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port, mutex_lock(&chip->reg_lock); - err = _mv88e6xxx_port_pvid_get(chip, port, &pvid); + err = mv88e6xxx_port_get_pvid(chip, port, &pvid); if (err) goto unlock; @@ -1736,75 +1626,6 @@ loadpurge: return _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_LOAD_PURGE); } -static int _mv88e6xxx_port_fid(struct mv88e6xxx_chip *chip, int port, - u16 *new, u16 *old) -{ - struct dsa_switch *ds = chip->ds; - u16 upper_mask; - u16 fid; - u16 reg; - int err; - - if (mv88e6xxx_num_databases(chip) == 4096) - upper_mask = 0xff; - else if (mv88e6xxx_num_databases(chip) == 256) - upper_mask = 0xf; - else - return -EOPNOTSUPP; - - /* Port's default FID bits 3:0 are located in reg 0x06, offset 12 */ - err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); - if (err) - return err; - - fid = (reg & PORT_BASE_VLAN_FID_3_0_MASK) >> 12; - - if (new) { - reg &= ~PORT_BASE_VLAN_FID_3_0_MASK; - reg |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK; - - err = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); - if (err) - return err; - } - - /* Port's default FID bits 11:4 are located in reg 0x05, offset 0 */ - err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, ®); - if (err) - return err; - - fid |= (reg & upper_mask) << 4; - - if (new) { - reg &= ~upper_mask; - reg |= (*new >> 4) & upper_mask; - - err = mv88e6xxx_port_write(chip, 
port, PORT_CONTROL_1, reg); - if (err) - return err; - - netdev_dbg(ds->ports[port].netdev, - "FID %d (was %d)\n", *new, fid); - } - - if (old) - *old = fid; - - return 0; -} - -static int _mv88e6xxx_port_fid_get(struct mv88e6xxx_chip *chip, - int port, u16 *fid) -{ - return _mv88e6xxx_port_fid(chip, port, NULL, fid); -} - -static int _mv88e6xxx_port_fid_set(struct mv88e6xxx_chip *chip, - int port, u16 fid) -{ - return _mv88e6xxx_port_fid(chip, port, &fid, NULL); -} - static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid) { DECLARE_BITMAP(fid_bitmap, MV88E6XXX_N_FID); @@ -1815,7 +1636,7 @@ static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid) /* Set every FID bit used by the (un)bridged ports */ for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { - err = _mv88e6xxx_port_fid_get(chip, i, fid); + err = mv88e6xxx_port_get_fid(chip, i, fid); if (err) return err; @@ -1980,48 +1801,19 @@ unlock: return err; } -static const char * const mv88e6xxx_port_8021q_mode_names[] = { - [PORT_CONTROL_2_8021Q_DISABLED] = "Disabled", - [PORT_CONTROL_2_8021Q_FALLBACK] = "Fallback", - [PORT_CONTROL_2_8021Q_CHECK] = "Check", - [PORT_CONTROL_2_8021Q_SECURE] = "Secure", -}; - static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) { struct mv88e6xxx_chip *chip = ds->priv; - u16 old, new = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE : + u16 mode = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE : PORT_CONTROL_2_8021Q_DISABLED; - u16 reg; int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU)) return -EOPNOTSUPP; mutex_lock(&chip->reg_lock); - - err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, ®); - if (err) - goto unlock; - - old = reg & PORT_CONTROL_2_8021Q_MASK; - - if (new != old) { - reg &= ~PORT_CONTROL_2_8021Q_MASK; - reg |= new & PORT_CONTROL_2_8021Q_MASK; - - err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); - if (err) - goto unlock; - - netdev_dbg(ds->ports[port].netdev, "802.1Q Mode %s (was %s)\n", - mv88e6xxx_port_8021q_mode_names[new], - mv88e6xxx_port_8021q_mode_names[old]); - } - - err = 0; -unlock: + err = mv88e6xxx_port_set_8021q_mode(chip, port, mode); mutex_unlock(&chip->reg_lock); return err; @@ -2089,7 +1881,7 @@ static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, "failed to add VLAN %d%c\n", vid, untagged ? 
'u' : 't'); - if (pvid && _mv88e6xxx_port_pvid_set(chip, port, vlan->vid_end)) + if (pvid && mv88e6xxx_port_set_pvid(chip, port, vlan->vid_end)) netdev_err(ds->ports[port].netdev, "failed to set PVID %d\n", vlan->vid_end); @@ -2144,7 +1936,7 @@ static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, mutex_lock(&chip->reg_lock); - err = _mv88e6xxx_port_pvid_get(chip, port, &pvid); + err = mv88e6xxx_port_get_pvid(chip, port, &pvid); if (err) goto unlock; @@ -2154,7 +1946,7 @@ static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, goto unlock; if (vid == pvid) { - err = _mv88e6xxx_port_pvid_set(chip, port, 0); + err = mv88e6xxx_port_set_pvid(chip, port, 0); if (err) goto unlock; } @@ -2265,7 +2057,7 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, /* Null VLAN ID corresponds to the port private database */ if (vid == 0) - err = _mv88e6xxx_port_fid_get(chip, port, &vlan.fid); + err = mv88e6xxx_port_get_fid(chip, port, &vlan.fid); else err = _mv88e6xxx_vtu_get(chip, vid, &vlan, false); if (err) @@ -2441,7 +2233,7 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port, int err; /* Dump port's default Filtering Information Database (VLAN ID 0) */ - err = _mv88e6xxx_port_fid_get(chip, port, &fid); + err = mv88e6xxx_port_get_fid(chip, port, &fid); if (err) return err; @@ -2541,12 +2333,8 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) /* Set all ports to the disabled state. */ for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { - err = mv88e6xxx_port_read(chip, i, PORT_CONTROL, ®); - if (err) - return err; - - err = mv88e6xxx_port_write(chip, i, PORT_CONTROL, - reg & 0xfffc); + err = mv88e6xxx_port_set_state(chip, i, + PORT_CONTROL_STATE_DISABLED); if (err) return err; } @@ -2616,35 +2404,20 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) int err; u16 reg; - if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || - mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || - mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip) || - mv88e6xxx_6065_family(chip) || mv88e6xxx_6320_family(chip)) { - /* MAC Forcing register: don't force link, speed, - * duplex or flow control state to any particular - * values on physical ports, but force the CPU port - * and all DSA ports to their maximum bandwidth and - * full duplex. - */ - err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); - if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { - reg &= ~PORT_PCS_CTRL_UNFORCED; - reg |= PORT_PCS_CTRL_FORCE_LINK | - PORT_PCS_CTRL_LINK_UP | - PORT_PCS_CTRL_DUPLEX_FULL | - PORT_PCS_CTRL_FORCE_DUPLEX; - if (mv88e6xxx_6065_family(chip)) - reg |= PORT_PCS_CTRL_100; - else - reg |= PORT_PCS_CTRL_1000; - } else { - reg |= PORT_PCS_CTRL_UNFORCED; - } - - err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); - if (err) - return err; - } + /* MAC Forcing register: don't force link, speed, duplex or flow control + * state to any particular values on physical ports, but force the CPU + * port and all DSA ports to their maximum bandwidth and full duplex. 
+ */ + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) + err = mv88e6xxx_port_setup_mac(chip, port, LINK_FORCED_UP, + SPEED_MAX, DUPLEX_FULL, + PHY_INTERFACE_MODE_NA); + else + err = mv88e6xxx_port_setup_mac(chip, port, LINK_UNFORCED, + SPEED_UNFORCED, DUPLEX_UNFORCED, + PHY_INTERFACE_MODE_NA); + if (err) + return err; /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, * disable Header mode, enable IGMP/MLD snooping, disable VLAN @@ -2848,7 +2621,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * database, and allow bidirectional communication between the * CPU and DSA port(s), and the other ports. */ - err = _mv88e6xxx_port_fid_set(chip, port, 0); + err = mv88e6xxx_port_set_fid(chip, port, 0); if (err) return err; @@ -3367,42 +3140,64 @@ static const struct mv88e6xxx_ops mv88e6085_ops = { .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6095_ops = { .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6123_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_read, .phy_write = mv88e6xxx_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6131_ops = { .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6161_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_read, .phy_write = mv88e6xxx_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6165_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_read, .phy_write = mv88e6xxx_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6171_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6172_ops = { @@ -3411,12 +3206,20 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6352_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6175_ops = { .set_switch_mac = 
mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6176_ops = { @@ -3425,12 +3228,19 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6352_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6185_ops = { .set_switch_mac = mv88e6xxx_g1_set_switch_mac, .phy_read = mv88e6xxx_phy_ppu_read, .phy_write = mv88e6xxx_phy_ppu_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6240_ops = { @@ -3439,6 +3249,10 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6352_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6320_ops = { @@ -3447,6 +3261,9 @@ static const struct mv88e6xxx_ops mv88e6320_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6321_ops = { @@ -3455,18 +3272,29 @@ static const struct mv88e6xxx_ops mv88e6321_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6350_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6351_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6185_port_set_speed, }; static const struct mv88e6xxx_ops mv88e6352_ops = { @@ -3475,6 +3303,10 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .set_switch_mac = mv88e6xxx_g2_set_switch_mac, .phy_read = mv88e6xxx_g2_smi_phy_read, .phy_write = mv88e6xxx_g2_smi_phy_write, + .port_set_link = mv88e6xxx_port_set_link, + .port_set_duplex = mv88e6xxx_port_set_duplex, + .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, + .port_set_speed = mv88e6352_port_set_speed, }; static const struct 
mv88e6xxx_info mv88e6xxx_table[] = { diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 6f48e5886b2a..929613021eff 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -61,16 +61,22 @@ #define PORT_PCS_CTRL 0x01 #define PORT_PCS_CTRL_RGMII_DELAY_RXCLK BIT(15) #define PORT_PCS_CTRL_RGMII_DELAY_TXCLK BIT(14) +#define PORT_PCS_CTRL_FORCE_SPEED BIT(13) /* 6390 */ +#define PORT_PCS_CTRL_ALTSPEED BIT(12) /* 6390 */ +#define PORT_PCS_CTRL_200BASE BIT(12) /* 6352 */ #define PORT_PCS_CTRL_FC BIT(7) #define PORT_PCS_CTRL_FORCE_FC BIT(6) #define PORT_PCS_CTRL_LINK_UP BIT(5) #define PORT_PCS_CTRL_FORCE_LINK BIT(4) #define PORT_PCS_CTRL_DUPLEX_FULL BIT(3) #define PORT_PCS_CTRL_FORCE_DUPLEX BIT(2) -#define PORT_PCS_CTRL_10 0x00 -#define PORT_PCS_CTRL_100 0x01 -#define PORT_PCS_CTRL_1000 0x02 -#define PORT_PCS_CTRL_UNFORCED 0x03 +#define PORT_PCS_CTRL_SPEED_MASK (0x03) +#define PORT_PCS_CTRL_SPEED_10 (0x00) +#define PORT_PCS_CTRL_SPEED_100 (0x01) +#define PORT_PCS_CTRL_SPEED_200 (0x02) /* 6065 and non Gb chips */ +#define PORT_PCS_CTRL_SPEED_1000 (0x02) +#define PORT_PCS_CTRL_SPEED_10000 (0x03) /* 6390X */ +#define PORT_PCS_CTRL_SPEED_UNFORCED (0x03) #define PORT_PAUSE_CTRL 0x02 #define PORT_SWITCH_ID 0x03 #define PORT_SWITCH_ID_PROD_NUM_6085 0x04a @@ -727,6 +733,41 @@ struct mv88e6xxx_ops { u16 *val); int (*phy_write)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); + + /* RGMII Receive/Transmit Timing Control + * Add delay on PHY_INTERFACE_MODE_RGMII_*ID, no delay otherwise. + */ + int (*port_set_rgmii_delay)(struct mv88e6xxx_chip *chip, int port, + phy_interface_t mode); + +#define LINK_FORCED_DOWN 0 +#define LINK_FORCED_UP 1 +#define LINK_UNFORCED -2 + + /* Port's MAC link state + * Use LINK_FORCED_UP or LINK_FORCED_DOWN to force link up or down, + * or LINK_UNFORCED for normal link detection. + */ + int (*port_set_link)(struct mv88e6xxx_chip *chip, int port, int link); + +#define DUPLEX_UNFORCED -2 + + /* Port's MAC duplex mode + * + * Use DUPLEX_HALF or DUPLEX_FULL to force half or full duplex, + * or DUPLEX_UNFORCED for normal duplex detection. + */ + int (*port_set_duplex)(struct mv88e6xxx_chip *chip, int port, int dup); + +#define SPEED_MAX INT_MAX +#define SPEED_UNFORCED -2 + + /* Port's MAC speed (in Mbps) + * + * Depending on the chip, 10, 100, 200, 1000, 2500, 10000 are valid. + * Use SPEED_UNFORCED for normal detection, SPEED_MAX for max value. + */ + int (*port_set_speed)(struct mv88e6xxx_chip *chip, int port, int speed); }; enum stat_type { diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c new file mode 100644 index 000000000000..e4978f6367aa --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -0,0 +1,498 @@ +/* + * Marvell 88E6xxx Switch Port Registers support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include "mv88e6xxx.h" +#include "port.h" + +int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, + u16 *val) +{ + int addr = chip->info->port_base_addr + port; + + return mv88e6xxx_read(chip, addr, reg, val); +} + +int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, + u16 val) +{ + int addr = chip->info->port_base_addr + port; + + return mv88e6xxx_write(chip, addr, reg, val); +} + +/* Offset 0x01: MAC (or PCS or Physical) Control Register + * + * Link, Duplex and Flow Control have one force bit, one value bit. + * + * For port's MAC speed, ForceSpd (or SpdValue) bits 1:0 program the value. + * Alternative values require the 200BASE (or AltSpeed) bit 12 set. + * Newer chips need a ForcedSpd bit 13 set to consider the value. + */ + +static int mv88e6xxx_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port, + phy_interface_t mode) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); + if (err) + return err; + + reg &= ~(PORT_PCS_CTRL_RGMII_DELAY_RXCLK | + PORT_PCS_CTRL_RGMII_DELAY_TXCLK); + + switch (mode) { + case PHY_INTERFACE_MODE_RGMII_RXID: + reg |= PORT_PCS_CTRL_RGMII_DELAY_RXCLK; + break; + case PHY_INTERFACE_MODE_RGMII_TXID: + reg |= PORT_PCS_CTRL_RGMII_DELAY_TXCLK; + break; + case PHY_INTERFACE_MODE_RGMII_ID: + reg |= PORT_PCS_CTRL_RGMII_DELAY_RXCLK | + PORT_PCS_CTRL_RGMII_DELAY_TXCLK; + break; + case PHY_INTERFACE_MODE_RGMII: + break; + default: + return 0; + } + + err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); + if (err) + return err; + + netdev_dbg(chip->ds->ports[port].netdev, "delay RXCLK %s, TXCLK %s\n", + reg & PORT_PCS_CTRL_RGMII_DELAY_RXCLK ? "yes" : "no", + reg & PORT_PCS_CTRL_RGMII_DELAY_TXCLK ? "yes" : "no"); + + return 0; +} + +int mv88e6352_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port, + phy_interface_t mode) +{ + if (port < 5) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_rgmii_delay(chip, port, mode); +} + +int mv88e6390_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port, + phy_interface_t mode) +{ + if (port != 0) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_rgmii_delay(chip, port, mode); +} + +int mv88e6xxx_port_set_link(struct mv88e6xxx_chip *chip, int port, int link) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); + if (err) + return err; + + reg &= ~(PORT_PCS_CTRL_FORCE_LINK | PORT_PCS_CTRL_LINK_UP); + + switch (link) { + case LINK_FORCED_DOWN: + reg |= PORT_PCS_CTRL_FORCE_LINK; + break; + case LINK_FORCED_UP: + reg |= PORT_PCS_CTRL_FORCE_LINK | PORT_PCS_CTRL_LINK_UP; + break; + case LINK_UNFORCED: + /* normal link detection */ + break; + default: + return -EINVAL; + } + + err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); + if (err) + return err; + + netdev_dbg(chip->ds->ports[port].netdev, "%s link %s\n", + reg & PORT_PCS_CTRL_FORCE_LINK ? "Force" : "Unforce", + reg & PORT_PCS_CTRL_LINK_UP ? 
"up" : "down"); + + return 0; +} + +int mv88e6xxx_port_set_duplex(struct mv88e6xxx_chip *chip, int port, int dup) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); + if (err) + return err; + + reg &= ~(PORT_PCS_CTRL_FORCE_DUPLEX | PORT_PCS_CTRL_DUPLEX_FULL); + + switch (dup) { + case DUPLEX_HALF: + reg |= PORT_PCS_CTRL_FORCE_DUPLEX; + break; + case DUPLEX_FULL: + reg |= PORT_PCS_CTRL_FORCE_DUPLEX | PORT_PCS_CTRL_DUPLEX_FULL; + break; + case DUPLEX_UNFORCED: + /* normal duplex detection */ + break; + default: + return -EINVAL; + } + + err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); + if (err) + return err; + + netdev_dbg(chip->ds->ports[port].netdev, "%s %s duplex\n", + reg & PORT_PCS_CTRL_FORCE_DUPLEX ? "Force" : "Unforce", + reg & PORT_PCS_CTRL_DUPLEX_FULL ? "full" : "half"); + + return 0; +} + +static int mv88e6xxx_port_set_speed(struct mv88e6xxx_chip *chip, int port, + int speed, bool alt_bit, bool force_bit) +{ + u16 reg, ctrl; + int err; + + switch (speed) { + case 10: + ctrl = PORT_PCS_CTRL_SPEED_10; + break; + case 100: + ctrl = PORT_PCS_CTRL_SPEED_100; + break; + case 200: + if (alt_bit) + ctrl = PORT_PCS_CTRL_SPEED_100 | PORT_PCS_CTRL_ALTSPEED; + else + ctrl = PORT_PCS_CTRL_SPEED_200; + break; + case 1000: + ctrl = PORT_PCS_CTRL_SPEED_1000; + break; + case 2500: + ctrl = PORT_PCS_CTRL_SPEED_1000 | PORT_PCS_CTRL_ALTSPEED; + break; + case 10000: + /* all bits set, fall through... */ + case SPEED_UNFORCED: + ctrl = PORT_PCS_CTRL_SPEED_UNFORCED; + break; + default: + return -EOPNOTSUPP; + } + + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); + if (err) + return err; + + reg &= ~PORT_PCS_CTRL_SPEED_MASK; + if (alt_bit) + reg &= ~PORT_PCS_CTRL_ALTSPEED; + if (force_bit) { + reg &= ~PORT_PCS_CTRL_FORCE_SPEED; + if (speed) + ctrl |= PORT_PCS_CTRL_FORCE_SPEED; + } + reg |= ctrl; + + err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); + if (err) + return err; + + if (speed) + netdev_dbg(chip->ds->ports[port].netdev, + "Speed set to %d Mbps\n", speed); + else + netdev_dbg(chip->ds->ports[port].netdev, "Speed unforced\n"); + + return 0; +} + +/* Support 10, 100, 200 Mbps (e.g. 88E6065 family) */ +int mv88e6065_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +{ + if (speed == SPEED_MAX) + speed = 200; + + if (speed > 200) + return -EOPNOTSUPP; + + /* Setting 200 Mbps on port 0 to 3 selects 100 Mbps */ + return mv88e6xxx_port_set_speed(chip, port, speed, false, false); +} + +/* Support 10, 100, 1000 Mbps (e.g. 88E6185 family) */ +int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +{ + if (speed == SPEED_MAX) + speed = 1000; + + if (speed == 200 || speed > 1000) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_speed(chip, port, speed, false, false); +} + +/* Support 10, 100, 200, 1000 Mbps (e.g. 88E6352 family) */ +int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +{ + if (speed == SPEED_MAX) + speed = 1000; + + if (speed > 1000) + return -EOPNOTSUPP; + + if (speed == 200 && port < 5) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_speed(chip, port, speed, true, false); +} + +/* Support 10, 100, 200, 1000, 2500 Mbps (e.g. 88E6390) */ +int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +{ + if (speed == SPEED_MAX) + speed = port < 9 ? 
1000 : 2500; + + if (speed > 2500) + return -EOPNOTSUPP; + + if (speed == 200 && port != 0) + return -EOPNOTSUPP; + + if (speed == 2500 && port < 9) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_speed(chip, port, speed, true, true); +} + +/* Support 10, 100, 200, 1000, 2500, 10000 Mbps (e.g. 88E6190X) */ +int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +{ + if (speed == SPEED_MAX) + speed = port < 9 ? 1000 : 10000; + + if (speed == 200 && port != 0) + return -EOPNOTSUPP; + + if (speed >= 2500 && port < 9) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_speed(chip, port, speed, true, true); +} + +/* Offset 0x04: Port Control Register */ + +static const char * const mv88e6xxx_port_state_names[] = { + [PORT_CONTROL_STATE_DISABLED] = "Disabled", + [PORT_CONTROL_STATE_BLOCKING] = "Blocking/Listening", + [PORT_CONTROL_STATE_LEARNING] = "Learning", + [PORT_CONTROL_STATE_FORWARDING] = "Forwarding", +}; + +int mv88e6xxx_port_set_state(struct mv88e6xxx_chip *chip, int port, u8 state) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL, ®); + if (err) + return err; + + reg &= ~PORT_CONTROL_STATE_MASK; + reg |= state; + + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); + if (err) + return err; + + netdev_dbg(chip->ds->ports[port].netdev, "PortState set to %s\n", + mv88e6xxx_port_state_names[state]); + + return 0; +} + +/* Offset 0x05: Port Control 1 */ + +/* Offset 0x06: Port Based VLAN Map */ + +int mv88e6xxx_port_set_vlan_map(struct mv88e6xxx_chip *chip, int port, u16 map) +{ + const u16 mask = GENMASK(mv88e6xxx_num_ports(chip) - 1, 0); + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); + if (err) + return err; + + reg &= ~mask; + reg |= map & mask; + + err = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); + if (err) + return err; + + netdev_dbg(chip->ds->ports[port].netdev, "VLANTable set to %.3x\n", + map); + + return 0; +} + +int mv88e6xxx_port_get_fid(struct mv88e6xxx_chip *chip, int port, u16 *fid) +{ + const u16 upper_mask = (mv88e6xxx_num_databases(chip) - 1) >> 4; + u16 reg; + int err; + + /* Port's default FID lower 4 bits are located in reg 0x06, offset 12 */ + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); + if (err) + return err; + + *fid = (reg & 0xf000) >> 12; + + /* Port's default FID upper bits are located in reg 0x05, offset 0 */ + if (upper_mask) { + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, ®); + if (err) + return err; + + *fid |= (reg & upper_mask) << 4; + } + + return 0; +} + +int mv88e6xxx_port_set_fid(struct mv88e6xxx_chip *chip, int port, u16 fid) +{ + const u16 upper_mask = (mv88e6xxx_num_databases(chip) - 1) >> 4; + u16 reg; + int err; + + if (fid >= mv88e6xxx_num_databases(chip)) + return -EINVAL; + + /* Port's default FID lower 4 bits are located in reg 0x06, offset 12 */ + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); + if (err) + return err; + + reg &= 0x0fff; + reg |= (fid & 0x000f) << 12; + + err = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); + if (err) + return err; + + /* Port's default FID upper bits are located in reg 0x05, offset 0 */ + if (upper_mask) { + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, ®); + if (err) + return err; + + reg &= ~upper_mask; + reg |= (fid >> 4) & upper_mask; + + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, reg); + if (err) + return err; + } + + netdev_dbg(chip->ds->ports[port].netdev, "FID set to %u\n", fid); + + return 0; +} + +/* Offset 0x07: Default 
Port VLAN ID & Priority */ + +int mv88e6xxx_port_get_pvid(struct mv88e6xxx_chip *chip, int port, u16 *pvid) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, ®); + if (err) + return err; + + *pvid = reg & PORT_DEFAULT_VLAN_MASK; + + return 0; +} + +int mv88e6xxx_port_set_pvid(struct mv88e6xxx_chip *chip, int port, u16 pvid) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, ®); + if (err) + return err; + + reg &= ~PORT_DEFAULT_VLAN_MASK; + reg |= pvid & PORT_DEFAULT_VLAN_MASK; + + err = mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, reg); + if (err) + return err; + + netdev_dbg(chip->ds->ports[port].netdev, "DefaultVID set to %u\n", + pvid); + + return 0; +} + +/* Offset 0x08: Port Control 2 Register */ + +static const char * const mv88e6xxx_port_8021q_mode_names[] = { + [PORT_CONTROL_2_8021Q_DISABLED] = "Disabled", + [PORT_CONTROL_2_8021Q_FALLBACK] = "Fallback", + [PORT_CONTROL_2_8021Q_CHECK] = "Check", + [PORT_CONTROL_2_8021Q_SECURE] = "Secure", +}; + +int mv88e6xxx_port_set_8021q_mode(struct mv88e6xxx_chip *chip, int port, + u16 mode) +{ + u16 reg; + int err; + + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, ®); + if (err) + return err; + + reg &= ~PORT_CONTROL_2_8021Q_MASK; + reg |= mode & PORT_CONTROL_2_8021Q_MASK; + + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); + if (err) + return err; + + netdev_dbg(chip->ds->ports[port].netdev, "802.1QMode set to %s\n", + mv88e6xxx_port_8021q_mode_names[mode]); + + return 0; +} diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h new file mode 100644 index 000000000000..499129c1489c --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -0,0 +1,52 @@ +/* + * Marvell 88E6xxx Switch Port Registers support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef _MV88E6XXX_PORT_H +#define _MV88E6XXX_PORT_H + +#include "mv88e6xxx.h" + +int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, + u16 *val); +int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, + u16 val); + +int mv88e6352_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port, + phy_interface_t mode); +int mv88e6390_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port, + phy_interface_t mode); + +int mv88e6xxx_port_set_link(struct mv88e6xxx_chip *chip, int port, int link); + +int mv88e6xxx_port_set_duplex(struct mv88e6xxx_chip *chip, int port, int dup); + +int mv88e6065_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); +int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); +int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); +int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); +int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); + +int mv88e6xxx_port_set_state(struct mv88e6xxx_chip *chip, int port, u8 state); + +int mv88e6xxx_port_set_vlan_map(struct mv88e6xxx_chip *chip, int port, u16 map); + +int mv88e6xxx_port_get_fid(struct mv88e6xxx_chip *chip, int port, u16 *fid); +int mv88e6xxx_port_set_fid(struct mv88e6xxx_chip *chip, int port, u16 fid); + +int mv88e6xxx_port_get_pvid(struct mv88e6xxx_chip *chip, int port, u16 *pvid); +int mv88e6xxx_port_set_pvid(struct mv88e6xxx_chip *chip, int port, u16 pvid); + +int mv88e6xxx_port_set_8021q_mode(struct mv88e6xxx_chip *chip, int port, + u16 mode); + +#endif /* _MV88E6XXX_PORT_H */ diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index dbdf06f2af49..a0cacbe846ba 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -1000,7 +1000,7 @@ typhoon_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { struct typhoon *tp = netdev_priv(dev); - u32 supported, advertising; + u32 supported, advertising = 0; supported = SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg; @@ -1049,6 +1049,11 @@ typhoon_get_link_ksettings(struct net_device *dev, else cmd->base.autoneg = AUTONEG_DISABLE; + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + return 0; } diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 4a9a16e25666..3aaad33cdbc6 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -1816,21 +1816,23 @@ static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) strlcpy(info->bus_info, pci_name(np->pci_dev), sizeof(info->bus_info)); } -static int get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct netdev_private *np = netdev_priv(dev); spin_lock_irq(&np->lock); - mii_ethtool_gset(&np->mii_if, ecmd); + mii_ethtool_get_link_ksettings(&np->mii_if, cmd); spin_unlock_irq(&np->lock); return 0; } -static int set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct netdev_private *np = netdev_priv(dev); int res; spin_lock_irq(&np->lock); - res = mii_ethtool_sset(&np->mii_if, ecmd); + res = 
mii_ethtool_set_link_ksettings(&np->mii_if, cmd); spin_unlock_irq(&np->lock); check_duplex(dev); return res; @@ -1861,12 +1863,12 @@ static void set_msglevel(struct net_device *dev, u32 val) static const struct ethtool_ops ethtool_ops = { .begin = check_if_running, .get_drvinfo = get_drvinfo, - .get_settings = get_settings, - .set_settings = set_settings, .nway_reset = nway_reset, .get_link = get_link, .get_msglevel = get_msglevel, .set_msglevel = set_msglevel, + .get_link_ksettings = get_link_ksettings, + .set_link_ksettings = set_link_ksettings, }; static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index f8df8248035e..93def92f9997 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -1290,15 +1290,6 @@ static int greth_mdio_probe(struct net_device *dev) return 0; } -static inline int phy_aneg_done(struct phy_device *phydev) -{ - int retval; - - retval = phy_read(phydev, MII_BMSR); - - return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE); -} - static int greth_mdio_init(struct greth_private *greth) { int ret; diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index a5c1e290677a..16f0c70266bc 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -429,14 +429,16 @@ static const char version[] = "acenic.c: v0.92 08/05/2002 Jes Sorensen, [email protected]\n" " http://home.cern.ch/~jes/gige/acenic.html\n"; -static int ace_get_settings(struct net_device *, struct ethtool_cmd *); -static int ace_set_settings(struct net_device *, struct ethtool_cmd *); +static int ace_get_link_ksettings(struct net_device *, + struct ethtool_link_ksettings *); +static int ace_set_link_ksettings(struct net_device *, + const struct ethtool_link_ksettings *); static void ace_get_drvinfo(struct net_device *, struct ethtool_drvinfo *); static const struct ethtool_ops ace_ethtool_ops = { - .get_settings = ace_get_settings, - .set_settings = ace_set_settings, .get_drvinfo = ace_get_drvinfo, + .get_link_ksettings = ace_get_link_ksettings, + .set_link_ksettings = ace_set_link_ksettings, }; static void ace_watchdog(struct net_device *dev); @@ -2579,43 +2581,44 @@ static int ace_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int ace_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int ace_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct ace_private *ap = netdev_priv(dev); struct ace_regs __iomem *regs = ap->regs; u32 link; + u32 supported; - memset(ecmd, 0, sizeof(struct ethtool_cmd)); - ecmd->supported = - (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | - SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | - SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full | - SUPPORTED_Autoneg | SUPPORTED_FIBRE); + memset(cmd, 0, sizeof(struct ethtool_link_ksettings)); - ecmd->port = PORT_FIBRE; - ecmd->transceiver = XCVR_INTERNAL; + supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | + SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full | + SUPPORTED_Autoneg | SUPPORTED_FIBRE); + + cmd->base.port = PORT_FIBRE; link = readl(®s->GigLnkState); - if (link & LNK_1000MB) - ethtool_cmd_speed_set(ecmd, SPEED_1000); - else { + if (link & LNK_1000MB) { + cmd->base.speed = SPEED_1000; + } else { link = readl(®s->FastLnkState); if (link & LNK_100MB) - 
ethtool_cmd_speed_set(ecmd, SPEED_100); + cmd->base.speed = SPEED_100; else if (link & LNK_10MB) - ethtool_cmd_speed_set(ecmd, SPEED_10); + cmd->base.speed = SPEED_10; else - ethtool_cmd_speed_set(ecmd, 0); + cmd->base.speed = 0; } if (link & LNK_FULL_DUPLEX) - ecmd->duplex = DUPLEX_FULL; + cmd->base.duplex = DUPLEX_FULL; else - ecmd->duplex = DUPLEX_HALF; + cmd->base.duplex = DUPLEX_HALF; if (link & LNK_NEGOTIATE) - ecmd->autoneg = AUTONEG_ENABLE; + cmd->base.autoneg = AUTONEG_ENABLE; else - ecmd->autoneg = AUTONEG_DISABLE; + cmd->base.autoneg = AUTONEG_DISABLE; #if 0 /* @@ -2626,13 +2629,15 @@ static int ace_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) ecmd->txcoal = readl(®s->TuneTxCoalTicks); ecmd->rxcoal = readl(®s->TuneRxCoalTicks); #endif - ecmd->maxtxpkt = readl(®s->TuneMaxTxDesc); - ecmd->maxrxpkt = readl(®s->TuneMaxRxDesc); + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); return 0; } -static int ace_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int ace_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct ace_private *ap = netdev_priv(dev); struct ace_regs __iomem *regs = ap->regs; @@ -2655,11 +2660,11 @@ static int ace_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) LNK_RX_FLOW_CTL_Y | LNK_NEG_FCTL; if (!ACE_IS_TIGON_I(ap)) link |= LNK_TX_FLOW_CTL_Y; - if (ecmd->autoneg == AUTONEG_ENABLE) + if (cmd->base.autoneg == AUTONEG_ENABLE) link |= LNK_NEGOTIATE; - if (ethtool_cmd_speed(ecmd) != speed) { + if (cmd->base.speed != speed) { link &= ~(LNK_1000MB | LNK_100MB | LNK_10MB); - switch (ethtool_cmd_speed(ecmd)) { + switch (cmd->base.speed) { case SPEED_1000: link |= LNK_1000MB; break; @@ -2672,7 +2677,7 @@ static int ace_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) } } - if (ecmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) link |= LNK_FULL_DUPLEX; if (link != ap->link) { diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index 0038709fd317..7ab6efbe4189 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -173,11 +173,13 @@ config SUNLANCE config AMD_XGBE tristate "AMD 10GbE Ethernet driver" - depends on ((OF_NET && OF_ADDRESS) || ACPI) && HAS_IOMEM && HAS_DMA - depends on ARM64 || COMPILE_TEST + depends on ((OF_NET && OF_ADDRESS) || ACPI || PCI) && HAS_IOMEM && HAS_DMA + depends on X86 || ARM64 || COMPILE_TEST select BITREVERSE select CRC32 select PTP_1588_CLOCK + select PHYLIB + select AMD_XGBE_HAVE_ECC if X86 ---help--- This driver supports the AMD 10GbE Ethernet device found on an AMD SoC. @@ -195,4 +197,8 @@ config AMD_XGBE_DCB If unsure, say N. 
+config AMD_XGBE_HAVE_ECC + bool + default n + endif # NET_VENDOR_AMD diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 84b4ffbd084a..11cf1e3e0295 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1421,21 +1421,23 @@ static void amd8111e_get_regs(struct net_device *dev, struct ethtool_regs *regs, amd8111e_read_regs(lp, buf); } -static int amd8111e_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int amd8111e_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct amd8111e_priv *lp = netdev_priv(dev); spin_lock_irq(&lp->lock); - mii_ethtool_gset(&lp->mii_if, ecmd); + mii_ethtool_get_link_ksettings(&lp->mii_if, cmd); spin_unlock_irq(&lp->lock); return 0; } -static int amd8111e_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int amd8111e_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct amd8111e_priv *lp = netdev_priv(dev); int res; spin_lock_irq(&lp->lock); - res = mii_ethtool_sset(&lp->mii_if, ecmd); + res = mii_ethtool_set_link_ksettings(&lp->mii_if, cmd); spin_unlock_irq(&lp->lock); return res; } @@ -1482,12 +1484,12 @@ static const struct ethtool_ops ops = { .get_drvinfo = amd8111e_get_drvinfo, .get_regs_len = amd8111e_get_regs_len, .get_regs = amd8111e_get_regs, - .get_settings = amd8111e_get_settings, - .set_settings = amd8111e_set_settings, .nway_reset = amd8111e_nway_reset, .get_link = amd8111e_get_link, .get_wol = amd8111e_get_wol, .set_wol = amd8111e_set_wol, + .get_link_ksettings = amd8111e_get_link_ksettings, + .set_link_ksettings = amd8111e_set_link_ksettings, }; /* This function handles all the ethtool ioctls. It gives driver info, diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index adc7ab99a2f6..41e58cca8fee 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -677,7 +677,8 @@ static void pcnet32_poll_controller(struct net_device *dev) } #endif -static int pcnet32_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int pcnet32_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct pcnet32_private *lp = netdev_priv(dev); unsigned long flags; @@ -685,14 +686,15 @@ static int pcnet32_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) if (lp->mii) { spin_lock_irqsave(&lp->lock, flags); - mii_ethtool_gset(&lp->mii_if, cmd); + mii_ethtool_get_link_ksettings(&lp->mii_if, cmd); spin_unlock_irqrestore(&lp->lock, flags); r = 0; } return r; } -static int pcnet32_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int pcnet32_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct pcnet32_private *lp = netdev_priv(dev); unsigned long flags; @@ -700,7 +702,7 @@ static int pcnet32_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) if (lp->mii) { spin_lock_irqsave(&lp->lock, flags); - r = mii_ethtool_sset(&lp->mii_if, cmd); + r = mii_ethtool_set_link_ksettings(&lp->mii_if, cmd); spin_unlock_irqrestore(&lp->lock, flags); } return r; @@ -1440,8 +1442,6 @@ static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs, } static const struct ethtool_ops pcnet32_ethtool_ops = { - .get_settings = pcnet32_get_settings, - .set_settings = pcnet32_set_settings, .get_drvinfo = pcnet32_get_drvinfo, .get_msglevel = pcnet32_get_msglevel, .set_msglevel = 
pcnet32_set_msglevel, @@ -1455,6 +1455,8 @@ static const struct ethtool_ops pcnet32_ethtool_ops = { .get_regs_len = pcnet32_get_regs_len, .get_regs = pcnet32_get_regs, .get_sset_count = pcnet32_get_sset_count, + .get_link_ksettings = pcnet32_get_link_ksettings, + .set_link_ksettings = pcnet32_set_link_ksettings, }; /* only probes for non-PCI devices, the rest are handled by diff --git a/drivers/net/ethernet/amd/xgbe/Makefile b/drivers/net/ethernet/amd/xgbe/Makefile index 171a7e68048d..0dea8f5da899 100644 --- a/drivers/net/ethernet/amd/xgbe/Makefile +++ b/drivers/net/ethernet/amd/xgbe/Makefile @@ -2,7 +2,10 @@ obj-$(CONFIG_AMD_XGBE) += amd-xgbe.o amd-xgbe-objs := xgbe-main.o xgbe-drv.o xgbe-dev.o \ xgbe-desc.o xgbe-ethtool.o xgbe-mdio.o \ - xgbe-ptp.o + xgbe-ptp.o \ + xgbe-i2c.o xgbe-phy-v1.o xgbe-phy-v2.o \ + xgbe-platform.o +amd-xgbe-$(CONFIG_PCI) += xgbe-pci.o amd-xgbe-$(CONFIG_AMD_XGBE_DCB) += xgbe-dcb.o amd-xgbe-$(CONFIG_DEBUG_FS) += xgbe-debugfs.o diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index bbef95973c27..5b7ba25e0065 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -159,6 +159,8 @@ #define DMA_ISR_MACIS_WIDTH 1 #define DMA_ISR_MTLIS_INDEX 16 #define DMA_ISR_MTLIS_WIDTH 1 +#define DMA_MR_INTM_INDEX 12 +#define DMA_MR_INTM_WIDTH 2 #define DMA_MR_SWR_INDEX 0 #define DMA_MR_SWR_WIDTH 1 #define DMA_SBMR_EAME_INDEX 11 @@ -309,6 +311,11 @@ #define MAC_HWF0R 0x011c #define MAC_HWF1R 0x0120 #define MAC_HWF2R 0x0124 +#define MAC_MDIOSCAR 0x0200 +#define MAC_MDIOSCCDR 0x0204 +#define MAC_MDIOISR 0x0214 +#define MAC_MDIOIER 0x0218 +#define MAC_MDIOCL22R 0x0220 #define MAC_GPIOCR 0x0278 #define MAC_GPIOSR 0x027c #define MAC_MACA0HR 0x0300 @@ -409,10 +416,34 @@ #define MAC_ISR_MMCTXIS_WIDTH 1 #define MAC_ISR_PMTIS_INDEX 4 #define MAC_ISR_PMTIS_WIDTH 1 +#define MAC_ISR_SMI_INDEX 1 +#define MAC_ISR_SMI_WIDTH 1 #define MAC_ISR_TSIS_INDEX 12 #define MAC_ISR_TSIS_WIDTH 1 #define MAC_MACA1HR_AE_INDEX 31 #define MAC_MACA1HR_AE_WIDTH 1 +#define MAC_MDIOIER_SNGLCOMPIE_INDEX 12 +#define MAC_MDIOIER_SNGLCOMPIE_WIDTH 1 +#define MAC_MDIOISR_SNGLCOMPINT_INDEX 12 +#define MAC_MDIOISR_SNGLCOMPINT_WIDTH 1 +#define MAC_MDIOSCAR_DA_INDEX 21 +#define MAC_MDIOSCAR_DA_WIDTH 5 +#define MAC_MDIOSCAR_PA_INDEX 16 +#define MAC_MDIOSCAR_PA_WIDTH 5 +#define MAC_MDIOSCAR_RA_INDEX 0 +#define MAC_MDIOSCAR_RA_WIDTH 16 +#define MAC_MDIOSCAR_REG_INDEX 0 +#define MAC_MDIOSCAR_REG_WIDTH 21 +#define MAC_MDIOSCCDR_BUSY_INDEX 22 +#define MAC_MDIOSCCDR_BUSY_WIDTH 1 +#define MAC_MDIOSCCDR_CMD_INDEX 16 +#define MAC_MDIOSCCDR_CMD_WIDTH 2 +#define MAC_MDIOSCCDR_CR_INDEX 19 +#define MAC_MDIOSCCDR_CR_WIDTH 3 +#define MAC_MDIOSCCDR_DATA_INDEX 0 +#define MAC_MDIOSCCDR_DATA_WIDTH 16 +#define MAC_MDIOSCCDR_SADDR_INDEX 18 +#define MAC_MDIOSCCDR_SADDR_WIDTH 1 #define MAC_PFR_HMC_INDEX 2 #define MAC_PFR_HMC_WIDTH 1 #define MAC_PFR_HPF_INDEX 10 @@ -790,6 +821,10 @@ #define MTL_Q_RQOMR_RSF_WIDTH 1 #define MTL_Q_RQOMR_RTC_INDEX 0 #define MTL_Q_RQOMR_RTC_WIDTH 2 +#define MTL_Q_TQDR_TRCSTS_INDEX 1 +#define MTL_Q_TQDR_TRCSTS_WIDTH 2 +#define MTL_Q_TQDR_TXQSTS_INDEX 4 +#define MTL_Q_TQDR_TXQSTS_WIDTH 1 #define MTL_Q_TQOMR_FTQ_INDEX 0 #define MTL_Q_TQOMR_FTQ_WIDTH 1 #define MTL_Q_TQOMR_Q2TCMAP_INDEX 8 @@ -852,14 +887,16 @@ #define MTL_TSA_SP 0x00 #define MTL_TSA_ETS 0x02 -/* PCS MMD select register offset - * The MMD select register is used for accessing PCS registers - * when the underlying APB3 interface is using indirect addressing. 
- * Indirect addressing requires accessing registers in two phases, - * an address phase and a data phase. The address phases requires - * writing an address selection value to the MMD select regiesters. - */ -#define PCS_MMD_SELECT 0xff +/* PCS register offsets */ +#define PCS_V1_WINDOW_SELECT 0x03fc +#define PCS_V2_WINDOW_DEF 0x9060 +#define PCS_V2_WINDOW_SELECT 0x9064 + +/* PCS register entry bit positions and sizes */ +#define PCS_V2_WINDOW_DEF_OFFSET_INDEX 6 +#define PCS_V2_WINDOW_DEF_OFFSET_WIDTH 14 +#define PCS_V2_WINDOW_DEF_SIZE_INDEX 2 +#define PCS_V2_WINDOW_DEF_SIZE_WIDTH 4 /* SerDes integration register offsets */ #define SIR0_KR_RT_1 0x002c @@ -903,6 +940,198 @@ #define RXTX_REG129_RXDFE_CONFIG_INDEX 14 #define RXTX_REG129_RXDFE_CONFIG_WIDTH 2 +/* MAC Control register offsets */ +#define XP_PROP_0 0x0000 +#define XP_PROP_1 0x0004 +#define XP_PROP_2 0x0008 +#define XP_PROP_3 0x000c +#define XP_PROP_4 0x0010 +#define XP_PROP_5 0x0014 +#define XP_MAC_ADDR_LO 0x0020 +#define XP_MAC_ADDR_HI 0x0024 +#define XP_ECC_ISR 0x0030 +#define XP_ECC_IER 0x0034 +#define XP_ECC_CNT0 0x003c +#define XP_ECC_CNT1 0x0040 +#define XP_DRIVER_INT_REQ 0x0060 +#define XP_DRIVER_INT_RO 0x0064 +#define XP_DRIVER_SCRATCH_0 0x0068 +#define XP_DRIVER_SCRATCH_1 0x006c +#define XP_INT_EN 0x0078 +#define XP_I2C_MUTEX 0x0080 +#define XP_MDIO_MUTEX 0x0084 + +/* MAC Control register entry bit positions and sizes */ +#define XP_DRIVER_INT_REQ_REQUEST_INDEX 0 +#define XP_DRIVER_INT_REQ_REQUEST_WIDTH 1 +#define XP_DRIVER_INT_RO_STATUS_INDEX 0 +#define XP_DRIVER_INT_RO_STATUS_WIDTH 1 +#define XP_DRIVER_SCRATCH_0_COMMAND_INDEX 0 +#define XP_DRIVER_SCRATCH_0_COMMAND_WIDTH 8 +#define XP_DRIVER_SCRATCH_0_SUB_COMMAND_INDEX 8 +#define XP_DRIVER_SCRATCH_0_SUB_COMMAND_WIDTH 8 +#define XP_ECC_CNT0_RX_DED_INDEX 24 +#define XP_ECC_CNT0_RX_DED_WIDTH 8 +#define XP_ECC_CNT0_RX_SEC_INDEX 16 +#define XP_ECC_CNT0_RX_SEC_WIDTH 8 +#define XP_ECC_CNT0_TX_DED_INDEX 8 +#define XP_ECC_CNT0_TX_DED_WIDTH 8 +#define XP_ECC_CNT0_TX_SEC_INDEX 0 +#define XP_ECC_CNT0_TX_SEC_WIDTH 8 +#define XP_ECC_CNT1_DESC_DED_INDEX 8 +#define XP_ECC_CNT1_DESC_DED_WIDTH 8 +#define XP_ECC_CNT1_DESC_SEC_INDEX 0 +#define XP_ECC_CNT1_DESC_SEC_WIDTH 8 +#define XP_ECC_IER_DESC_DED_INDEX 0 +#define XP_ECC_IER_DESC_DED_WIDTH 1 +#define XP_ECC_IER_DESC_SEC_INDEX 1 +#define XP_ECC_IER_DESC_SEC_WIDTH 1 +#define XP_ECC_IER_RX_DED_INDEX 2 +#define XP_ECC_IER_RX_DED_WIDTH 1 +#define XP_ECC_IER_RX_SEC_INDEX 3 +#define XP_ECC_IER_RX_SEC_WIDTH 1 +#define XP_ECC_IER_TX_DED_INDEX 4 +#define XP_ECC_IER_TX_DED_WIDTH 1 +#define XP_ECC_IER_TX_SEC_INDEX 5 +#define XP_ECC_IER_TX_SEC_WIDTH 1 +#define XP_ECC_ISR_DESC_DED_INDEX 0 +#define XP_ECC_ISR_DESC_DED_WIDTH 1 +#define XP_ECC_ISR_DESC_SEC_INDEX 1 +#define XP_ECC_ISR_DESC_SEC_WIDTH 1 +#define XP_ECC_ISR_RX_DED_INDEX 2 +#define XP_ECC_ISR_RX_DED_WIDTH 1 +#define XP_ECC_ISR_RX_SEC_INDEX 3 +#define XP_ECC_ISR_RX_SEC_WIDTH 1 +#define XP_ECC_ISR_TX_DED_INDEX 4 +#define XP_ECC_ISR_TX_DED_WIDTH 1 +#define XP_ECC_ISR_TX_SEC_INDEX 5 +#define XP_ECC_ISR_TX_SEC_WIDTH 1 +#define XP_I2C_MUTEX_BUSY_INDEX 31 +#define XP_I2C_MUTEX_BUSY_WIDTH 1 +#define XP_I2C_MUTEX_ID_INDEX 29 +#define XP_I2C_MUTEX_ID_WIDTH 2 +#define XP_I2C_MUTEX_ACTIVE_INDEX 0 +#define XP_I2C_MUTEX_ACTIVE_WIDTH 1 +#define XP_MAC_ADDR_HI_VALID_INDEX 31 +#define XP_MAC_ADDR_HI_VALID_WIDTH 1 +#define XP_PROP_0_CONN_TYPE_INDEX 28 +#define XP_PROP_0_CONN_TYPE_WIDTH 3 +#define XP_PROP_0_MDIO_ADDR_INDEX 16 +#define XP_PROP_0_MDIO_ADDR_WIDTH 5 +#define XP_PROP_0_PORT_ID_INDEX 0 +#define 
XP_PROP_0_PORT_ID_WIDTH 8 +#define XP_PROP_0_PORT_MODE_INDEX 8 +#define XP_PROP_0_PORT_MODE_WIDTH 4 +#define XP_PROP_0_PORT_SPEEDS_INDEX 23 +#define XP_PROP_0_PORT_SPEEDS_WIDTH 4 +#define XP_PROP_1_MAX_RX_DMA_INDEX 24 +#define XP_PROP_1_MAX_RX_DMA_WIDTH 5 +#define XP_PROP_1_MAX_RX_QUEUES_INDEX 8 +#define XP_PROP_1_MAX_RX_QUEUES_WIDTH 5 +#define XP_PROP_1_MAX_TX_DMA_INDEX 16 +#define XP_PROP_1_MAX_TX_DMA_WIDTH 5 +#define XP_PROP_1_MAX_TX_QUEUES_INDEX 0 +#define XP_PROP_1_MAX_TX_QUEUES_WIDTH 5 +#define XP_PROP_2_RX_FIFO_SIZE_INDEX 16 +#define XP_PROP_2_RX_FIFO_SIZE_WIDTH 16 +#define XP_PROP_2_TX_FIFO_SIZE_INDEX 0 +#define XP_PROP_2_TX_FIFO_SIZE_WIDTH 16 +#define XP_PROP_3_GPIO_MASK_INDEX 28 +#define XP_PROP_3_GPIO_MASK_WIDTH 4 +#define XP_PROP_3_GPIO_MOD_ABS_INDEX 20 +#define XP_PROP_3_GPIO_MOD_ABS_WIDTH 4 +#define XP_PROP_3_GPIO_RATE_SELECT_INDEX 16 +#define XP_PROP_3_GPIO_RATE_SELECT_WIDTH 4 +#define XP_PROP_3_GPIO_RX_LOS_INDEX 24 +#define XP_PROP_3_GPIO_RX_LOS_WIDTH 4 +#define XP_PROP_3_GPIO_TX_FAULT_INDEX 12 +#define XP_PROP_3_GPIO_TX_FAULT_WIDTH 4 +#define XP_PROP_3_GPIO_ADDR_INDEX 8 +#define XP_PROP_3_GPIO_ADDR_WIDTH 3 +#define XP_PROP_3_MDIO_RESET_INDEX 0 +#define XP_PROP_3_MDIO_RESET_WIDTH 2 +#define XP_PROP_3_MDIO_RESET_I2C_ADDR_INDEX 8 +#define XP_PROP_3_MDIO_RESET_I2C_ADDR_WIDTH 3 +#define XP_PROP_3_MDIO_RESET_I2C_GPIO_INDEX 12 +#define XP_PROP_3_MDIO_RESET_I2C_GPIO_WIDTH 4 +#define XP_PROP_3_MDIO_RESET_INT_GPIO_INDEX 4 +#define XP_PROP_3_MDIO_RESET_INT_GPIO_WIDTH 2 +#define XP_PROP_4_MUX_ADDR_HI_INDEX 8 +#define XP_PROP_4_MUX_ADDR_HI_WIDTH 5 +#define XP_PROP_4_MUX_ADDR_LO_INDEX 0 +#define XP_PROP_4_MUX_ADDR_LO_WIDTH 3 +#define XP_PROP_4_MUX_CHAN_INDEX 4 +#define XP_PROP_4_MUX_CHAN_WIDTH 3 +#define XP_PROP_4_REDRV_ADDR_INDEX 16 +#define XP_PROP_4_REDRV_ADDR_WIDTH 7 +#define XP_PROP_4_REDRV_IF_INDEX 23 +#define XP_PROP_4_REDRV_IF_WIDTH 1 +#define XP_PROP_4_REDRV_LANE_INDEX 24 +#define XP_PROP_4_REDRV_LANE_WIDTH 3 +#define XP_PROP_4_REDRV_MODEL_INDEX 28 +#define XP_PROP_4_REDRV_MODEL_WIDTH 3 +#define XP_PROP_4_REDRV_PRESENT_INDEX 31 +#define XP_PROP_4_REDRV_PRESENT_WIDTH 1 + +/* I2C Control register offsets */ +#define IC_CON 0x0000 +#define IC_TAR 0x0004 +#define IC_DATA_CMD 0x0010 +#define IC_INTR_STAT 0x002c +#define IC_INTR_MASK 0x0030 +#define IC_RAW_INTR_STAT 0x0034 +#define IC_CLR_INTR 0x0040 +#define IC_CLR_TX_ABRT 0x0054 +#define IC_CLR_STOP_DET 0x0060 +#define IC_ENABLE 0x006c +#define IC_TXFLR 0x0074 +#define IC_RXFLR 0x0078 +#define IC_TX_ABRT_SOURCE 0x0080 +#define IC_ENABLE_STATUS 0x009c +#define IC_COMP_PARAM_1 0x00f4 + +/* I2C Control register entry bit positions and sizes */ +#define IC_COMP_PARAM_1_MAX_SPEED_MODE_INDEX 2 +#define IC_COMP_PARAM_1_MAX_SPEED_MODE_WIDTH 2 +#define IC_COMP_PARAM_1_RX_BUFFER_DEPTH_INDEX 8 +#define IC_COMP_PARAM_1_RX_BUFFER_DEPTH_WIDTH 8 +#define IC_COMP_PARAM_1_TX_BUFFER_DEPTH_INDEX 16 +#define IC_COMP_PARAM_1_TX_BUFFER_DEPTH_WIDTH 8 +#define IC_CON_MASTER_MODE_INDEX 0 +#define IC_CON_MASTER_MODE_WIDTH 1 +#define IC_CON_RESTART_EN_INDEX 5 +#define IC_CON_RESTART_EN_WIDTH 1 +#define IC_CON_RX_FIFO_FULL_HOLD_INDEX 9 +#define IC_CON_RX_FIFO_FULL_HOLD_WIDTH 1 +#define IC_CON_SLAVE_DISABLE_INDEX 6 +#define IC_CON_SLAVE_DISABLE_WIDTH 1 +#define IC_CON_SPEED_INDEX 1 +#define IC_CON_SPEED_WIDTH 2 +#define IC_DATA_CMD_CMD_INDEX 8 +#define IC_DATA_CMD_CMD_WIDTH 1 +#define IC_DATA_CMD_STOP_INDEX 9 +#define IC_DATA_CMD_STOP_WIDTH 1 +#define IC_ENABLE_ABORT_INDEX 1 +#define IC_ENABLE_ABORT_WIDTH 1 +#define IC_ENABLE_EN_INDEX 0 +#define 
IC_ENABLE_EN_WIDTH 1 +#define IC_ENABLE_STATUS_EN_INDEX 0 +#define IC_ENABLE_STATUS_EN_WIDTH 1 +#define IC_INTR_MASK_TX_EMPTY_INDEX 4 +#define IC_INTR_MASK_TX_EMPTY_WIDTH 1 +#define IC_RAW_INTR_STAT_RX_FULL_INDEX 2 +#define IC_RAW_INTR_STAT_RX_FULL_WIDTH 1 +#define IC_RAW_INTR_STAT_STOP_DET_INDEX 9 +#define IC_RAW_INTR_STAT_STOP_DET_WIDTH 1 +#define IC_RAW_INTR_STAT_TX_ABRT_INDEX 6 +#define IC_RAW_INTR_STAT_TX_ABRT_WIDTH 1 +#define IC_RAW_INTR_STAT_TX_EMPTY_INDEX 4 +#define IC_RAW_INTR_STAT_TX_EMPTY_WIDTH 1 + +/* I2C Control register value */ +#define IC_TX_ABRT_7B_ADDR_NOACK 0x0001 +#define IC_TX_ABRT_ARB_LOST 0x1000 + /* Descriptor/Packet entry bit positions and sizes */ #define RX_PACKET_ERRORS_CRC_INDEX 2 #define RX_PACKET_ERRORS_CRC_WIDTH 1 @@ -1027,6 +1256,10 @@ #define MDIO_PMA_10GBR_FECCTRL 0x00ab #endif +#ifndef MDIO_PCS_DIG_CTRL +#define MDIO_PCS_DIG_CTRL 0x8000 +#endif + #ifndef MDIO_AN_XNP #define MDIO_AN_XNP 0x0016 #endif @@ -1047,11 +1280,48 @@ #define MDIO_AN_INT 0x8002 #endif +#ifndef MDIO_VEND2_AN_ADVERTISE +#define MDIO_VEND2_AN_ADVERTISE 0x0004 +#endif + +#ifndef MDIO_VEND2_AN_LP_ABILITY +#define MDIO_VEND2_AN_LP_ABILITY 0x0005 +#endif + +#ifndef MDIO_VEND2_AN_CTRL +#define MDIO_VEND2_AN_CTRL 0x8001 +#endif + +#ifndef MDIO_VEND2_AN_STAT +#define MDIO_VEND2_AN_STAT 0x8002 +#endif + #ifndef MDIO_CTRL1_SPEED1G #define MDIO_CTRL1_SPEED1G (MDIO_CTRL1_SPEED10G & ~BMCR_SPEED100) #endif +#ifndef MDIO_VEND2_CTRL1_AN_ENABLE +#define MDIO_VEND2_CTRL1_AN_ENABLE BIT(12) +#endif + +#ifndef MDIO_VEND2_CTRL1_AN_RESTART +#define MDIO_VEND2_CTRL1_AN_RESTART BIT(9) +#endif + +#ifndef MDIO_VEND2_CTRL1_SS6 +#define MDIO_VEND2_CTRL1_SS6 BIT(6) +#endif + +#ifndef MDIO_VEND2_CTRL1_SS13 +#define MDIO_VEND2_CTRL1_SS13 BIT(13) +#endif + /* MDIO mask values */ +#define XGBE_AN_CL73_INT_CMPLT BIT(0) +#define XGBE_AN_CL73_INC_LINK BIT(1) +#define XGBE_AN_CL73_PG_RCV BIT(2) +#define XGBE_AN_CL73_INT_MASK 0x07 + #define XGBE_XNP_MCF_NULL_MESSAGE 0x001 #define XGBE_XNP_ACK_PROCESSED BIT(12) #define XGBE_XNP_MP_FORMATTED BIT(13) @@ -1060,6 +1330,19 @@ #define XGBE_KR_TRAINING_START BIT(0) #define XGBE_KR_TRAINING_ENABLE BIT(1) +#define XGBE_PCS_CL37_BP BIT(12) + +#define XGBE_AN_CL37_INT_CMPLT BIT(0) +#define XGBE_AN_CL37_INT_MASK 0x01 + +#define XGBE_AN_CL37_HD_MASK 0x40 +#define XGBE_AN_CL37_FD_MASK 0x20 + +#define XGBE_AN_CL37_PCS_MODE_MASK 0x06 +#define XGBE_AN_CL37_PCS_MODE_BASEX 0x00 +#define XGBE_AN_CL37_PCS_MODE_SGMII 0x04 +#define XGBE_AN_CL37_TX_CONFIG_MASK 0x08 + /* Bit setting and getting macros * The get macro will extract the current bit field value from within * the variable @@ -1195,12 +1478,28 @@ do { \ /* Macros for building, reading or writing register values or bits * within the register values of XPCS registers. 
*/ -#define XPCS_IOWRITE(_pdata, _off, _val) \ +#define XPCS_GET_BITS(_var, _prefix, _field) \ + GET_BITS((_var), \ + _prefix##_##_field##_INDEX, \ + _prefix##_##_field##_WIDTH) + +#define XPCS_SET_BITS(_var, _prefix, _field, _val) \ + SET_BITS((_var), \ + _prefix##_##_field##_INDEX, \ + _prefix##_##_field##_WIDTH, (_val)) + +#define XPCS32_IOWRITE(_pdata, _off, _val) \ iowrite32(_val, (_pdata)->xpcs_regs + (_off)) -#define XPCS_IOREAD(_pdata, _off) \ +#define XPCS32_IOREAD(_pdata, _off) \ ioread32((_pdata)->xpcs_regs + (_off)) +#define XPCS16_IOWRITE(_pdata, _off, _val) \ + iowrite16(_val, (_pdata)->xpcs_regs + (_off)) + +#define XPCS16_IOREAD(_pdata, _off) \ + ioread16((_pdata)->xpcs_regs + (_off)) + /* Macros for building, reading or writing register values or bits * within the register values of SerDes integration registers. */ @@ -1278,6 +1577,72 @@ do { \ } while (0) /* Macros for building, reading or writing register values or bits + * within the register values of MAC Control registers. + */ +#define XP_GET_BITS(_var, _prefix, _field) \ + GET_BITS((_var), \ + _prefix##_##_field##_INDEX, \ + _prefix##_##_field##_WIDTH) + +#define XP_SET_BITS(_var, _prefix, _field, _val) \ + SET_BITS((_var), \ + _prefix##_##_field##_INDEX, \ + _prefix##_##_field##_WIDTH, (_val)) + +#define XP_IOREAD(_pdata, _reg) \ + ioread32((_pdata)->xprop_regs + (_reg)) + +#define XP_IOREAD_BITS(_pdata, _reg, _field) \ + GET_BITS(XP_IOREAD((_pdata), (_reg)), \ + _reg##_##_field##_INDEX, \ + _reg##_##_field##_WIDTH) + +#define XP_IOWRITE(_pdata, _reg, _val) \ + iowrite32((_val), (_pdata)->xprop_regs + (_reg)) + +#define XP_IOWRITE_BITS(_pdata, _reg, _field, _val) \ +do { \ + u32 reg_val = XP_IOREAD((_pdata), (_reg)); \ + SET_BITS(reg_val, \ + _reg##_##_field##_INDEX, \ + _reg##_##_field##_WIDTH, (_val)); \ + XP_IOWRITE((_pdata), (_reg), reg_val); \ +} while (0) + +/* Macros for building, reading or writing register values or bits + * within the register values of I2C Control registers. + */ +#define XI2C_GET_BITS(_var, _prefix, _field) \ + GET_BITS((_var), \ + _prefix##_##_field##_INDEX, \ + _prefix##_##_field##_WIDTH) + +#define XI2C_SET_BITS(_var, _prefix, _field, _val) \ + SET_BITS((_var), \ + _prefix##_##_field##_INDEX, \ + _prefix##_##_field##_WIDTH, (_val)) + +#define XI2C_IOREAD(_pdata, _reg) \ + ioread32((_pdata)->xi2c_regs + (_reg)) + +#define XI2C_IOREAD_BITS(_pdata, _reg, _field) \ + GET_BITS(XI2C_IOREAD((_pdata), (_reg)), \ + _reg##_##_field##_INDEX, \ + _reg##_##_field##_WIDTH) + +#define XI2C_IOWRITE(_pdata, _reg, _val) \ + iowrite32((_val), (_pdata)->xi2c_regs + (_reg)) + +#define XI2C_IOWRITE_BITS(_pdata, _reg, _field, _val) \ +do { \ + u32 reg_val = XI2C_IOREAD((_pdata), (_reg)); \ + SET_BITS(reg_val, \ + _reg##_##_field##_INDEX, \ + _reg##_##_field##_WIDTH, (_val)); \ + XI2C_IOWRITE((_pdata), (_reg), reg_val); \ +} while (0) + +/* Macros for building, reading or writing register values or bits * using MDIO. Different from above because of the use of standardized * Linux include values. No shifting is performed with the bit * operations, everything works on mask values. 
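The XP_*, XI2C_* and XPCS_* accessor families introduced above all rely on the same token-pasting scheme: a _prefix##_##_field##_INDEX/_WIDTH pair names a bit field, and the generic GET_BITS()/SET_BITS() helpers (defined earlier in xgbe-common.h) do the shift-and-mask work. As a reference sketch, those helpers take roughly this form:

#define GET_BITS(_var, _index, _width) \
	(((_var) >> (_index)) & ((0x1 << (_width)) - 1))

#define SET_BITS(_var, _index, _width, _val) \
do { \
	(_var) &= ~(((0x1 << (_width)) - 1) << (_index)); \
	(_var) |= (((_val) & ((0x1 << (_width)) - 1)) << (_index)); \
} while (0)

So, for example, XP_IOREAD_BITS(pdata, XP_PROP_0, PORT_MODE) expands to GET_BITS(ioread32(pdata->xprop_regs + XP_PROP_0), XP_PROP_0_PORT_MODE_INDEX, XP_PROP_0_PORT_MODE_WIDTH), which with the field definitions above is simply (reg >> 8) & 0xf.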
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c index 96f485ab612e..0c0140decbc2 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c @@ -316,6 +316,126 @@ static const struct file_operations xpcs_reg_value_fops = { .write = xpcs_reg_value_write, }; +static ssize_t xprop_reg_addr_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + + return xgbe_common_read(buffer, count, ppos, pdata->debugfs_xprop_reg); +} + +static ssize_t xprop_reg_addr_write(struct file *filp, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + + return xgbe_common_write(buffer, count, ppos, + &pdata->debugfs_xprop_reg); +} + +static ssize_t xprop_reg_value_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + unsigned int value; + + value = XP_IOREAD(pdata, pdata->debugfs_xprop_reg); + + return xgbe_common_read(buffer, count, ppos, value); +} + +static ssize_t xprop_reg_value_write(struct file *filp, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + unsigned int value; + ssize_t len; + + len = xgbe_common_write(buffer, count, ppos, &value); + if (len < 0) + return len; + + XP_IOWRITE(pdata, pdata->debugfs_xprop_reg, value); + + return len; +} + +static const struct file_operations xprop_reg_addr_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = xprop_reg_addr_read, + .write = xprop_reg_addr_write, +}; + +static const struct file_operations xprop_reg_value_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = xprop_reg_value_read, + .write = xprop_reg_value_write, +}; + +static ssize_t xi2c_reg_addr_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + + return xgbe_common_read(buffer, count, ppos, pdata->debugfs_xi2c_reg); +} + +static ssize_t xi2c_reg_addr_write(struct file *filp, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + + return xgbe_common_write(buffer, count, ppos, + &pdata->debugfs_xi2c_reg); +} + +static ssize_t xi2c_reg_value_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + unsigned int value; + + value = XI2C_IOREAD(pdata, pdata->debugfs_xi2c_reg); + + return xgbe_common_read(buffer, count, ppos, value); +} + +static ssize_t xi2c_reg_value_write(struct file *filp, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct xgbe_prv_data *pdata = filp->private_data; + unsigned int value; + ssize_t len; + + len = xgbe_common_write(buffer, count, ppos, &value); + if (len < 0) + return len; + + XI2C_IOWRITE(pdata, pdata->debugfs_xi2c_reg, value); + + return len; +} + +static const struct file_operations xi2c_reg_addr_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = xi2c_reg_addr_read, + .write = xi2c_reg_addr_write, +}; + +static const struct file_operations xi2c_reg_value_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = xi2c_reg_value_read, + .write = xi2c_reg_value_write, +}; + void xgbe_debugfs_init(struct xgbe_prv_data *pdata) { struct dentry *pfile; @@ -367,6 +487,38 @@ void xgbe_debugfs_init(struct xgbe_prv_data 
*pdata) if (!pfile) netdev_err(pdata->netdev, "debugfs_create_file failed\n"); + if (pdata->xprop_regs) { + pfile = debugfs_create_file("xprop_register", 0600, + pdata->xgbe_debugfs, pdata, + &xprop_reg_addr_fops); + if (!pfile) + netdev_err(pdata->netdev, + "debugfs_create_file failed\n"); + + pfile = debugfs_create_file("xprop_register_value", 0600, + pdata->xgbe_debugfs, pdata, + &xprop_reg_value_fops); + if (!pfile) + netdev_err(pdata->netdev, + "debugfs_create_file failed\n"); + } + + if (pdata->xi2c_regs) { + pfile = debugfs_create_file("xi2c_register", 0600, + pdata->xgbe_debugfs, pdata, + &xi2c_reg_addr_fops); + if (!pfile) + netdev_err(pdata->netdev, + "debugfs_create_file failed\n"); + + pfile = debugfs_create_file("xi2c_register_value", 0600, + pdata->xgbe_debugfs, pdata, + &xi2c_reg_value_fops); + if (!pfile) + netdev_err(pdata->netdev, + "debugfs_create_file failed\n"); + } + kfree(buf); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 1babcc11a248..30056e24e1fc 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -123,6 +123,11 @@ #include "xgbe.h" #include "xgbe-common.h" +static inline unsigned int xgbe_get_max_frame(struct xgbe_prv_data *pdata) +{ + return pdata->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; +} + static unsigned int xgbe_usec_to_riwt(struct xgbe_prv_data *pdata, unsigned int usec) { @@ -491,6 +496,27 @@ static void xgbe_config_rss(struct xgbe_prv_data *pdata) "error configuring RSS, RSS disabled\n"); } +static bool xgbe_is_pfc_queue(struct xgbe_prv_data *pdata, + unsigned int queue) +{ + unsigned int prio, tc; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { + /* Does this queue handle the priority? */ + if (pdata->prio2q_map[prio] != queue) + continue; + + /* Get the Traffic Class for this priority */ + tc = pdata->ets->prio_tc[prio]; + + /* Check if PFC is enabled for this traffic class */ + if (pdata->pfc->pfc_en & (1 << tc)) + return true; + } + + return false; +} + static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata) { unsigned int max_q_count, q_count; @@ -528,27 +554,14 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata) for (i = 0; i < pdata->rx_q_count; i++) { unsigned int ehfc = 0; - if (pfc && ets) { - unsigned int prio; - - for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { - unsigned int tc; - - /* Does this queue handle the priority? 
*/ - if (pdata->prio2q_map[prio] != i) - continue; - - /* Get the Traffic Class for this priority */ - tc = ets->prio_tc[prio]; - - /* Check if flow control should be enabled */ - if (pfc->pfc_en & (1 << tc)) { + if (pdata->rx_rfd[i]) { + /* Flow control thresholds are established */ + if (pfc && ets) { + if (xgbe_is_pfc_queue(pdata, i)) ehfc = 1; - break; - } + } else { + ehfc = 1; } - } else { - ehfc = 1; } XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, ehfc); @@ -633,6 +646,11 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata) unsigned int dma_ch_isr, dma_ch_ier; unsigned int i; + /* Set the interrupt mode if supported */ + if (pdata->channel_irq_mode) + XGMAC_IOWRITE_BITS(pdata, DMA_MR, INTM, + pdata->channel_irq_mode); + channel = pdata->channel; for (i = 0; i < pdata->channel_count; i++, channel++) { /* Clear all the interrupts which are set */ @@ -654,19 +672,21 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata) if (channel->tx_ring) { /* Enable the following Tx interrupts * TIE - Transmit Interrupt Enable (unless using - * per channel interrupts) + * per channel interrupts in edge triggered + * mode) */ - if (!pdata->per_channel_irq) + if (!pdata->per_channel_irq || pdata->channel_irq_mode) XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TIE, 1); } if (channel->rx_ring) { /* Enable following Rx interrupts * RBUE - Receive Buffer Unavailable Enable * RIE - Receive Interrupt Enable (unless using - * per channel interrupts) + * per channel interrupts in edge triggered + * mode) */ XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RBUE, 1); - if (!pdata->per_channel_irq) + if (!pdata->per_channel_irq || pdata->channel_irq_mode) XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RIE, 1); } @@ -702,34 +722,90 @@ static void xgbe_enable_mac_interrupts(struct xgbe_prv_data *pdata) /* Enable all counter interrupts */ XGMAC_IOWRITE_BITS(pdata, MMC_RIER, ALL_INTERRUPTS, 0xffffffff); XGMAC_IOWRITE_BITS(pdata, MMC_TIER, ALL_INTERRUPTS, 0xffffffff); + + /* Enable MDIO single command completion interrupt */ + XGMAC_IOWRITE_BITS(pdata, MAC_MDIOIER, SNGLCOMPIE, 1); } -static int xgbe_set_gmii_speed(struct xgbe_prv_data *pdata) +static void xgbe_enable_ecc_interrupts(struct xgbe_prv_data *pdata) { - if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) == 0x3) - return 0; + unsigned int ecc_isr, ecc_ier = 0; - XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, 0x3); + if (!pdata->vdata->ecc_support) + return; - return 0; + /* Clear all the interrupts which are set */ + ecc_isr = XP_IOREAD(pdata, XP_ECC_ISR); + XP_IOWRITE(pdata, XP_ECC_ISR, ecc_isr); + + /* Enable ECC interrupts */ + XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_DED, 1); + XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_SEC, 1); + XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_DED, 1); + XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_SEC, 1); + XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_DED, 1); + XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_SEC, 1); + + XP_IOWRITE(pdata, XP_ECC_IER, ecc_ier); } -static int xgbe_set_gmii_2500_speed(struct xgbe_prv_data *pdata) +static void xgbe_disable_ecc_ded(struct xgbe_prv_data *pdata) { - if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) == 0x2) - return 0; + unsigned int ecc_ier; - XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, 0x2); + ecc_ier = XP_IOREAD(pdata, XP_ECC_IER); - return 0; + /* Disable ECC DED interrupts */ + XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_DED, 0); + XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_DED, 0); + XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_DED, 0); + + XP_IOWRITE(pdata, XP_ECC_IER, ecc_ier); } -static int xgbe_set_xgmii_speed(struct xgbe_prv_data *pdata) +static 
void xgbe_disable_ecc_sec(struct xgbe_prv_data *pdata, + enum xgbe_ecc_sec sec) { - if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) == 0) - return 0; + unsigned int ecc_ier; + + ecc_ier = XP_IOREAD(pdata, XP_ECC_IER); + + /* Disable ECC SEC interrupt */ + switch (sec) { + case XGBE_ECC_SEC_TX: + XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_SEC, 0); + break; + case XGBE_ECC_SEC_RX: + XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_SEC, 0); + break; + case XGBE_ECC_SEC_DESC: + XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_SEC, 0); + break; + } - XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, 0); + XP_IOWRITE(pdata, XP_ECC_IER, ecc_ier); +} + +static int xgbe_set_speed(struct xgbe_prv_data *pdata, int speed) +{ + unsigned int ss; + + switch (speed) { + case SPEED_1000: + ss = 0x03; + break; + case SPEED_2500: + ss = 0x02; + break; + case SPEED_10000: + ss = 0x00; + break; + default: + return -EINVAL; + } + + if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) != ss) + XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, ss); return 0; } @@ -1019,8 +1095,101 @@ static int xgbe_config_rx_mode(struct xgbe_prv_data *pdata) return 0; } -static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad, - int mmd_reg) +static int xgbe_clr_gpio(struct xgbe_prv_data *pdata, unsigned int gpio) +{ + unsigned int reg; + + if (gpio > 16) + return -EINVAL; + + reg = XGMAC_IOREAD(pdata, MAC_GPIOSR); + + reg &= ~(1 << (gpio + 16)); + XGMAC_IOWRITE(pdata, MAC_GPIOSR, reg); + + return 0; +} + +static int xgbe_set_gpio(struct xgbe_prv_data *pdata, unsigned int gpio) +{ + unsigned int reg; + + if (gpio > 16) + return -EINVAL; + + reg = XGMAC_IOREAD(pdata, MAC_GPIOSR); + + reg |= (1 << (gpio + 16)); + XGMAC_IOWRITE(pdata, MAC_GPIOSR, reg); + + return 0; +} + +static int xgbe_read_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad, + int mmd_reg) +{ + unsigned long flags; + unsigned int mmd_address, index, offset; + int mmd_data; + + if (mmd_reg & MII_ADDR_C45) + mmd_address = mmd_reg & ~MII_ADDR_C45; + else + mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff); + + /* The PCS registers are accessed using mmio. The underlying + * management interface uses indirect addressing to access the MMD + * register sets. This requires accessing of the PCS register in two + * phases, an address phase and a data phase. + * + * The mmio interface is based on 16-bit offsets and values. All + * register offsets must therefore be adjusted by left shifting the + * offset 1 bit and reading 16 bits of data. + */ + mmd_address <<= 1; + index = mmd_address & ~pdata->xpcs_window_mask; + offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask); + + spin_lock_irqsave(&pdata->xpcs_lock, flags); + XPCS32_IOWRITE(pdata, PCS_V2_WINDOW_SELECT, index); + mmd_data = XPCS16_IOREAD(pdata, offset); + spin_unlock_irqrestore(&pdata->xpcs_lock, flags); + + return mmd_data; +} + +static void xgbe_write_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad, + int mmd_reg, int mmd_data) +{ + unsigned long flags; + unsigned int mmd_address, index, offset; + + if (mmd_reg & MII_ADDR_C45) + mmd_address = mmd_reg & ~MII_ADDR_C45; + else + mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff); + + /* The PCS registers are accessed using mmio. The underlying + * management interface uses indirect addressing to access the MMD + * register sets. This requires accessing of the PCS register in two + * phases, an address phase and a data phase. + * + * The mmio interface is based on 16-bit offsets and values. 
All + * register offsets must therefore be adjusted by left shifting the + * offset 1 bit and writing 16 bits of data. + */ + mmd_address <<= 1; + index = mmd_address & ~pdata->xpcs_window_mask; + offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask); + + spin_lock_irqsave(&pdata->xpcs_lock, flags); + XPCS32_IOWRITE(pdata, PCS_V2_WINDOW_SELECT, index); + XPCS16_IOWRITE(pdata, offset, mmd_data); + spin_unlock_irqrestore(&pdata->xpcs_lock, flags); +} + +static int xgbe_read_mmd_regs_v1(struct xgbe_prv_data *pdata, int prtad, + int mmd_reg) { unsigned long flags; unsigned int mmd_address; @@ -1041,15 +1210,15 @@ static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad, * offset 2 bits and reading 32 bits of data. */ spin_lock_irqsave(&pdata->xpcs_lock, flags); - XPCS_IOWRITE(pdata, PCS_MMD_SELECT << 2, mmd_address >> 8); - mmd_data = XPCS_IOREAD(pdata, (mmd_address & 0xff) << 2); + XPCS32_IOWRITE(pdata, PCS_V1_WINDOW_SELECT, mmd_address >> 8); + mmd_data = XPCS32_IOREAD(pdata, (mmd_address & 0xff) << 2); spin_unlock_irqrestore(&pdata->xpcs_lock, flags); return mmd_data; } -static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad, - int mmd_reg, int mmd_data) +static void xgbe_write_mmd_regs_v1(struct xgbe_prv_data *pdata, int prtad, + int mmd_reg, int mmd_data) { unsigned int mmd_address; unsigned long flags; @@ -1066,14 +1235,113 @@ static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad, * * The mmio interface is based on 32-bit offsets and values. All * register offsets must therefore be adjusted by left shifting the - * offset 2 bits and reading 32 bits of data. + * offset 2 bits and writing 32 bits of data. */ spin_lock_irqsave(&pdata->xpcs_lock, flags); - XPCS_IOWRITE(pdata, PCS_MMD_SELECT << 2, mmd_address >> 8); - XPCS_IOWRITE(pdata, (mmd_address & 0xff) << 2, mmd_data); + XPCS32_IOWRITE(pdata, PCS_V1_WINDOW_SELECT, mmd_address >> 8); + XPCS32_IOWRITE(pdata, (mmd_address & 0xff) << 2, mmd_data); spin_unlock_irqrestore(&pdata->xpcs_lock, flags); } +static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad, + int mmd_reg) +{ + switch (pdata->vdata->xpcs_access) { + case XGBE_XPCS_ACCESS_V1: + return xgbe_read_mmd_regs_v1(pdata, prtad, mmd_reg); + + case XGBE_XPCS_ACCESS_V2: + default: + return xgbe_read_mmd_regs_v2(pdata, prtad, mmd_reg); + } +} + +static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad, + int mmd_reg, int mmd_data) +{ + switch (pdata->vdata->xpcs_access) { + case XGBE_XPCS_ACCESS_V1: + return xgbe_write_mmd_regs_v1(pdata, prtad, mmd_reg, mmd_data); + + case XGBE_XPCS_ACCESS_V2: + default: + return xgbe_write_mmd_regs_v2(pdata, prtad, mmd_reg, mmd_data); + } +} + +static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, + int reg, u16 val) +{ + unsigned int mdio_sca, mdio_sccd; + + reinit_completion(&pdata->mdio_complete); + + mdio_sca = 0; + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); + XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); + + mdio_sccd = 0; + XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, DATA, val); + XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, CMD, 1); + XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, BUSY, 1); + XGMAC_IOWRITE(pdata, MAC_MDIOSCCDR, mdio_sccd); + + if (!wait_for_completion_timeout(&pdata->mdio_complete, HZ)) { + netdev_err(pdata->netdev, "mdio write operation timed out\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int xgbe_read_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, + int reg) 
+{ + unsigned int mdio_sca, mdio_sccd; + + reinit_completion(&pdata->mdio_complete); + + mdio_sca = 0; + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); + XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); + + mdio_sccd = 0; + XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, CMD, 3); + XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, BUSY, 1); + XGMAC_IOWRITE(pdata, MAC_MDIOSCCDR, mdio_sccd); + + if (!wait_for_completion_timeout(&pdata->mdio_complete, HZ)) { + netdev_err(pdata->netdev, "mdio read operation timed out\n"); + return -ETIMEDOUT; + } + + return XGMAC_IOREAD_BITS(pdata, MAC_MDIOSCCDR, DATA); +} + +static int xgbe_set_ext_mii_mode(struct xgbe_prv_data *pdata, unsigned int port, + enum xgbe_mdio_mode mode) +{ + unsigned int reg_val = 0; + + switch (mode) { + case XGBE_MDIO_MODE_CL22: + if (port > XGMAC_MAX_C22_PORT) + return -EINVAL; + reg_val |= (1 << port); + break; + case XGBE_MDIO_MODE_CL45: + break; + default: + return -EINVAL; + } + + XGMAC_IOWRITE(pdata, MAC_MDIOCL22R, reg_val); + + return 0; +} + static int xgbe_tx_complete(struct xgbe_ring_desc *rdesc) { return !XGMAC_GET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, OWN); @@ -1264,14 +1532,21 @@ static u64 xgbe_get_tstamp_time(struct xgbe_prv_data *pdata) static u64 xgbe_get_tx_tstamp(struct xgbe_prv_data *pdata) { - unsigned int tx_snr; + unsigned int tx_snr, tx_ssr; u64 nsec; - tx_snr = XGMAC_IOREAD(pdata, MAC_TXSNR); + if (pdata->vdata->tx_tstamp_workaround) { + tx_snr = XGMAC_IOREAD(pdata, MAC_TXSNR); + tx_ssr = XGMAC_IOREAD(pdata, MAC_TXSSR); + } else { + tx_ssr = XGMAC_IOREAD(pdata, MAC_TXSSR); + tx_snr = XGMAC_IOREAD(pdata, MAC_TXSNR); + } + if (XGMAC_GET_BITS(tx_snr, MAC_TXSNR, TXTSSTSMIS)) return 0; - nsec = XGMAC_IOREAD(pdata, MAC_TXSSR); + nsec = tx_ssr; nsec *= NSEC_PER_SEC; nsec += tx_snr; @@ -1327,106 +1602,6 @@ static int xgbe_config_tstamp(struct xgbe_prv_data *pdata, return 0; } -static void xgbe_config_tc(struct xgbe_prv_data *pdata) -{ - unsigned int offset, queue, prio; - u8 i; - - netdev_reset_tc(pdata->netdev); - if (!pdata->num_tcs) - return; - - netdev_set_num_tc(pdata->netdev, pdata->num_tcs); - - for (i = 0, queue = 0, offset = 0; i < pdata->num_tcs; i++) { - while ((queue < pdata->tx_q_count) && - (pdata->q2tc_map[queue] == i)) - queue++; - - netif_dbg(pdata, drv, pdata->netdev, "TC%u using TXq%u-%u\n", - i, offset, queue - 1); - netdev_set_tc_queue(pdata->netdev, i, queue - offset, offset); - offset = queue; - } - - if (!pdata->ets) - return; - - for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) - netdev_set_prio_tc_map(pdata->netdev, prio, - pdata->ets->prio_tc[prio]); -} - -static void xgbe_config_dcb_tc(struct xgbe_prv_data *pdata) -{ - struct ieee_ets *ets = pdata->ets; - unsigned int total_weight, min_weight, weight; - unsigned int mask, reg, reg_val; - unsigned int i, prio; - - if (!ets) - return; - - /* Set Tx to deficit weighted round robin scheduling algorithm (when - * traffic class is using ETS algorithm) - */ - XGMAC_IOWRITE_BITS(pdata, MTL_OMR, ETSALG, MTL_ETSALG_DWRR); - - /* Set Traffic Class algorithms */ - total_weight = pdata->netdev->mtu * pdata->hw_feat.tc_cnt; - min_weight = total_weight / 100; - if (!min_weight) - min_weight = 1; - - for (i = 0; i < pdata->hw_feat.tc_cnt; i++) { - /* Map the priorities to the traffic class */ - mask = 0; - for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { - if (ets->prio_tc[prio] == i) - mask |= (1 << prio); - } - mask &= 0xff; - - netif_dbg(pdata, drv, pdata->netdev, "TC%u PRIO mask=%#x\n", - i, mask); - 
reg = MTL_TCPM0R + (MTL_TCPM_INC * (i / MTL_TCPM_TC_PER_REG)); - reg_val = XGMAC_IOREAD(pdata, reg); - - reg_val &= ~(0xff << ((i % MTL_TCPM_TC_PER_REG) << 3)); - reg_val |= (mask << ((i % MTL_TCPM_TC_PER_REG) << 3)); - - XGMAC_IOWRITE(pdata, reg, reg_val); - - /* Set the traffic class algorithm */ - switch (ets->tc_tsa[i]) { - case IEEE_8021QAZ_TSA_STRICT: - netif_dbg(pdata, drv, pdata->netdev, - "TC%u using SP\n", i); - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA, - MTL_TSA_SP); - break; - case IEEE_8021QAZ_TSA_ETS: - weight = total_weight * ets->tc_tx_bw[i] / 100; - weight = clamp(weight, min_weight, total_weight); - - netif_dbg(pdata, drv, pdata->netdev, - "TC%u using DWRR (weight %u)\n", i, weight); - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA, - MTL_TSA_ETS); - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_QWR, QW, - weight); - break; - } - } - - xgbe_config_tc(pdata); -} - -static void xgbe_config_dcb_pfc(struct xgbe_prv_data *pdata) -{ - xgbe_config_flow_control(pdata); -} - static void xgbe_tx_start_xmit(struct xgbe_channel *channel, struct xgbe_ring *ring) { @@ -1901,7 +2076,7 @@ static int xgbe_disable_int(struct xgbe_channel *channel, return 0; } -static int xgbe_exit(struct xgbe_prv_data *pdata) +static int __xgbe_exit(struct xgbe_prv_data *pdata) { unsigned int count = 2000; @@ -1923,6 +2098,20 @@ static int xgbe_exit(struct xgbe_prv_data *pdata) return 0; } +static int xgbe_exit(struct xgbe_prv_data *pdata) +{ + int ret; + + /* To guard against possible incorrectly generated interrupts, + * issue the software reset twice. + */ + ret = __xgbe_exit(pdata); + if (ret) + return ret; + + return __xgbe_exit(pdata); +} + static int xgbe_flush_tx_queues(struct xgbe_prv_data *pdata) { unsigned int i, count; @@ -2000,61 +2189,331 @@ static void xgbe_config_mtl_mode(struct xgbe_prv_data *pdata) XGMAC_IOWRITE_BITS(pdata, MTL_OMR, RAA, MTL_RAA_SP); } -static unsigned int xgbe_calculate_per_queue_fifo(unsigned int fifo_size, - unsigned int queue_count) +static void xgbe_queue_flow_control_threshold(struct xgbe_prv_data *pdata, + unsigned int queue, + unsigned int q_fifo_size) +{ + unsigned int frame_fifo_size; + unsigned int rfa, rfd; + + frame_fifo_size = XGMAC_FLOW_CONTROL_ALIGN(xgbe_get_max_frame(pdata)); + + if (pdata->pfcq[queue] && (q_fifo_size > pdata->pfc_rfa)) { + /* PFC is active for this queue */ + rfa = pdata->pfc_rfa; + rfd = rfa + frame_fifo_size; + if (rfd > XGMAC_FLOW_CONTROL_MAX) + rfd = XGMAC_FLOW_CONTROL_MAX; + if (rfa >= XGMAC_FLOW_CONTROL_MAX) + rfa = XGMAC_FLOW_CONTROL_MAX - XGMAC_FLOW_CONTROL_UNIT; + } else { + /* This path deals with just maximum frame sizes which are + * limited to a jumbo frame of 9,000 (plus headers, etc.) + * so we can never exceed the maximum allowable RFA/RFD + * values. + */ + if (q_fifo_size <= 2048) { + /* rx_rfd to zero to signal no flow control */ + pdata->rx_rfa[queue] = 0; + pdata->rx_rfd[queue] = 0; + return; + } + + if (q_fifo_size <= 4096) { + /* Between 2048 and 4096 */ + pdata->rx_rfa[queue] = 0; /* Full - 1024 bytes */ + pdata->rx_rfd[queue] = 1; /* Full - 1536 bytes */ + return; + } + + if (q_fifo_size <= frame_fifo_size) { + /* Between 4096 and max-frame */ + pdata->rx_rfa[queue] = 2; /* Full - 2048 bytes */ + pdata->rx_rfd[queue] = 5; /* Full - 3584 bytes */ + return; + } + + if (q_fifo_size <= (frame_fifo_size * 3)) { + /* Between max-frame and 3 max-frames, + * trigger if we get just over a frame of data and + * resume when we have just under half a frame left. 
+ */ + rfa = q_fifo_size - frame_fifo_size; + rfd = rfa + (frame_fifo_size / 2); + } else { + /* Above 3 max-frames - trigger when just over + * 2 frames of space available + */ + rfa = frame_fifo_size * 2; + rfa += XGMAC_FLOW_CONTROL_UNIT; + rfd = rfa + frame_fifo_size; + } + } + + pdata->rx_rfa[queue] = XGMAC_FLOW_CONTROL_VALUE(rfa); + pdata->rx_rfd[queue] = XGMAC_FLOW_CONTROL_VALUE(rfd); +} + +static void xgbe_calculate_flow_control_threshold(struct xgbe_prv_data *pdata, + unsigned int *fifo) { unsigned int q_fifo_size; - unsigned int p_fifo; + unsigned int i; - /* Calculate the configured fifo size */ - q_fifo_size = 1 << (fifo_size + 7); + for (i = 0; i < pdata->rx_q_count; i++) { + q_fifo_size = (fifo[i] + 1) * XGMAC_FIFO_UNIT; + xgbe_queue_flow_control_threshold(pdata, i, q_fifo_size); + } +} + +static void xgbe_config_flow_control_threshold(struct xgbe_prv_data *pdata) +{ + unsigned int i; + + for (i = 0; i < pdata->rx_q_count; i++) { + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFA, + pdata->rx_rfa[i]); + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFD, + pdata->rx_rfd[i]); + } +} + +static unsigned int xgbe_get_tx_fifo_size(struct xgbe_prv_data *pdata) +{ /* The configured value may not be the actual amount of fifo RAM */ - q_fifo_size = min_t(unsigned int, XGBE_FIFO_MAX, q_fifo_size); + return min_t(unsigned int, pdata->tx_max_fifo_size, + pdata->hw_feat.tx_fifo_size); +} - q_fifo_size = q_fifo_size / queue_count; +static unsigned int xgbe_get_rx_fifo_size(struct xgbe_prv_data *pdata) +{ + /* The configured value may not be the actual amount of fifo RAM */ + return min_t(unsigned int, pdata->rx_max_fifo_size, + pdata->hw_feat.rx_fifo_size); +} - /* Each increment in the queue fifo size represents 256 bytes of - * fifo, with 0 representing 256 bytes. Distribute the fifo equally - * between the queues. +static void xgbe_calculate_equal_fifo(unsigned int fifo_size, + unsigned int queue_count, + unsigned int *fifo) +{ + unsigned int q_fifo_size; + unsigned int p_fifo; + unsigned int i; + + q_fifo_size = fifo_size / queue_count; + + /* Calculate the fifo setting by dividing the queue's fifo size + * by the fifo allocation increment (with 0 representing the + * base allocation increment so decrement the result by 1). */ - p_fifo = q_fifo_size / 256; + p_fifo = q_fifo_size / XGMAC_FIFO_UNIT; if (p_fifo) p_fifo--; - return p_fifo; + /* Distribute the fifo equally amongst the queues */ + for (i = 0; i < queue_count; i++) + fifo[i] = p_fifo; +} + +static unsigned int xgbe_set_nonprio_fifos(unsigned int fifo_size, + unsigned int queue_count, + unsigned int *fifo) +{ + unsigned int i; + + BUILD_BUG_ON_NOT_POWER_OF_2(XGMAC_FIFO_MIN_ALLOC); + + if (queue_count <= IEEE_8021QAZ_MAX_TCS) + return fifo_size; + + /* Rx queues 9 and up are for specialized packets, + * such as PTP or DCB control packets, etc. 
and + * don't require a large fifo + */ + for (i = IEEE_8021QAZ_MAX_TCS; i < queue_count; i++) { + fifo[i] = (XGMAC_FIFO_MIN_ALLOC / XGMAC_FIFO_UNIT) - 1; + fifo_size -= XGMAC_FIFO_MIN_ALLOC; + } + + return fifo_size; +} + +static unsigned int xgbe_get_pfc_delay(struct xgbe_prv_data *pdata) +{ + unsigned int delay; + + /* If a delay has been provided, use that */ + if (pdata->pfc->delay) + return pdata->pfc->delay / 8; + + /* Allow for two maximum size frames */ + delay = xgbe_get_max_frame(pdata); + delay += XGMAC_ETH_PREAMBLE; + delay *= 2; + + /* Allow for PFC frame */ + delay += XGMAC_PFC_DATA_LEN; + delay += ETH_HLEN + ETH_FCS_LEN; + delay += XGMAC_ETH_PREAMBLE; + + /* Allow for miscellaneous delays (LPI exit, cable, etc.) */ + delay += XGMAC_PFC_DELAYS; + + return delay; +} + +static unsigned int xgbe_get_pfc_queues(struct xgbe_prv_data *pdata) +{ + unsigned int count, prio_queues; + unsigned int i; + + if (!pdata->pfc->pfc_en) + return 0; + + count = 0; + prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count); + for (i = 0; i < prio_queues; i++) { + if (!xgbe_is_pfc_queue(pdata, i)) + continue; + + pdata->pfcq[i] = 1; + count++; + } + + return count; +} + +static void xgbe_calculate_dcb_fifo(struct xgbe_prv_data *pdata, + unsigned int fifo_size, + unsigned int *fifo) +{ + unsigned int q_fifo_size, rem_fifo, addn_fifo; + unsigned int prio_queues; + unsigned int pfc_count; + unsigned int i; + + q_fifo_size = XGMAC_FIFO_ALIGN(xgbe_get_max_frame(pdata)); + prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count); + pfc_count = xgbe_get_pfc_queues(pdata); + + if (!pfc_count || ((q_fifo_size * prio_queues) > fifo_size)) { + /* No traffic classes with PFC enabled or can't do lossless */ + xgbe_calculate_equal_fifo(fifo_size, prio_queues, fifo); + return; + } + + /* Calculate how much fifo we have to play with */ + rem_fifo = fifo_size - (q_fifo_size * prio_queues); + + /* Calculate how much more than base fifo PFC needs, which also + * becomes the threshold activation point (RFA) + */ + pdata->pfc_rfa = xgbe_get_pfc_delay(pdata); + pdata->pfc_rfa = XGMAC_FLOW_CONTROL_ALIGN(pdata->pfc_rfa); + + if (pdata->pfc_rfa > q_fifo_size) { + addn_fifo = pdata->pfc_rfa - q_fifo_size; + addn_fifo = XGMAC_FIFO_ALIGN(addn_fifo); + } else { + addn_fifo = 0; + } + + /* Calculate DCB fifo settings: + * - distribute remaining fifo between the VLAN priority + * queues based on traffic class PFC enablement and overall + * priority (0 is lowest priority, so start at highest) + */ + i = prio_queues; + while (i > 0) { + i--; + + fifo[i] = (q_fifo_size / XGMAC_FIFO_UNIT) - 1; + + if (!pdata->pfcq[i] || !addn_fifo) + continue; + + if (addn_fifo > rem_fifo) { + netdev_warn(pdata->netdev, + "RXq%u cannot set needed fifo size\n", i); + if (!rem_fifo) + continue; + + addn_fifo = rem_fifo; + } + + fifo[i] += (addn_fifo / XGMAC_FIFO_UNIT); + rem_fifo -= addn_fifo; + } + + if (rem_fifo) { + unsigned int inc_fifo = rem_fifo / prio_queues; + + /* Distribute remaining fifo across queues */ + for (i = 0; i < prio_queues; i++) + fifo[i] += (inc_fifo / XGMAC_FIFO_UNIT); + } } static void xgbe_config_tx_fifo_size(struct xgbe_prv_data *pdata) { unsigned int fifo_size; + unsigned int fifo[XGBE_MAX_QUEUES]; unsigned int i; - fifo_size = xgbe_calculate_per_queue_fifo(pdata->hw_feat.tx_fifo_size, - pdata->tx_q_count); + fifo_size = xgbe_get_tx_fifo_size(pdata); + + xgbe_calculate_equal_fifo(fifo_size, pdata->tx_q_count, fifo); for (i = 0; i < pdata->tx_q_count; i++) - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TQS, fifo_size); + 
XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TQS, fifo[i]); netif_info(pdata, drv, pdata->netdev, "%d Tx hardware queues, %d byte fifo per queue\n", - pdata->tx_q_count, ((fifo_size + 1) * 256)); + pdata->tx_q_count, ((fifo[0] + 1) * XGMAC_FIFO_UNIT)); } static void xgbe_config_rx_fifo_size(struct xgbe_prv_data *pdata) { unsigned int fifo_size; + unsigned int fifo[XGBE_MAX_QUEUES]; + unsigned int prio_queues; unsigned int i; - fifo_size = xgbe_calculate_per_queue_fifo(pdata->hw_feat.rx_fifo_size, - pdata->rx_q_count); + /* Clear any DCB related fifo/queue information */ + memset(pdata->pfcq, 0, sizeof(pdata->pfcq)); + pdata->pfc_rfa = 0; + + fifo_size = xgbe_get_rx_fifo_size(pdata); + prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count); + + /* Assign a minimum fifo to the non-VLAN priority queues */ + fifo_size = xgbe_set_nonprio_fifos(fifo_size, pdata->rx_q_count, fifo); + + if (pdata->pfc && pdata->ets) + xgbe_calculate_dcb_fifo(pdata, fifo_size, fifo); + else + xgbe_calculate_equal_fifo(fifo_size, prio_queues, fifo); for (i = 0; i < pdata->rx_q_count; i++) - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, RQS, fifo_size); + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, RQS, fifo[i]); - netif_info(pdata, drv, pdata->netdev, - "%d Rx hardware queues, %d byte fifo per queue\n", - pdata->rx_q_count, ((fifo_size + 1) * 256)); + xgbe_calculate_flow_control_threshold(pdata, fifo); + xgbe_config_flow_control_threshold(pdata); + + if (pdata->pfc && pdata->ets && pdata->pfc->pfc_en) { + netif_info(pdata, drv, pdata->netdev, + "%u Rx hardware queues\n", pdata->rx_q_count); + for (i = 0; i < pdata->rx_q_count; i++) + netif_info(pdata, drv, pdata->netdev, + "RxQ%u, %u byte fifo queue\n", i, + ((fifo[i] + 1) * XGMAC_FIFO_UNIT)); + } else { + netif_info(pdata, drv, pdata->netdev, + "%u Rx hardware queues, %u byte fifo per queue\n", + pdata->rx_q_count, + ((fifo[0] + 1) * XGMAC_FIFO_UNIT)); + } } static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata) @@ -2090,8 +2549,7 @@ static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata) } /* Map the 8 VLAN priority values to available MTL Rx queues */ - prio_queues = min_t(unsigned int, IEEE_8021QAZ_MAX_TCS, - pdata->rx_q_count); + prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count); ppq = IEEE_8021QAZ_MAX_TCS / prio_queues; ppq_extra = IEEE_8021QAZ_MAX_TCS % prio_queues; @@ -2139,16 +2597,120 @@ static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata) } } -static void xgbe_config_flow_control_threshold(struct xgbe_prv_data *pdata) +static void xgbe_config_tc(struct xgbe_prv_data *pdata) { - unsigned int i; + unsigned int offset, queue, prio; + u8 i; - for (i = 0; i < pdata->rx_q_count; i++) { - /* Activate flow control when less than 4k left in fifo */ - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFA, 2); + netdev_reset_tc(pdata->netdev); + if (!pdata->num_tcs) + return; + + netdev_set_num_tc(pdata->netdev, pdata->num_tcs); + + for (i = 0, queue = 0, offset = 0; i < pdata->num_tcs; i++) { + while ((queue < pdata->tx_q_count) && + (pdata->q2tc_map[queue] == i)) + queue++; - /* De-activate flow control when more than 6k left in fifo */ - XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFD, 4); + netif_dbg(pdata, drv, pdata->netdev, "TC%u using TXq%u-%u\n", + i, offset, queue - 1); + netdev_set_tc_queue(pdata->netdev, i, queue - offset, offset); + offset = queue; + } + + if (!pdata->ets) + return; + + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) + netdev_set_prio_tc_map(pdata->netdev, prio, + pdata->ets->prio_tc[prio]); +} + 
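The fifo[] values programmed into TQS/RQS above use the hardware's allocation encoding, where each increment represents one XGMAC_FIFO_UNIT of fifo RAM and a value of 0 already means one unit; hence the (fifo[i] + 1) * XGMAC_FIFO_UNIT conversions in the netif_info() messages. A small illustrative helper pair, assuming the 256-byte unit implied by the comment in the removed xgbe_calculate_per_queue_fifo():

/* illustrative only: byte <-> TQS/RQS encoding, assuming a 256-byte unit */
static unsigned int fifo_reg_to_bytes(unsigned int reg_val)
{
	return (reg_val + 1) * 256;	/* 0 encodes 256 bytes */
}

static unsigned int fifo_bytes_to_reg(unsigned int bytes)
{
	unsigned int units = bytes / 256;

	return units ? units - 1 : 0;
}

For example, a 65536-byte Rx fifo split equally across four queues gives 16384 bytes per queue, i.e. a register value of 63.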
+static void xgbe_config_dcb_tc(struct xgbe_prv_data *pdata) +{ + struct ieee_ets *ets = pdata->ets; + unsigned int total_weight, min_weight, weight; + unsigned int mask, reg, reg_val; + unsigned int i, prio; + + if (!ets) + return; + + /* Set Tx to deficit weighted round robin scheduling algorithm (when + * traffic class is using ETS algorithm) + */ + XGMAC_IOWRITE_BITS(pdata, MTL_OMR, ETSALG, MTL_ETSALG_DWRR); + + /* Set Traffic Class algorithms */ + total_weight = pdata->netdev->mtu * pdata->hw_feat.tc_cnt; + min_weight = total_weight / 100; + if (!min_weight) + min_weight = 1; + + for (i = 0; i < pdata->hw_feat.tc_cnt; i++) { + /* Map the priorities to the traffic class */ + mask = 0; + for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) { + if (ets->prio_tc[prio] == i) + mask |= (1 << prio); + } + mask &= 0xff; + + netif_dbg(pdata, drv, pdata->netdev, "TC%u PRIO mask=%#x\n", + i, mask); + reg = MTL_TCPM0R + (MTL_TCPM_INC * (i / MTL_TCPM_TC_PER_REG)); + reg_val = XGMAC_IOREAD(pdata, reg); + + reg_val &= ~(0xff << ((i % MTL_TCPM_TC_PER_REG) << 3)); + reg_val |= (mask << ((i % MTL_TCPM_TC_PER_REG) << 3)); + + XGMAC_IOWRITE(pdata, reg, reg_val); + + /* Set the traffic class algorithm */ + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_STRICT: + netif_dbg(pdata, drv, pdata->netdev, + "TC%u using SP\n", i); + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA, + MTL_TSA_SP); + break; + case IEEE_8021QAZ_TSA_ETS: + weight = total_weight * ets->tc_tx_bw[i] / 100; + weight = clamp(weight, min_weight, total_weight); + + netif_dbg(pdata, drv, pdata->netdev, + "TC%u using DWRR (weight %u)\n", i, weight); + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA, + MTL_TSA_ETS); + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_QWR, QW, + weight); + break; + } + } + + xgbe_config_tc(pdata); +} + +static void xgbe_config_dcb_pfc(struct xgbe_prv_data *pdata) +{ + if (!test_bit(XGBE_DOWN, &pdata->dev_state)) { + /* Just stop the Tx queues while Rx fifo is changed */ + netif_tx_stop_all_queues(pdata->netdev); + + /* Suspend Rx so that fifo's can be adjusted */ + pdata->hw_if.disable_rx(pdata); + } + + xgbe_config_rx_fifo_size(pdata); + xgbe_config_flow_control(pdata); + + if (!test_bit(XGBE_DOWN, &pdata->dev_state)) { + /* Resume Rx */ + pdata->hw_if.enable_rx(pdata); + + /* Resume Tx queues */ + netif_tx_start_all_queues(pdata->netdev); } } @@ -2175,19 +2737,7 @@ static void xgbe_config_jumbo_enable(struct xgbe_prv_data *pdata) static void xgbe_config_mac_speed(struct xgbe_prv_data *pdata) { - switch (pdata->phy_speed) { - case SPEED_10000: - xgbe_set_xgmii_speed(pdata); - break; - - case SPEED_2500: - xgbe_set_gmii_2500_speed(pdata); - break; - - case SPEED_1000: - xgbe_set_gmii_speed(pdata); - break; - } + xgbe_set_speed(pdata, pdata->phy_speed); } static void xgbe_config_checksum_offload(struct xgbe_prv_data *pdata) @@ -2223,17 +2773,33 @@ static u64 xgbe_mmc_read(struct xgbe_prv_data *pdata, unsigned int reg_lo) bool read_hi; u64 val; - switch (reg_lo) { - /* These registers are always 64 bit */ - case MMC_TXOCTETCOUNT_GB_LO: - case MMC_TXOCTETCOUNT_G_LO: - case MMC_RXOCTETCOUNT_GB_LO: - case MMC_RXOCTETCOUNT_G_LO: - read_hi = true; - break; + if (pdata->vdata->mmc_64bit) { + switch (reg_lo) { + /* These registers are always 32 bit */ + case MMC_RXRUNTERROR: + case MMC_RXJABBERERROR: + case MMC_RXUNDERSIZE_G: + case MMC_RXOVERSIZE_G: + case MMC_RXWATCHDOGERROR: + read_hi = false; + break; - default: - read_hi = false; + default: + read_hi = true; + } + } else { + switch (reg_lo) { + /* These registers are 
always 64 bit */ + case MMC_TXOCTETCOUNT_GB_LO: + case MMC_TXOCTETCOUNT_G_LO: + case MMC_RXOCTETCOUNT_GB_LO: + case MMC_RXOCTETCOUNT_G_LO: + read_hi = true; + break; + + default: + read_hi = false; + } } val = XGMAC_IOREAD(pdata, reg_lo); @@ -2563,20 +3129,48 @@ static void xgbe_config_mmc(struct xgbe_prv_data *pdata) XGMAC_IOWRITE_BITS(pdata, MMC_CR, CR, 1); } +static void xgbe_txq_prepare_tx_stop(struct xgbe_prv_data *pdata, + unsigned int queue) +{ + unsigned int tx_status; + unsigned long tx_timeout; + + /* The Tx engine cannot be stopped if it is actively processing + * packets. Wait for the Tx queue to empty the Tx fifo. Don't + * wait forever though... + */ + tx_timeout = jiffies + (XGBE_DMA_STOP_TIMEOUT * HZ); + while (time_before(jiffies, tx_timeout)) { + tx_status = XGMAC_MTL_IOREAD(pdata, queue, MTL_Q_TQDR); + if ((XGMAC_GET_BITS(tx_status, MTL_Q_TQDR, TRCSTS) != 1) && + (XGMAC_GET_BITS(tx_status, MTL_Q_TQDR, TXQSTS) == 0)) + break; + + usleep_range(500, 1000); + } + + if (!time_before(jiffies, tx_timeout)) + netdev_info(pdata->netdev, + "timed out waiting for Tx queue %u to empty\n", + queue); +} + static void xgbe_prepare_tx_stop(struct xgbe_prv_data *pdata, - struct xgbe_channel *channel) + unsigned int queue) { unsigned int tx_dsr, tx_pos, tx_qidx; unsigned int tx_status; unsigned long tx_timeout; + if (XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER) > 0x20) + return xgbe_txq_prepare_tx_stop(pdata, queue); + /* Calculate the status register to read and the position within */ - if (channel->queue_index < DMA_DSRX_FIRST_QUEUE) { + if (queue < DMA_DSRX_FIRST_QUEUE) { tx_dsr = DMA_DSR0; - tx_pos = (channel->queue_index * DMA_DSR_Q_WIDTH) + - DMA_DSR0_TPS_START; + tx_pos = (queue * DMA_DSR_Q_WIDTH) + DMA_DSR0_TPS_START; } else { - tx_qidx = channel->queue_index - DMA_DSRX_FIRST_QUEUE; + tx_qidx = queue - DMA_DSRX_FIRST_QUEUE; tx_dsr = DMA_DSR1 + ((tx_qidx / DMA_DSRX_QPR) * DMA_DSRX_INC); tx_pos = ((tx_qidx % DMA_DSRX_QPR) * DMA_DSR_Q_WIDTH) + @@ -2601,7 +3195,7 @@ static void xgbe_prepare_tx_stop(struct xgbe_prv_data *pdata, if (!time_before(jiffies, tx_timeout)) netdev_info(pdata->netdev, "timed out waiting for Tx DMA channel %u to stop\n", - channel->queue_index); + queue); } static void xgbe_enable_tx(struct xgbe_prv_data *pdata) @@ -2633,13 +3227,8 @@ static void xgbe_disable_tx(struct xgbe_prv_data *pdata) unsigned int i; /* Prepare for Tx DMA channel stop */ - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->tx_ring) - break; - - xgbe_prepare_tx_stop(pdata, channel); - } + for (i = 0; i < pdata->tx_q_count; i++) + xgbe_prepare_tx_stop(pdata, i); /* Disable MAC Tx */ XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0); @@ -2763,13 +3352,8 @@ static void xgbe_powerdown_tx(struct xgbe_prv_data *pdata) unsigned int i; /* Prepare for Tx DMA channel stop */ - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (!channel->tx_ring) - break; - - xgbe_prepare_tx_stop(pdata, channel); - } + for (i = 0; i < pdata->tx_q_count; i++) + xgbe_prepare_tx_stop(pdata, i); /* Disable MAC Tx */ XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0); @@ -2856,12 +3440,10 @@ static int xgbe_init(struct xgbe_prv_data *pdata) xgbe_config_rx_threshold(pdata, pdata->rx_threshold); xgbe_config_tx_fifo_size(pdata); xgbe_config_rx_fifo_size(pdata); - xgbe_config_flow_control_threshold(pdata); /*TODO: Error Packet and undersized good Packet forwarding enable (FEP and FUP) */ xgbe_config_dcb_tc(pdata); - xgbe_config_dcb_pfc(pdata); 
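[Aside: the ETS weight arithmetic in xgbe_config_dcb_tc above reduces to a simple MTU-scaled percentage. The standalone sketch below restates that math outside the driver; xgbe_ets_weight() and clamp_uint() are hypothetical helpers for illustration only, not driver API.]

#include <stdio.h>

/* Clamp v into [lo, hi]; mirrors the kernel's clamp() used above. */
static unsigned int clamp_uint(unsigned int v, unsigned int lo, unsigned int hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

/* Hypothetical restatement of the DWRR weight math from xgbe_config_dcb_tc:
 * total weight scales with MTU and traffic-class count, each class gets its
 * ETS bandwidth percentage of that total, bounded below by roughly 1%.
 */
static unsigned int xgbe_ets_weight(unsigned int mtu, unsigned int tc_cnt,
				    unsigned int tc_bw_pct)
{
	unsigned int total_weight = mtu * tc_cnt;
	unsigned int min_weight = total_weight / 100;

	if (!min_weight)
		min_weight = 1;

	return clamp_uint(total_weight * tc_bw_pct / 100,
			  min_weight, total_weight);
}

int main(void)
{
	/* e.g. MTU 1500, 4 traffic classes, a 25% ETS share -> weight 1500 */
	printf("weight = %u\n", xgbe_ets_weight(1500, 4, 25));
	return 0;
}

[Scaling the quantum by MTU keeps the per-class DWRR credit meaningful in units of full frames rather than raw bytes.]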
xgbe_enable_mtl_interrupts(pdata); /* @@ -2877,6 +3459,11 @@ static int xgbe_init(struct xgbe_prv_data *pdata) xgbe_config_mmc(pdata); xgbe_enable_mac_interrupts(pdata); + /* + * Initialize ECC related features + */ + xgbe_enable_ecc_interrupts(pdata); + DBGPR("<--xgbe_init\n"); return 0; @@ -2903,9 +3490,14 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) hw_if->read_mmd_regs = xgbe_read_mmd_regs; hw_if->write_mmd_regs = xgbe_write_mmd_regs; - hw_if->set_gmii_speed = xgbe_set_gmii_speed; - hw_if->set_gmii_2500_speed = xgbe_set_gmii_2500_speed; - hw_if->set_xgmii_speed = xgbe_set_xgmii_speed; + hw_if->set_speed = xgbe_set_speed; + + hw_if->set_ext_mii_mode = xgbe_set_ext_mii_mode; + hw_if->read_ext_mii_regs = xgbe_read_ext_mii_regs; + hw_if->write_ext_mii_regs = xgbe_write_ext_mii_regs; + + hw_if->set_gpio = xgbe_set_gpio; + hw_if->clr_gpio = xgbe_clr_gpio; hw_if->enable_tx = xgbe_enable_tx; hw_if->disable_tx = xgbe_disable_tx; @@ -2984,5 +3576,9 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) hw_if->set_rss_hash_key = xgbe_set_rss_hash_key; hw_if->set_rss_lookup_table = xgbe_set_rss_lookup_table; + /* For ECC */ + hw_if->disable_ecc_ded = xgbe_disable_ecc_ded; + hw_if->disable_ecc_sec = xgbe_disable_ecc_sec; + DBGPR("<--xgbe_init_function_ptrs\n"); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index c4e668208e04..155190db682d 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -114,7 +114,7 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -#include <linux/platform_device.h> +#include <linux/module.h> #include <linux/spinlock.h> #include <linux/tcp.h> #include <linux/if_vlan.h> @@ -127,8 +127,35 @@ #include "xgbe.h" #include "xgbe-common.h" +static unsigned int ecc_sec_info_threshold = 10; +static unsigned int ecc_sec_warn_threshold = 10000; +static unsigned int ecc_sec_period = 600; +static unsigned int ecc_ded_threshold = 2; +static unsigned int ecc_ded_period = 600; + +#ifdef CONFIG_AMD_XGBE_HAVE_ECC +/* Only expose the ECC parameters if supported */ +module_param(ecc_sec_info_threshold, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(ecc_sec_info_threshold, + " ECC corrected error informational threshold setting"); + +module_param(ecc_sec_warn_threshold, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(ecc_sec_warn_threshold, + " ECC corrected error warning threshold setting"); + +module_param(ecc_sec_period, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(ecc_sec_period, " ECC corrected error period (in seconds)"); + +module_param(ecc_ded_threshold, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(ecc_ded_threshold, " ECC detected error threshold setting"); + +module_param(ecc_ded_period, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(ecc_ded_period, " ECC detected error period (in seconds)"); +#endif + static int xgbe_one_poll(struct napi_struct *, int); static int xgbe_all_poll(struct napi_struct *, int); +static void xgbe_stop(struct xgbe_prv_data *); static int xgbe_alloc_channels(struct xgbe_prv_data *pdata) { @@ -160,18 +187,8 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata) channel->dma_regs = pdata->xgmac_regs + DMA_CH_BASE + (DMA_CH_INC * i); - if (pdata->per_channel_irq) { - /* Get the DMA interrupt (offset 1) */ - ret = platform_get_irq(pdata->pdev, i + 1); - if (ret < 0) { - netdev_err(pdata->netdev, - "platform_get_irq %u failed\n", - i + 1); - goto err_irq; - } - - channel->dma_irq = ret; - } + if (pdata->per_channel_irq) + channel->dma_irq = 
pdata->channel_irq[i]; if (i < pdata->tx_ring_count) { spin_lock_init(&tx_ring->lock); @@ -194,9 +211,6 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata) return 0; -err_irq: - kfree(rx_ring); - err_rx_ring: kfree(tx_ring); @@ -266,48 +280,161 @@ static int xgbe_calc_rx_buf_size(struct net_device *netdev, unsigned int mtu) return rx_buf_size; } -static void xgbe_enable_rx_tx_ints(struct xgbe_prv_data *pdata) +static void xgbe_enable_rx_tx_int(struct xgbe_prv_data *pdata, + struct xgbe_channel *channel) { struct xgbe_hw_if *hw_if = &pdata->hw_if; - struct xgbe_channel *channel; enum xgbe_int int_id; + + if (channel->tx_ring && channel->rx_ring) + int_id = XGMAC_INT_DMA_CH_SR_TI_RI; + else if (channel->tx_ring) + int_id = XGMAC_INT_DMA_CH_SR_TI; + else if (channel->rx_ring) + int_id = XGMAC_INT_DMA_CH_SR_RI; + else + return; + + hw_if->enable_int(channel, int_id); +} + +static void xgbe_enable_rx_tx_ints(struct xgbe_prv_data *pdata) +{ + struct xgbe_channel *channel; unsigned int i; channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (channel->tx_ring && channel->rx_ring) - int_id = XGMAC_INT_DMA_CH_SR_TI_RI; - else if (channel->tx_ring) - int_id = XGMAC_INT_DMA_CH_SR_TI; - else if (channel->rx_ring) - int_id = XGMAC_INT_DMA_CH_SR_RI; - else - continue; + for (i = 0; i < pdata->channel_count; i++, channel++) + xgbe_enable_rx_tx_int(pdata, channel); +} - hw_if->enable_int(channel, int_id); - } +static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *pdata, + struct xgbe_channel *channel) +{ + struct xgbe_hw_if *hw_if = &pdata->hw_if; + enum xgbe_int int_id; + + if (channel->tx_ring && channel->rx_ring) + int_id = XGMAC_INT_DMA_CH_SR_TI_RI; + else if (channel->tx_ring) + int_id = XGMAC_INT_DMA_CH_SR_TI; + else if (channel->rx_ring) + int_id = XGMAC_INT_DMA_CH_SR_RI; + else + return; + + hw_if->disable_int(channel, int_id); } static void xgbe_disable_rx_tx_ints(struct xgbe_prv_data *pdata) { - struct xgbe_hw_if *hw_if = &pdata->hw_if; struct xgbe_channel *channel; - enum xgbe_int int_id; unsigned int i; channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - if (channel->tx_ring && channel->rx_ring) - int_id = XGMAC_INT_DMA_CH_SR_TI_RI; - else if (channel->tx_ring) - int_id = XGMAC_INT_DMA_CH_SR_TI; - else if (channel->rx_ring) - int_id = XGMAC_INT_DMA_CH_SR_RI; - else - continue; + for (i = 0; i < pdata->channel_count; i++, channel++) + xgbe_disable_rx_tx_int(pdata, channel); +} + +static bool xgbe_ecc_sec(struct xgbe_prv_data *pdata, unsigned long *period, + unsigned int *count, const char *area) +{ + if (time_before(jiffies, *period)) { + (*count)++; + } else { + *period = jiffies + (ecc_sec_period * HZ); + *count = 1; + } + + if (*count > ecc_sec_info_threshold) + dev_warn_once(pdata->dev, + "%s ECC corrected errors exceed informational threshold\n", + area); + + if (*count > ecc_sec_warn_threshold) { + dev_warn_once(pdata->dev, + "%s ECC corrected errors exceed warning threshold\n", + area); + return true; + } + + return false; +} + +static bool xgbe_ecc_ded(struct xgbe_prv_data *pdata, unsigned long *period, + unsigned int *count, const char *area) +{ + if (time_before(jiffies, *period)) { + (*count)++; + } else { + *period = jiffies + (ecc_ded_period * HZ); + *count = 1; + } + + if (*count > ecc_ded_threshold) { + netdev_alert(pdata->netdev, + "%s ECC detected errors exceed threshold\n", + area); + return true; + } + + return false; +} + +static irqreturn_t xgbe_ecc_isr(int irq, void *data) +{ + struct 
xgbe_prv_data *pdata = data; + unsigned int ecc_isr; + bool stop = false; + + /* Mask status with only the interrupts we care about */ + ecc_isr = XP_IOREAD(pdata, XP_ECC_ISR); + ecc_isr &= XP_IOREAD(pdata, XP_ECC_IER); + netif_dbg(pdata, intr, pdata->netdev, "ECC_ISR=%#010x\n", ecc_isr); + + if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, TX_DED)) { + stop |= xgbe_ecc_ded(pdata, &pdata->tx_ded_period, + &pdata->tx_ded_count, "TX fifo"); + } - hw_if->disable_int(channel, int_id); + if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, RX_DED)) { + stop |= xgbe_ecc_ded(pdata, &pdata->rx_ded_period, + &pdata->rx_ded_count, "RX fifo"); } + + if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, DESC_DED)) { + stop |= xgbe_ecc_ded(pdata, &pdata->desc_ded_period, + &pdata->desc_ded_count, + "descriptor cache"); + } + + if (stop) { + pdata->hw_if.disable_ecc_ded(pdata); + schedule_work(&pdata->stopdev_work); + goto out; + } + + if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, TX_SEC)) { + if (xgbe_ecc_sec(pdata, &pdata->tx_sec_period, + &pdata->tx_sec_count, "TX fifo")) + pdata->hw_if.disable_ecc_sec(pdata, XGBE_ECC_SEC_TX); + } + + if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, RX_SEC)) + if (xgbe_ecc_sec(pdata, &pdata->rx_sec_period, + &pdata->rx_sec_count, "RX fifo")) + pdata->hw_if.disable_ecc_sec(pdata, XGBE_ECC_SEC_RX); + + if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, DESC_SEC)) + if (xgbe_ecc_sec(pdata, &pdata->desc_sec_period, + &pdata->desc_sec_count, "descriptor cache")) + pdata->hw_if.disable_ecc_sec(pdata, XGBE_ECC_SEC_DESC); + +out: + /* Clear all ECC interrupts */ + XP_IOWRITE(pdata, XP_ECC_ISR, ecc_isr); + + return IRQ_HANDLED; } static irqreturn_t xgbe_isr(int irq, void *data) @@ -316,7 +443,7 @@ static irqreturn_t xgbe_isr(int irq, void *data) struct xgbe_hw_if *hw_if = &pdata->hw_if; struct xgbe_channel *channel; unsigned int dma_isr, dma_ch_isr; - unsigned int mac_isr, mac_tssr; + unsigned int mac_isr, mac_tssr, mac_mdioisr; unsigned int i; /* The DMA interrupt status register also reports MAC and MTL @@ -353,6 +480,13 @@ static irqreturn_t xgbe_isr(int irq, void *data) /* Turn on polling */ __napi_schedule_irqoff(&pdata->napi); } + } else { + /* Don't clear Rx/Tx status if doing per channel DMA + * interrupts, these will be cleared by the ISR for + * per channel DMA interrupts. 
+ */ + XGMAC_SET_BITS(dma_ch_isr, DMA_CH_SR, TI, 0); + XGMAC_SET_BITS(dma_ch_isr, DMA_CH_SR, RI, 0); } if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, RBU)) @@ -362,13 +496,16 @@ static irqreturn_t xgbe_isr(int irq, void *data) if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, FBE)) schedule_work(&pdata->restart_work); - /* Clear all interrupt signals */ + /* Clear interrupt signals */ XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_ch_isr); } if (XGMAC_GET_BITS(dma_isr, DMA_ISR, MACIS)) { mac_isr = XGMAC_IOREAD(pdata, MAC_ISR); + netif_dbg(pdata, intr, pdata->netdev, "MAC_ISR=%#010x\n", + mac_isr); + if (XGMAC_GET_BITS(mac_isr, MAC_ISR, MMCTXIS)) hw_if->tx_mmc_int(pdata); @@ -378,6 +515,9 @@ static irqreturn_t xgbe_isr(int irq, void *data) if (XGMAC_GET_BITS(mac_isr, MAC_ISR, TSIS)) { mac_tssr = XGMAC_IOREAD(pdata, MAC_TSSR); + netif_dbg(pdata, intr, pdata->netdev, + "MAC_TSSR=%#010x\n", mac_tssr); + if (XGMAC_GET_BITS(mac_tssr, MAC_TSSR, TXTSC)) { /* Read Tx Timestamp to clear interrupt */ pdata->tx_tstamp = @@ -386,8 +526,31 @@ static irqreturn_t xgbe_isr(int irq, void *data) &pdata->tx_tstamp_work); } } + + if (XGMAC_GET_BITS(mac_isr, MAC_ISR, SMI)) { + mac_mdioisr = XGMAC_IOREAD(pdata, MAC_MDIOISR); + + netif_dbg(pdata, intr, pdata->netdev, + "MAC_MDIOISR=%#010x\n", mac_mdioisr); + + if (XGMAC_GET_BITS(mac_mdioisr, MAC_MDIOISR, + SNGLCOMPINT)) + complete(&pdata->mdio_complete); + } } + /* If there is not a separate AN irq, handle it here */ + if (pdata->dev_irq == pdata->an_irq) + pdata->phy_if.an_isr(irq, pdata); + + /* If there is not a separate ECC irq, handle it here */ + if (pdata->vdata->ecc_support && (pdata->dev_irq == pdata->ecc_irq)) + xgbe_ecc_isr(irq, pdata); + + /* If there is not a separate I2C irq, handle it here */ + if (pdata->vdata->i2c_support && (pdata->dev_irq == pdata->i2c_irq)) + pdata->i2c_if.i2c_isr(irq, pdata); + isr_done: return IRQ_HANDLED; } @@ -395,18 +558,29 @@ isr_done: static irqreturn_t xgbe_dma_isr(int irq, void *data) { struct xgbe_channel *channel = data; + struct xgbe_prv_data *pdata = channel->pdata; + unsigned int dma_status; /* Per channel DMA interrupts are enabled, so we use the per * channel napi structure and not the private data napi structure */ if (napi_schedule_prep(&channel->napi)) { /* Disable Tx and Rx interrupts */ - disable_irq_nosync(channel->dma_irq); + if (pdata->channel_irq_mode) + xgbe_disable_rx_tx_int(pdata, channel); + else + disable_irq_nosync(channel->dma_irq); /* Turn on polling */ __napi_schedule_irqoff(&channel->napi); } + /* Clear Tx/Rx signals */ + dma_status = 0; + XGMAC_SET_BITS(dma_status, DMA_CH_SR, TI, 1); + XGMAC_SET_BITS(dma_status, DMA_CH_SR, RI, 1); + XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_status); + return IRQ_HANDLED; } @@ -423,7 +597,10 @@ static void xgbe_tx_timer(unsigned long data) if (napi_schedule_prep(napi)) { /* Disable Tx and Rx interrupts */ if (pdata->per_channel_irq) - disable_irq_nosync(channel->dma_irq); + if (pdata->channel_irq_mode) + xgbe_disable_rx_tx_int(pdata, channel); + else + disable_irq_nosync(channel->dma_irq); else xgbe_disable_rx_tx_ints(pdata); @@ -590,6 +767,10 @@ void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata) hw_feat->tx_ch_cnt++; hw_feat->tc_cnt++; + /* Translate the fifo sizes into actual numbers */ + hw_feat->rx_fifo_size = 1 << (hw_feat->rx_fifo_size + 7); + hw_feat->tx_fifo_size = 1 << (hw_feat->tx_fifo_size + 7); + DBGPR("<--xgbe_get_all_hw_features\n"); } @@ -652,6 +833,16 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata) return ret; } + if 
(pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) { + ret = devm_request_irq(pdata->dev, pdata->ecc_irq, xgbe_ecc_isr, + 0, pdata->ecc_name, pdata); + if (ret) { + netdev_alert(netdev, "error requesting ecc irq %d\n", + pdata->ecc_irq); + goto err_dev_irq; + } + } + if (!pdata->per_channel_irq) return 0; @@ -668,17 +859,21 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata) if (ret) { netdev_alert(netdev, "error requesting irq %d\n", channel->dma_irq); - goto err_irq; + goto err_dma_irq; } } return 0; -err_irq: +err_dma_irq: /* Using an unsigned int, 'i' will go to UINT_MAX and exit */ for (i--, channel--; i < pdata->channel_count; i--, channel--) devm_free_irq(pdata->dev, channel->dma_irq, channel); + if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) + devm_free_irq(pdata->dev, pdata->ecc_irq, pdata); + +err_dev_irq: devm_free_irq(pdata->dev, pdata->dev_irq, pdata); return ret; @@ -691,6 +886,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata) devm_free_irq(pdata->dev, pdata->dev_irq, pdata); + if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) + devm_free_irq(pdata->dev, pdata->ecc_irq, pdata); + if (!pdata->per_channel_irq) return; @@ -778,7 +976,7 @@ static void xgbe_free_rx_data(struct xgbe_prv_data *pdata) DBGPR("<--xgbe_free_rx_data\n"); } -static int xgbe_phy_init(struct xgbe_prv_data *pdata) +static int xgbe_phy_reset(struct xgbe_prv_data *pdata) { pdata->phy_link = -1; pdata->phy_speed = SPEED_UNKNOWN; @@ -874,16 +1072,16 @@ static int xgbe_start(struct xgbe_prv_data *pdata) hw_if->init(pdata); - ret = phy_if->phy_start(pdata); - if (ret) - goto err_phy; - xgbe_napi_enable(pdata, 1); ret = xgbe_request_irqs(pdata); if (ret) goto err_napi; + ret = phy_if->phy_start(pdata); + if (ret) + goto err_irqs; + hw_if->enable_tx(pdata); hw_if->enable_rx(pdata); @@ -892,16 +1090,18 @@ static int xgbe_start(struct xgbe_prv_data *pdata) xgbe_start_timers(pdata); queue_work(pdata->dev_workqueue, &pdata->service_work); + clear_bit(XGBE_STOPPED, &pdata->dev_state); + DBGPR("<--xgbe_start\n"); return 0; +err_irqs: + xgbe_free_irqs(pdata); + err_napi: xgbe_napi_disable(pdata, 1); - phy_if->phy_stop(pdata); - -err_phy: hw_if->exit(pdata); return ret; @@ -918,6 +1118,9 @@ static void xgbe_stop(struct xgbe_prv_data *pdata) DBGPR("-->xgbe_stop\n"); + if (test_bit(XGBE_STOPPED, &pdata->dev_state)) + return; + netif_tx_stop_all_queues(netdev); xgbe_stop_timers(pdata); @@ -943,9 +1146,29 @@ static void xgbe_stop(struct xgbe_prv_data *pdata) netdev_tx_reset_queue(txq); } + set_bit(XGBE_STOPPED, &pdata->dev_state); + DBGPR("<--xgbe_stop\n"); } +static void xgbe_stopdev(struct work_struct *work) +{ + struct xgbe_prv_data *pdata = container_of(work, + struct xgbe_prv_data, + stopdev_work); + + rtnl_lock(); + + xgbe_stop(pdata); + + xgbe_free_tx_data(pdata); + xgbe_free_rx_data(pdata); + + rtnl_unlock(); + + netdev_alert(pdata->netdev, "device stopped\n"); +} + static void xgbe_restart_dev(struct xgbe_prv_data *pdata) { DBGPR("-->xgbe_restart_dev\n"); @@ -1292,8 +1515,8 @@ static int xgbe_open(struct net_device *netdev) DBGPR("-->xgbe_open\n"); - /* Initialize the phy */ - ret = xgbe_phy_init(pdata); + /* Reset the phy settings */ + ret = xgbe_phy_reset(pdata); if (ret) return ret; @@ -1328,6 +1551,7 @@ static int xgbe_open(struct net_device *netdev) INIT_WORK(&pdata->service_work, xgbe_service); INIT_WORK(&pdata->restart_work, xgbe_restart); + INIT_WORK(&pdata->stopdev_work, xgbe_stopdev); INIT_WORK(&pdata->tx_tstamp_work, xgbe_tx_tstamp); 
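[Aside: the stopdev_work flow added above follows the standard pattern for deferring teardown out of interrupt context: xgbe_ecc_isr only schedules the work, and the handler runs in process context where rtnl_lock() is allowed to sleep. A minimal sketch of that pattern, with hypothetical names (my_data, my_isr, my_stopdev) and error handling omitted:]

#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/kernel.h>

struct my_data {
	struct work_struct stopdev_work;
};

static void my_stopdev(struct work_struct *work)
{
	struct my_data *data = container_of(work, struct my_data,
					    stopdev_work);

	rtnl_lock();		/* may sleep: process context only */
	/* ... stop queues, free Tx/Rx ring data ... */
	rtnl_unlock();
}

static irqreturn_t my_isr(int irq, void *dev_id)
{
	struct my_data *data = dev_id;

	/* Cannot take rtnl_lock here; defer teardown to process context */
	schedule_work(&data->stopdev_work);

	return IRQ_HANDLED;
}

static void my_init(struct my_data *data)
{
	/* One-time setup, as done with stopdev_work in xgbe_open above */
	INIT_WORK(&data->stopdev_work, my_stopdev);
}

[This is why the ECC ISR disables further DED interrupts before scheduling the work: the device may not be stopped until the work runs, and re-arming the interrupt would only re-queue the same shutdown.]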
xgbe_init_timers(pdata); @@ -2036,6 +2260,7 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget) { struct xgbe_channel *channel = container_of(napi, struct xgbe_channel, napi); + struct xgbe_prv_data *pdata = channel->pdata; int processed = 0; DBGPR("-->xgbe_one_poll: budget=%d\n", budget); @@ -2052,7 +2277,10 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget) napi_complete_done(napi, processed); /* Enable Tx and Rx interrupts */ - enable_irq(channel->dma_irq); + if (pdata->channel_irq_mode) + xgbe_enable_rx_tx_int(pdata, channel); + else + enable_irq(channel->dma_irq); } DBGPR("<--xgbe_one_poll: received = %d\n", processed); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 4007b429c80c..920566a3a599 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -272,97 +272,86 @@ static int xgbe_set_pauseparam(struct net_device *netdev, return ret; } -static int xgbe_get_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +static int xgbe_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) { struct xgbe_prv_data *pdata = netdev_priv(netdev); - cmd->phy_address = pdata->phy.address; + cmd->base.phy_address = pdata->phy.address; - cmd->supported = pdata->phy.supported; - cmd->advertising = pdata->phy.advertising; - cmd->lp_advertising = pdata->phy.lp_advertising; + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + pdata->phy.supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + pdata->phy.advertising); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising, + pdata->phy.lp_advertising); - cmd->autoneg = pdata->phy.autoneg; - ethtool_cmd_speed_set(cmd, pdata->phy.speed); - cmd->duplex = pdata->phy.duplex; + cmd->base.autoneg = pdata->phy.autoneg; + cmd->base.speed = pdata->phy.speed; + cmd->base.duplex = pdata->phy.duplex; - cmd->port = PORT_NONE; - cmd->transceiver = XCVR_INTERNAL; + cmd->base.port = PORT_NONE; return 0; } -static int xgbe_set_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +static int xgbe_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) { struct xgbe_prv_data *pdata = netdev_priv(netdev); + u32 advertising; u32 speed; int ret; - speed = ethtool_cmd_speed(cmd); + speed = cmd->base.speed; - if (cmd->phy_address != pdata->phy.address) { + if (cmd->base.phy_address != pdata->phy.address) { netdev_err(netdev, "invalid phy address %hhu\n", - cmd->phy_address); + cmd->base.phy_address); return -EINVAL; } - if ((cmd->autoneg != AUTONEG_ENABLE) && - (cmd->autoneg != AUTONEG_DISABLE)) { + if ((cmd->base.autoneg != AUTONEG_ENABLE) && + (cmd->base.autoneg != AUTONEG_DISABLE)) { netdev_err(netdev, "unsupported autoneg %hhu\n", - cmd->autoneg); + cmd->base.autoneg); return -EINVAL; } - if (cmd->autoneg == AUTONEG_DISABLE) { - switch (speed) { - case SPEED_10000: - break; - case SPEED_2500: - if (pdata->speed_set != XGBE_SPEEDSET_2500_10000) { - netdev_err(netdev, "unsupported speed %u\n", - speed); - return -EINVAL; - } - break; - case SPEED_1000: - if (pdata->speed_set != XGBE_SPEEDSET_1000_10000) { - netdev_err(netdev, "unsupported speed %u\n", - speed); - return -EINVAL; - } - break; - default: + if (cmd->base.autoneg == AUTONEG_DISABLE) { + if (!pdata->phy_if.phy_valid_speed(pdata, speed)) { netdev_err(netdev, "unsupported speed %u\n", speed); return -EINVAL; } - if (cmd->duplex != 
DUPLEX_FULL) { + if (cmd->base.duplex != DUPLEX_FULL) { netdev_err(netdev, "unsupported duplex %hhu\n", - cmd->duplex); + cmd->base.duplex); return -EINVAL; } } + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + netif_dbg(pdata, link, netdev, "requested advertisement %#x, phy supported %#x\n", - cmd->advertising, pdata->phy.supported); + advertising, pdata->phy.supported); - cmd->advertising &= pdata->phy.supported; - if ((cmd->autoneg == AUTONEG_ENABLE) && !cmd->advertising) { + advertising &= pdata->phy.supported; + if ((cmd->base.autoneg == AUTONEG_ENABLE) && !advertising) { netdev_err(netdev, "unsupported requested advertisement\n"); return -EINVAL; } ret = 0; - pdata->phy.autoneg = cmd->autoneg; + pdata->phy.autoneg = cmd->base.autoneg; pdata->phy.speed = speed; - pdata->phy.duplex = cmd->duplex; - pdata->phy.advertising = cmd->advertising; + pdata->phy.duplex = cmd->base.duplex; + pdata->phy.advertising = advertising; - if (cmd->autoneg == AUTONEG_ENABLE) + if (cmd->base.autoneg == AUTONEG_ENABLE) pdata->phy.advertising |= ADVERTISED_Autoneg; else pdata->phy.advertising &= ~ADVERTISED_Autoneg; @@ -602,8 +591,6 @@ static int xgbe_get_ts_info(struct net_device *netdev, } static const struct ethtool_ops xgbe_ethtool_ops = { - .get_settings = xgbe_get_settings, - .set_settings = xgbe_set_settings, .get_drvinfo = xgbe_get_drvinfo, .get_msglevel = xgbe_get_msglevel, .set_msglevel = xgbe_set_msglevel, @@ -621,6 +608,8 @@ static const struct ethtool_ops xgbe_ethtool_ops = { .get_rxfh = xgbe_get_rxfh, .set_rxfh = xgbe_set_rxfh, .get_ts_info = xgbe_get_ts_info, + .get_link_ksettings = xgbe_get_link_ksettings, + .set_link_ksettings = xgbe_set_link_ksettings, }; const struct ethtool_ops *xgbe_get_ethtool_ops(void) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c new file mode 100644 index 000000000000..0c7088a426e9 --- /dev/null +++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c @@ -0,0 +1,492 @@ +/* + * AMD 10Gb Ethernet driver + * + * This file is available to you under your choice of the following two + * licenses: + * + * License 1: GPLv2 + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * + * This file is free software; you may copy, redistribute and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or (at + * your option) any later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. 
Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * + * License 2: Modified BSD + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Advanced Micro Devices, Inc. nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. 
Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/kmod.h> +#include <linux/delay.h> +#include <linux/completion.h> +#include <linux/mutex.h> + +#include "xgbe.h" +#include "xgbe-common.h" + +#define XGBE_ABORT_COUNT 500 +#define XGBE_DISABLE_COUNT 1000 + +#define XGBE_STD_SPEED 1 + +#define XGBE_INTR_RX_FULL BIT(IC_RAW_INTR_STAT_RX_FULL_INDEX) +#define XGBE_INTR_TX_EMPTY BIT(IC_RAW_INTR_STAT_TX_EMPTY_INDEX) +#define XGBE_INTR_TX_ABRT BIT(IC_RAW_INTR_STAT_TX_ABRT_INDEX) +#define XGBE_INTR_STOP_DET BIT(IC_RAW_INTR_STAT_STOP_DET_INDEX) +#define XGBE_DEFAULT_INT_MASK (XGBE_INTR_RX_FULL | \ + XGBE_INTR_TX_EMPTY | \ + XGBE_INTR_TX_ABRT | \ + XGBE_INTR_STOP_DET) + +#define XGBE_I2C_READ BIT(8) +#define XGBE_I2C_STOP BIT(9) + +static int xgbe_i2c_abort(struct xgbe_prv_data *pdata) +{ + unsigned int wait = XGBE_ABORT_COUNT; + + /* Must be enabled to recognize the abort request */ + XI2C_IOWRITE_BITS(pdata, IC_ENABLE, EN, 1); + + /* Issue the abort */ + XI2C_IOWRITE_BITS(pdata, IC_ENABLE, ABORT, 1); + + while (wait--) { + if (!XI2C_IOREAD_BITS(pdata, IC_ENABLE, ABORT)) + return 0; + + usleep_range(500, 600); + } + + return -EBUSY; +} + +static int xgbe_i2c_set_enable(struct xgbe_prv_data *pdata, bool enable) +{ + unsigned int wait = XGBE_DISABLE_COUNT; + unsigned int mode = enable ? 
1 : 0; + + while (wait--) { + XI2C_IOWRITE_BITS(pdata, IC_ENABLE, EN, mode); + if (XI2C_IOREAD_BITS(pdata, IC_ENABLE_STATUS, EN) == mode) + return 0; + + usleep_range(100, 110); + } + + return -EBUSY; +} + +static int xgbe_i2c_disable(struct xgbe_prv_data *pdata) +{ + unsigned int ret; + + ret = xgbe_i2c_set_enable(pdata, false); + if (ret) { + /* Disable failed, try an abort */ + ret = xgbe_i2c_abort(pdata); + if (ret) + return ret; + + /* Abort succeeded, try to disable again */ + ret = xgbe_i2c_set_enable(pdata, false); + } + + return ret; +} + +static int xgbe_i2c_enable(struct xgbe_prv_data *pdata) +{ + return xgbe_i2c_set_enable(pdata, true); +} + +static void xgbe_i2c_clear_all_interrupts(struct xgbe_prv_data *pdata) +{ + XI2C_IOREAD(pdata, IC_CLR_INTR); +} + +static void xgbe_i2c_disable_interrupts(struct xgbe_prv_data *pdata) +{ + XI2C_IOWRITE(pdata, IC_INTR_MASK, 0); +} + +static void xgbe_i2c_enable_interrupts(struct xgbe_prv_data *pdata) +{ + XI2C_IOWRITE(pdata, IC_INTR_MASK, XGBE_DEFAULT_INT_MASK); +} + +static void xgbe_i2c_write(struct xgbe_prv_data *pdata) +{ + struct xgbe_i2c_op_state *state = &pdata->i2c.op_state; + unsigned int tx_slots; + unsigned int cmd; + + /* Configured to never receive Rx overflows, so fill up Tx fifo */ + tx_slots = pdata->i2c.tx_fifo_size - XI2C_IOREAD(pdata, IC_TXFLR); + while (tx_slots && state->tx_len) { + if (state->op->cmd == XGBE_I2C_CMD_READ) + cmd = XGBE_I2C_READ; + else + cmd = *state->tx_buf++; + + if (state->tx_len == 1) + XI2C_SET_BITS(cmd, IC_DATA_CMD, STOP, 1); + + XI2C_IOWRITE(pdata, IC_DATA_CMD, cmd); + + tx_slots--; + state->tx_len--; + } + + /* No more Tx operations, so ignore TX_EMPTY and return */ + if (!state->tx_len) + XI2C_IOWRITE_BITS(pdata, IC_INTR_MASK, TX_EMPTY, 0); +} + +static void xgbe_i2c_read(struct xgbe_prv_data *pdata) +{ + struct xgbe_i2c_op_state *state = &pdata->i2c.op_state; + unsigned int rx_slots; + + /* Anything to be read? 
*/ + if (state->op->cmd != XGBE_I2C_CMD_READ) + return; + + rx_slots = XI2C_IOREAD(pdata, IC_RXFLR); + while (rx_slots && state->rx_len) { + *state->rx_buf++ = XI2C_IOREAD(pdata, IC_DATA_CMD); + state->rx_len--; + rx_slots--; + } +} + +static void xgbe_i2c_clear_isr_interrupts(struct xgbe_prv_data *pdata, + unsigned int isr) +{ + struct xgbe_i2c_op_state *state = &pdata->i2c.op_state; + + if (isr & XGBE_INTR_TX_ABRT) { + state->tx_abort_source = XI2C_IOREAD(pdata, IC_TX_ABRT_SOURCE); + XI2C_IOREAD(pdata, IC_CLR_TX_ABRT); + } + + if (isr & XGBE_INTR_STOP_DET) + XI2C_IOREAD(pdata, IC_CLR_STOP_DET); +} + +static irqreturn_t xgbe_i2c_isr(int irq, void *data) +{ + struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + struct xgbe_i2c_op_state *state = &pdata->i2c.op_state; + unsigned int isr; + + isr = XI2C_IOREAD(pdata, IC_RAW_INTR_STAT); + netif_dbg(pdata, intr, pdata->netdev, + "I2C interrupt received: status=%#010x\n", isr); + + xgbe_i2c_clear_isr_interrupts(pdata, isr); + + if (isr & XGBE_INTR_TX_ABRT) { + netif_dbg(pdata, link, pdata->netdev, + "I2C TX_ABRT received (%#010x) for target %#04x\n", + state->tx_abort_source, state->op->target); + + xgbe_i2c_disable_interrupts(pdata); + + state->ret = -EIO; + goto out; + } + + /* Check for data in the Rx fifo */ + xgbe_i2c_read(pdata); + + /* Fill up the Tx fifo next */ + xgbe_i2c_write(pdata); + +out: + /* Complete on an error or STOP condition */ + if (state->ret || XI2C_GET_BITS(isr, IC_RAW_INTR_STAT, STOP_DET)) + complete(&pdata->i2c_complete); + + return IRQ_HANDLED; +} + +static void xgbe_i2c_set_mode(struct xgbe_prv_data *pdata) +{ + unsigned int reg; + + reg = XI2C_IOREAD(pdata, IC_CON); + XI2C_SET_BITS(reg, IC_CON, MASTER_MODE, 1); + XI2C_SET_BITS(reg, IC_CON, SLAVE_DISABLE, 1); + XI2C_SET_BITS(reg, IC_CON, RESTART_EN, 1); + XI2C_SET_BITS(reg, IC_CON, SPEED, XGBE_STD_SPEED); + XI2C_SET_BITS(reg, IC_CON, RX_FIFO_FULL_HOLD, 1); + XI2C_IOWRITE(pdata, IC_CON, reg); +} + +static void xgbe_i2c_get_features(struct xgbe_prv_data *pdata) +{ + struct xgbe_i2c *i2c = &pdata->i2c; + unsigned int reg; + + reg = XI2C_IOREAD(pdata, IC_COMP_PARAM_1); + i2c->max_speed_mode = XI2C_GET_BITS(reg, IC_COMP_PARAM_1, + MAX_SPEED_MODE); + i2c->rx_fifo_size = XI2C_GET_BITS(reg, IC_COMP_PARAM_1, + RX_BUFFER_DEPTH); + i2c->tx_fifo_size = XI2C_GET_BITS(reg, IC_COMP_PARAM_1, + TX_BUFFER_DEPTH); + + if (netif_msg_probe(pdata)) + dev_dbg(pdata->dev, "I2C features: %s=%u, %s=%u, %s=%u\n", + "MAX_SPEED_MODE", i2c->max_speed_mode, + "RX_BUFFER_DEPTH", i2c->rx_fifo_size, + "TX_BUFFER_DEPTH", i2c->tx_fifo_size); +} + +static void xgbe_i2c_set_target(struct xgbe_prv_data *pdata, unsigned int addr) +{ + XI2C_IOWRITE(pdata, IC_TAR, addr); +} + +static irqreturn_t xgbe_i2c_combined_isr(int irq, struct xgbe_prv_data *pdata) +{ + if (!XI2C_IOREAD(pdata, IC_RAW_INTR_STAT)) + return IRQ_HANDLED; + + return xgbe_i2c_isr(irq, pdata); +} + +static int xgbe_i2c_xfer(struct xgbe_prv_data *pdata, struct xgbe_i2c_op *op) +{ + struct xgbe_i2c_op_state *state = &pdata->i2c.op_state; + int ret; + + mutex_lock(&pdata->i2c_mutex); + + reinit_completion(&pdata->i2c_complete); + + ret = xgbe_i2c_disable(pdata); + if (ret) { + netdev_err(pdata->netdev, "failed to disable i2c master\n"); + goto unlock; + } + + xgbe_i2c_set_target(pdata, op->target); + + memset(state, 0, sizeof(*state)); + state->op = op; + state->tx_len = op->len; + state->tx_buf = op->buf; + state->rx_len = op->len; + state->rx_buf = op->buf; + + xgbe_i2c_clear_all_interrupts(pdata); + ret = xgbe_i2c_enable(pdata); + if 
(ret) { + netdev_err(pdata->netdev, "failed to enable i2c master\n"); + goto unlock; + } + + /* Enabling the interrupts will cause the TX FIFO empty interrupt to + * fire and begin to process the command via the ISR. + */ + xgbe_i2c_enable_interrupts(pdata); + + if (!wait_for_completion_timeout(&pdata->i2c_complete, HZ)) { + netdev_err(pdata->netdev, "i2c operation timed out\n"); + ret = -ETIMEDOUT; + goto disable; + } + + ret = state->ret; + if (ret) { + if (state->tx_abort_source & IC_TX_ABRT_7B_ADDR_NOACK) + ret = -ENOTCONN; + else if (state->tx_abort_source & IC_TX_ABRT_ARB_LOST) + ret = -EAGAIN; + } + +disable: + xgbe_i2c_disable_interrupts(pdata); + xgbe_i2c_disable(pdata); + +unlock: + mutex_unlock(&pdata->i2c_mutex); + + return ret; +} + +static void xgbe_i2c_stop(struct xgbe_prv_data *pdata) +{ + if (!pdata->i2c.started) + return; + + netif_dbg(pdata, link, pdata->netdev, "stopping I2C\n"); + + pdata->i2c.started = 0; + + xgbe_i2c_disable_interrupts(pdata); + xgbe_i2c_disable(pdata); + xgbe_i2c_clear_all_interrupts(pdata); + + if (pdata->dev_irq != pdata->i2c_irq) + devm_free_irq(pdata->dev, pdata->i2c_irq, pdata); +} + +static int xgbe_i2c_start(struct xgbe_prv_data *pdata) +{ + int ret; + + if (pdata->i2c.started) + return 0; + + netif_dbg(pdata, link, pdata->netdev, "starting I2C\n"); + + /* If we have a separate I2C irq, enable it */ + if (pdata->dev_irq != pdata->i2c_irq) { + ret = devm_request_irq(pdata->dev, pdata->i2c_irq, + xgbe_i2c_isr, 0, pdata->i2c_name, + pdata); + if (ret) { + netdev_err(pdata->netdev, "i2c irq request failed\n"); + return ret; + } + } + + pdata->i2c.started = 1; + + return 0; +} + +static int xgbe_i2c_init(struct xgbe_prv_data *pdata) +{ + int ret; + + xgbe_i2c_disable_interrupts(pdata); + + ret = xgbe_i2c_disable(pdata); + if (ret) { + dev_err(pdata->dev, "failed to disable i2c master\n"); + return ret; + } + + xgbe_i2c_get_features(pdata); + + xgbe_i2c_set_mode(pdata); + + xgbe_i2c_clear_all_interrupts(pdata); + + return 0; +} + +void xgbe_init_function_ptrs_i2c(struct xgbe_i2c_if *i2c_if) +{ + i2c_if->i2c_init = xgbe_i2c_init; + + i2c_if->i2c_start = xgbe_i2c_start; + i2c_if->i2c_stop = xgbe_i2c_stop; + + i2c_if->i2c_xfer = xgbe_i2c_xfer; + + i2c_if->i2c_isr = xgbe_i2c_combined_isr; +} diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index 6997f1110ece..b87a89988ffd 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -116,19 +116,10 @@ #include <linux/module.h> #include <linux/device.h> -#include <linux/platform_device.h> #include <linux/spinlock.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/io.h> -#include <linux/of.h> -#include <linux/of_net.h> -#include <linux/of_address.h> -#include <linux/of_platform.h> -#include <linux/clk.h> -#include <linux/property.h> -#include <linux/acpi.h> -#include <linux/mdio.h> #include "xgbe.h" #include "xgbe-common.h" @@ -145,42 +136,6 @@ MODULE_PARM_DESC(debug, " Network interface message level setting"); static const u32 default_msg_level = (NETIF_MSG_LINK | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP); -static const u32 xgbe_serdes_blwc[] = { - XGBE_SPEED_1000_BLWC, - XGBE_SPEED_2500_BLWC, - XGBE_SPEED_10000_BLWC, -}; - -static const u32 xgbe_serdes_cdr_rate[] = { - XGBE_SPEED_1000_CDR, - XGBE_SPEED_2500_CDR, - XGBE_SPEED_10000_CDR, -}; - -static const u32 xgbe_serdes_pq_skew[] = { - XGBE_SPEED_1000_PQ, - XGBE_SPEED_2500_PQ, - XGBE_SPEED_10000_PQ, -}; - -static const u32 
xgbe_serdes_tx_amp[] = { - XGBE_SPEED_1000_TXAMP, - XGBE_SPEED_2500_TXAMP, - XGBE_SPEED_10000_TXAMP, -}; - -static const u32 xgbe_serdes_dfe_tap_cfg[] = { - XGBE_SPEED_1000_DFE_TAP_CONFIG, - XGBE_SPEED_2500_DFE_TAP_CONFIG, - XGBE_SPEED_10000_DFE_TAP_CONFIG, -}; - -static const u32 xgbe_serdes_dfe_tap_ena[] = { - XGBE_SPEED_1000_DFE_TAP_ENABLE, - XGBE_SPEED_2500_DFE_TAP_ENABLE, - XGBE_SPEED_10000_DFE_TAP_ENABLE, -}; - static void xgbe_default_config(struct xgbe_prv_data *pdata) { DBGPR("-->xgbe_default_config\n"); @@ -206,456 +161,124 @@ static void xgbe_init_all_fptrs(struct xgbe_prv_data *pdata) { xgbe_init_function_ptrs_dev(&pdata->hw_if); xgbe_init_function_ptrs_phy(&pdata->phy_if); + xgbe_init_function_ptrs_i2c(&pdata->i2c_if); xgbe_init_function_ptrs_desc(&pdata->desc_if); -} - -#ifdef CONFIG_ACPI -static int xgbe_acpi_support(struct xgbe_prv_data *pdata) -{ - struct device *dev = pdata->dev; - u32 property; - int ret; - - /* Obtain the system clock setting */ - ret = device_property_read_u32(dev, XGBE_ACPI_DMA_FREQ, &property); - if (ret) { - dev_err(dev, "unable to obtain %s property\n", - XGBE_ACPI_DMA_FREQ); - return ret; - } - pdata->sysclk_rate = property; - - /* Obtain the PTP clock setting */ - ret = device_property_read_u32(dev, XGBE_ACPI_PTP_FREQ, &property); - if (ret) { - dev_err(dev, "unable to obtain %s property\n", - XGBE_ACPI_PTP_FREQ); - return ret; - } - pdata->ptpclk_rate = property; - return 0; + pdata->vdata->init_function_ptrs_phy_impl(&pdata->phy_if); } -#else /* CONFIG_ACPI */ -static int xgbe_acpi_support(struct xgbe_prv_data *pdata) -{ - return -EINVAL; -} -#endif /* CONFIG_ACPI */ -#ifdef CONFIG_OF -static int xgbe_of_support(struct xgbe_prv_data *pdata) -{ - struct device *dev = pdata->dev; - - /* Obtain the system clock setting */ - pdata->sysclk = devm_clk_get(dev, XGBE_DMA_CLOCK); - if (IS_ERR(pdata->sysclk)) { - dev_err(dev, "dma devm_clk_get failed\n"); - return PTR_ERR(pdata->sysclk); - } - pdata->sysclk_rate = clk_get_rate(pdata->sysclk); - - /* Obtain the PTP clock setting */ - pdata->ptpclk = devm_clk_get(dev, XGBE_PTP_CLOCK); - if (IS_ERR(pdata->ptpclk)) { - dev_err(dev, "ptp devm_clk_get failed\n"); - return PTR_ERR(pdata->ptpclk); - } - pdata->ptpclk_rate = clk_get_rate(pdata->ptpclk); - - return 0; -} - -static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata) -{ - struct device *dev = pdata->dev; - struct device_node *phy_node; - struct platform_device *phy_pdev; - - phy_node = of_parse_phandle(dev->of_node, "phy-handle", 0); - if (phy_node) { - /* Old style device tree: - * The XGBE and PHY resources are separate - */ - phy_pdev = of_find_device_by_node(phy_node); - of_node_put(phy_node); - } else { - /* New style device tree: - * The XGBE and PHY resources are grouped together with - * the PHY resources listed last - */ - get_device(dev); - phy_pdev = pdata->pdev; - } - - return phy_pdev; -} -#else /* CONFIG_OF */ -static int xgbe_of_support(struct xgbe_prv_data *pdata) -{ - return -EINVAL; -} - -static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata) -{ - return NULL; -} -#endif /* CONFIG_OF */ - -static unsigned int xgbe_resource_count(struct platform_device *pdev, - unsigned int type) -{ - unsigned int count; - int i; - - for (i = 0, count = 0; i < pdev->num_resources; i++) { - struct resource *res = &pdev->resource[i]; - - if (type == resource_type(res)) - count++; - } - - return count; -} - -static struct platform_device *xgbe_get_phy_pdev(struct xgbe_prv_data *pdata) -{ - struct 
platform_device *phy_pdev; - - if (pdata->use_acpi) { - get_device(pdata->dev); - phy_pdev = pdata->pdev; - } else { - phy_pdev = xgbe_of_get_phy_pdev(pdata); - } - - return phy_pdev; -} - -static int xgbe_probe(struct platform_device *pdev) +struct xgbe_prv_data *xgbe_alloc_pdata(struct device *dev) { struct xgbe_prv_data *pdata; struct net_device *netdev; - struct device *dev = &pdev->dev, *phy_dev; - struct platform_device *phy_pdev; - struct resource *res; - const char *phy_mode; - unsigned int i, phy_memnum, phy_irqnum; - enum dev_dma_attr attr; - int ret; - - DBGPR("--> xgbe_probe\n"); netdev = alloc_etherdev_mq(sizeof(struct xgbe_prv_data), XGBE_MAX_DMA_CHANNELS); if (!netdev) { - dev_err(dev, "alloc_etherdev failed\n"); - ret = -ENOMEM; - goto err_alloc; + dev_err(dev, "alloc_etherdev_mq failed\n"); + return ERR_PTR(-ENOMEM); } SET_NETDEV_DEV(netdev, dev); pdata = netdev_priv(netdev); pdata->netdev = netdev; - pdata->pdev = pdev; - pdata->adev = ACPI_COMPANION(dev); pdata->dev = dev; - platform_set_drvdata(pdev, netdev); spin_lock_init(&pdata->lock); spin_lock_init(&pdata->xpcs_lock); mutex_init(&pdata->rss_mutex); spin_lock_init(&pdata->tstamp_lock); + mutex_init(&pdata->i2c_mutex); + init_completion(&pdata->i2c_complete); + init_completion(&pdata->mdio_complete); pdata->msg_enable = netif_msg_init(debug, default_msg_level); set_bit(XGBE_DOWN, &pdata->dev_state); + set_bit(XGBE_STOPPED, &pdata->dev_state); - /* Check if we should use ACPI or DT */ - pdata->use_acpi = dev->of_node ? 0 : 1; - - phy_pdev = xgbe_get_phy_pdev(pdata); - if (!phy_pdev) { - dev_err(dev, "unable to obtain phy device\n"); - ret = -EINVAL; - goto err_phydev; - } - phy_dev = &phy_pdev->dev; - - if (pdev == phy_pdev) { - /* New style device tree or ACPI: - * The XGBE and PHY resources are grouped together with - * the PHY resources listed last - */ - phy_memnum = xgbe_resource_count(pdev, IORESOURCE_MEM) - 3; - phy_irqnum = xgbe_resource_count(pdev, IORESOURCE_IRQ) - 1; - } else { - /* Old style device tree: - * The XGBE and PHY resources are separate - */ - phy_memnum = 0; - phy_irqnum = 0; - } - - /* Set and validate the number of descriptors for a ring */ - BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_TX_DESC_CNT); - pdata->tx_desc_count = XGBE_TX_DESC_CNT; - if (pdata->tx_desc_count & (pdata->tx_desc_count - 1)) { - dev_err(dev, "tx descriptor count (%d) is not valid\n", - pdata->tx_desc_count); - ret = -EINVAL; - goto err_io; - } - BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_RX_DESC_CNT); - pdata->rx_desc_count = XGBE_RX_DESC_CNT; - if (pdata->rx_desc_count & (pdata->rx_desc_count - 1)) { - dev_err(dev, "rx descriptor count (%d) is not valid\n", - pdata->rx_desc_count); - ret = -EINVAL; - goto err_io; - } - - /* Obtain the mmio areas for the device */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - pdata->xgmac_regs = devm_ioremap_resource(dev, res); - if (IS_ERR(pdata->xgmac_regs)) { - dev_err(dev, "xgmac ioremap failed\n"); - ret = PTR_ERR(pdata->xgmac_regs); - goto err_io; - } - if (netif_msg_probe(pdata)) - dev_dbg(dev, "xgmac_regs = %p\n", pdata->xgmac_regs); - - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - pdata->xpcs_regs = devm_ioremap_resource(dev, res); - if (IS_ERR(pdata->xpcs_regs)) { - dev_err(dev, "xpcs ioremap failed\n"); - ret = PTR_ERR(pdata->xpcs_regs); - goto err_io; - } - if (netif_msg_probe(pdata)) - dev_dbg(dev, "xpcs_regs = %p\n", pdata->xpcs_regs); - - res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++); - pdata->rxtx_regs = devm_ioremap_resource(dev, res); - if 
(IS_ERR(pdata->rxtx_regs)) { - dev_err(dev, "rxtx ioremap failed\n"); - ret = PTR_ERR(pdata->rxtx_regs); - goto err_io; - } - if (netif_msg_probe(pdata)) - dev_dbg(dev, "rxtx_regs = %p\n", pdata->rxtx_regs); - - res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++); - pdata->sir0_regs = devm_ioremap_resource(dev, res); - if (IS_ERR(pdata->sir0_regs)) { - dev_err(dev, "sir0 ioremap failed\n"); - ret = PTR_ERR(pdata->sir0_regs); - goto err_io; - } - if (netif_msg_probe(pdata)) - dev_dbg(dev, "sir0_regs = %p\n", pdata->sir0_regs); - - res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++); - pdata->sir1_regs = devm_ioremap_resource(dev, res); - if (IS_ERR(pdata->sir1_regs)) { - dev_err(dev, "sir1 ioremap failed\n"); - ret = PTR_ERR(pdata->sir1_regs); - goto err_io; - } - if (netif_msg_probe(pdata)) - dev_dbg(dev, "sir1_regs = %p\n", pdata->sir1_regs); - - /* Retrieve the MAC address */ - ret = device_property_read_u8_array(dev, XGBE_MAC_ADDR_PROPERTY, - pdata->mac_addr, - sizeof(pdata->mac_addr)); - if (ret || !is_valid_ether_addr(pdata->mac_addr)) { - dev_err(dev, "invalid %s property\n", XGBE_MAC_ADDR_PROPERTY); - if (!ret) - ret = -EINVAL; - goto err_io; - } - - /* Retrieve the PHY mode - it must be "xgmii" */ - ret = device_property_read_string(dev, XGBE_PHY_MODE_PROPERTY, - &phy_mode); - if (ret || strcmp(phy_mode, phy_modes(PHY_INTERFACE_MODE_XGMII))) { - dev_err(dev, "invalid %s property\n", XGBE_PHY_MODE_PROPERTY); - if (!ret) - ret = -EINVAL; - goto err_io; - } - pdata->phy_mode = PHY_INTERFACE_MODE_XGMII; + return pdata; +} - /* Check for per channel interrupt support */ - if (device_property_present(dev, XGBE_DMA_IRQS_PROPERTY)) - pdata->per_channel_irq = 1; +void xgbe_free_pdata(struct xgbe_prv_data *pdata) +{ + struct net_device *netdev = pdata->netdev; - /* Retrieve the PHY speedset */ - ret = device_property_read_u32(phy_dev, XGBE_SPEEDSET_PROPERTY, - &pdata->speed_set); - if (ret) { - dev_err(dev, "invalid %s property\n", XGBE_SPEEDSET_PROPERTY); - goto err_io; - } + free_netdev(netdev); +} - switch (pdata->speed_set) { - case XGBE_SPEEDSET_1000_10000: - case XGBE_SPEEDSET_2500_10000: - break; - default: - dev_err(dev, "invalid %s property\n", XGBE_SPEEDSET_PROPERTY); - ret = -EINVAL; - goto err_io; - } +void xgbe_set_counts(struct xgbe_prv_data *pdata) +{ + /* Set all the function pointers */ + xgbe_init_all_fptrs(pdata); - /* Retrieve the PHY configuration properties */ - if (device_property_present(phy_dev, XGBE_BLWC_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_BLWC_PROPERTY, - pdata->serdes_blwc, - XGBE_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_BLWC_PROPERTY); - goto err_io; - } - } else { - memcpy(pdata->serdes_blwc, xgbe_serdes_blwc, - sizeof(pdata->serdes_blwc)); - } + /* Populate the hardware features */ + xgbe_get_all_hw_features(pdata); - if (device_property_present(phy_dev, XGBE_CDR_RATE_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_CDR_RATE_PROPERTY, - pdata->serdes_cdr_rate, - XGBE_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_CDR_RATE_PROPERTY); - goto err_io; - } - } else { - memcpy(pdata->serdes_cdr_rate, xgbe_serdes_cdr_rate, - sizeof(pdata->serdes_cdr_rate)); - } + /* Set default max values if not provided */ + if (!pdata->tx_max_channel_count) + pdata->tx_max_channel_count = pdata->hw_feat.tx_ch_cnt; + if (!pdata->rx_max_channel_count) + pdata->rx_max_channel_count = pdata->hw_feat.rx_ch_cnt; - if (device_property_present(phy_dev, 
XGBE_PQ_SKEW_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_PQ_SKEW_PROPERTY, - pdata->serdes_pq_skew, - XGBE_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_PQ_SKEW_PROPERTY); - goto err_io; - } - } else { - memcpy(pdata->serdes_pq_skew, xgbe_serdes_pq_skew, - sizeof(pdata->serdes_pq_skew)); - } + if (!pdata->tx_max_q_count) + pdata->tx_max_q_count = pdata->hw_feat.tx_q_cnt; + if (!pdata->rx_max_q_count) + pdata->rx_max_q_count = pdata->hw_feat.rx_q_cnt; - if (device_property_present(phy_dev, XGBE_TX_AMP_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_TX_AMP_PROPERTY, - pdata->serdes_tx_amp, - XGBE_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_TX_AMP_PROPERTY); - goto err_io; - } - } else { - memcpy(pdata->serdes_tx_amp, xgbe_serdes_tx_amp, - sizeof(pdata->serdes_tx_amp)); - } + /* Calculate the number of Tx and Rx rings to be created + * -Tx (DMA) Channels map 1-to-1 to Tx Queues so set + * the number of Tx queues to the number of Tx channels + * enabled + * -Rx (DMA) Channels do not map 1-to-1 so use the actual + * number of Rx queues or maximum allowed + */ + pdata->tx_ring_count = min_t(unsigned int, num_online_cpus(), + pdata->hw_feat.tx_ch_cnt); + pdata->tx_ring_count = min_t(unsigned int, pdata->tx_ring_count, + pdata->tx_max_channel_count); + pdata->tx_ring_count = min_t(unsigned int, pdata->tx_ring_count, + pdata->tx_max_q_count); - if (device_property_present(phy_dev, XGBE_DFE_CFG_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_DFE_CFG_PROPERTY, - pdata->serdes_dfe_tap_cfg, - XGBE_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_DFE_CFG_PROPERTY); - goto err_io; - } - } else { - memcpy(pdata->serdes_dfe_tap_cfg, xgbe_serdes_dfe_tap_cfg, - sizeof(pdata->serdes_dfe_tap_cfg)); - } + pdata->tx_q_count = pdata->tx_ring_count; - if (device_property_present(phy_dev, XGBE_DFE_ENA_PROPERTY)) { - ret = device_property_read_u32_array(phy_dev, - XGBE_DFE_ENA_PROPERTY, - pdata->serdes_dfe_tap_ena, - XGBE_SPEEDS); - if (ret) { - dev_err(dev, "invalid %s property\n", - XGBE_DFE_ENA_PROPERTY); - goto err_io; - } - } else { - memcpy(pdata->serdes_dfe_tap_ena, xgbe_serdes_dfe_tap_ena, - sizeof(pdata->serdes_dfe_tap_ena)); - } + pdata->rx_ring_count = min_t(unsigned int, num_online_cpus(), + pdata->hw_feat.rx_ch_cnt); + pdata->rx_ring_count = min_t(unsigned int, pdata->rx_ring_count, + pdata->rx_max_channel_count); - /* Obtain device settings unique to ACPI/OF */ - if (pdata->use_acpi) - ret = xgbe_acpi_support(pdata); - else - ret = xgbe_of_support(pdata); - if (ret) - goto err_io; - - /* Set the DMA coherency values */ - attr = device_get_dma_attr(dev); - if (attr == DEV_DMA_NOT_SUPPORTED) { - dev_err(dev, "DMA is not supported"); - ret = -ENODEV; - goto err_io; - } - pdata->coherent = (attr == DEV_DMA_COHERENT); - if (pdata->coherent) { - pdata->axdomain = XGBE_DMA_OS_AXDOMAIN; - pdata->arcache = XGBE_DMA_OS_ARCACHE; - pdata->awcache = XGBE_DMA_OS_AWCACHE; - } else { - pdata->axdomain = XGBE_DMA_SYS_AXDOMAIN; - pdata->arcache = XGBE_DMA_SYS_ARCACHE; - pdata->awcache = XGBE_DMA_SYS_AWCACHE; - } + pdata->rx_q_count = min_t(unsigned int, pdata->hw_feat.rx_q_cnt, + pdata->rx_max_q_count); - /* Get the device interrupt */ - ret = platform_get_irq(pdev, 0); - if (ret < 0) { - dev_err(dev, "platform_get_irq 0 failed\n"); - goto err_io; + if (netif_msg_probe(pdata)) { + dev_dbg(pdata->dev, "TX/RX DMA channel count = %u/%u\n", + pdata->tx_ring_count, pdata->rx_ring_count); + 
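[Aside: the channel-count derivation in xgbe_set_counts is a chain of min() reductions over CPU count, hardware capability, and configured maximums. The sketch below restates the Tx side outside the driver; xgbe_tx_ring_count() and min_uint() are hypothetical helpers for illustration:]

#include <stdio.h>

static unsigned int min_uint(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

/* Tx DMA channels map 1-to-1 to Tx queues, so the ring count is bounded by
 * online CPUs, hardware Tx channels, and both configured maximums.
 */
static unsigned int xgbe_tx_ring_count(unsigned int online_cpus,
				       unsigned int hw_tx_ch_cnt,
				       unsigned int tx_max_channel_count,
				       unsigned int tx_max_q_count)
{
	unsigned int count = min_uint(online_cpus, hw_tx_ch_cnt);

	count = min_uint(count, tx_max_channel_count);
	count = min_uint(count, tx_max_q_count);
	return count;
}

int main(void)
{
	/* e.g. 8 CPUs, 4 hardware Tx channels, maximums of 16 and 16 -> 4 */
	printf("tx_ring_count = %u\n", xgbe_tx_ring_count(8, 4, 16, 16));
	return 0;
}

[The Rx side differs only in that Rx channels do not map 1-to-1 to queues, so rx_q_count is taken from the hardware queue count rather than the ring count.]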
dev_dbg(pdata->dev, "TX/RX hardware queue count = %u/%u\n", + pdata->tx_q_count, pdata->rx_q_count); } - pdata->dev_irq = ret; +} - /* Get the auto-negotiation interrupt */ - ret = platform_get_irq(phy_pdev, phy_irqnum++); - if (ret < 0) { - dev_err(dev, "platform_get_irq phy 0 failed\n"); - goto err_io; - } - pdata->an_irq = ret; +int xgbe_config_netdev(struct xgbe_prv_data *pdata) +{ + struct net_device *netdev = pdata->netdev; + struct device *dev = pdata->dev; + unsigned int i; + int ret; netdev->irq = pdata->dev_irq; netdev->base_addr = (unsigned long)pdata->xgmac_regs; memcpy(netdev->dev_addr, pdata->mac_addr, netdev->addr_len); - /* Set all the function pointers */ - xgbe_init_all_fptrs(pdata); + /* Initialize ECC timestamps */ + pdata->tx_sec_period = jiffies; + pdata->tx_ded_period = jiffies; + pdata->rx_sec_period = jiffies; + pdata->rx_ded_period = jiffies; + pdata->desc_sec_period = jiffies; + pdata->desc_ded_period = jiffies; /* Issue software reset to device */ pdata->hw_if.exit(pdata); - /* Populate the hardware features */ - xgbe_get_all_hw_features(pdata); - /* Set default configuration data */ xgbe_default_config(pdata); @@ -664,33 +287,46 @@ static int xgbe_probe(struct platform_device *pdev) DMA_BIT_MASK(pdata->hw_feat.dma_width)); if (ret) { dev_err(dev, "dma_set_mask_and_coherent failed\n"); - goto err_io; + return ret; } - /* Calculate the number of Tx and Rx rings to be created - * -Tx (DMA) Channels map 1-to-1 to Tx Queues so set - * the number of Tx queues to the number of Tx channels - * enabled - * -Rx (DMA) Channels do not map 1-to-1 so use the actual - * number of Rx queues - */ - pdata->tx_ring_count = min_t(unsigned int, num_online_cpus(), - pdata->hw_feat.tx_ch_cnt); - pdata->tx_q_count = pdata->tx_ring_count; + /* Set default max values if not provided */ + if (!pdata->tx_max_fifo_size) + pdata->tx_max_fifo_size = pdata->hw_feat.tx_fifo_size; + if (!pdata->rx_max_fifo_size) + pdata->rx_max_fifo_size = pdata->hw_feat.rx_fifo_size; + + /* Set and validate the number of descriptors for a ring */ + BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_TX_DESC_CNT); + pdata->tx_desc_count = XGBE_TX_DESC_CNT; + + BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_RX_DESC_CNT); + pdata->rx_desc_count = XGBE_RX_DESC_CNT; + + /* Adjust the number of queues based on interrupts assigned */ + if (pdata->channel_irq_count) { + pdata->tx_ring_count = min_t(unsigned int, pdata->tx_ring_count, + pdata->channel_irq_count); + pdata->rx_ring_count = min_t(unsigned int, pdata->rx_ring_count, + pdata->channel_irq_count); + + if (netif_msg_probe(pdata)) + dev_dbg(pdata->dev, + "adjusted TX/RX DMA channel count = %u/%u\n", + pdata->tx_ring_count, pdata->rx_ring_count); + } + + /* Set the number of queues */ ret = netif_set_real_num_tx_queues(netdev, pdata->tx_ring_count); if (ret) { dev_err(dev, "error setting real tx queue count\n"); - goto err_io; + return ret; } - pdata->rx_ring_count = min_t(unsigned int, - netif_get_num_default_rss_queues(), - pdata->hw_feat.rx_ch_cnt); - pdata->rx_q_count = pdata->hw_feat.rx_q_cnt; ret = netif_set_real_num_rx_queues(netdev, pdata->rx_ring_count); if (ret) { dev_err(dev, "error setting real rx queue count\n"); - goto err_io; + return ret; } /* Initialize RSS hash key and lookup table */ @@ -705,7 +341,9 @@ static int xgbe_probe(struct platform_device *pdev) XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, UDP4TE, 1); /* Call MDIO/PHY initialization routine */ - pdata->phy_if.phy_init(pdata); + ret = pdata->phy_if.phy_init(pdata); + if (ret) + return ret; /* Set device operations */ 
netdev->netdev_ops = xgbe_get_netdev_ops(); @@ -752,13 +390,21 @@ static int xgbe_probe(struct platform_device *pdev) ret = register_netdev(netdev); if (ret) { dev_err(dev, "net device registration failed\n"); - goto err_io; + return ret; } /* Create the PHY/ANEG name based on netdev name */ snprintf(pdata->an_name, sizeof(pdata->an_name) - 1, "%s-pcs", netdev_name(netdev)); + /* Create the ECC name based on netdev name */ + snprintf(pdata->ecc_name, sizeof(pdata->ecc_name) - 1, "%s-ecc", + netdev_name(netdev)); + + /* Create the I2C name based on netdev name */ + snprintf(pdata->i2c_name, sizeof(pdata->i2c_name) - 1, "%s-i2c", + netdev_name(netdev)); + /* Create workqueues */ pdata->dev_workqueue = create_singlethread_workqueue(netdev_name(netdev)); @@ -780,11 +426,10 @@ static int xgbe_probe(struct platform_device *pdev) xgbe_debugfs_init(pdata); - platform_device_put(phy_pdev); - - netdev_notice(netdev, "net device enabled\n"); - - DBGPR("<-- xgbe_probe\n"); + netif_dbg(pdata, drv, pdata->netdev, "%u Tx software queues\n", + pdata->tx_ring_count); + netif_dbg(pdata, drv, pdata->netdev, "%u Rx software queues\n", + pdata->rx_ring_count); return 0; @@ -794,29 +439,19 @@ err_wq: err_netdev: unregister_netdev(netdev); -err_io: - platform_device_put(phy_pdev); - -err_phydev: - free_netdev(netdev); - -err_alloc: - dev_notice(dev, "net device not enabled\n"); - return ret; } -static int xgbe_remove(struct platform_device *pdev) +void xgbe_deconfig_netdev(struct xgbe_prv_data *pdata) { - struct net_device *netdev = platform_get_drvdata(pdev); - struct xgbe_prv_data *pdata = netdev_priv(netdev); - - DBGPR("-->xgbe_remove\n"); + struct net_device *netdev = pdata->netdev; xgbe_debugfs_exit(pdata); xgbe_ptp_unregister(pdata); + pdata->phy_if.phy_exit(pdata); + flush_workqueue(pdata->an_workqueue); destroy_workqueue(pdata->an_workqueue); @@ -824,94 +459,29 @@ static int xgbe_remove(struct platform_device *pdev) destroy_workqueue(pdata->dev_workqueue); unregister_netdev(netdev); - - free_netdev(netdev); - - DBGPR("<--xgbe_remove\n"); - - return 0; } -#ifdef CONFIG_PM -static int xgbe_suspend(struct device *dev) +static int __init xgbe_mod_init(void) { - struct net_device *netdev = dev_get_drvdata(dev); - struct xgbe_prv_data *pdata = netdev_priv(netdev); - int ret = 0; - - DBGPR("-->xgbe_suspend\n"); - - if (netif_running(netdev)) - ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT); + int ret; - pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); - pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); + ret = xgbe_platform_init(); + if (ret) + return ret; - DBGPR("<--xgbe_suspend\n"); + ret = xgbe_pci_init(); + if (ret) + return ret; - return ret; + return 0; } -static int xgbe_resume(struct device *dev) +static void __exit xgbe_mod_exit(void) { - struct net_device *netdev = dev_get_drvdata(dev); - struct xgbe_prv_data *pdata = netdev_priv(netdev); - int ret = 0; - - DBGPR("-->xgbe_resume\n"); - - pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); - - if (netif_running(netdev)) { - ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT); - - /* Schedule a restart in case the link or phy state changed - * while we were powered down. 
- */ - schedule_work(&pdata->restart_work); - } - - DBGPR("<--xgbe_resume\n"); + xgbe_pci_exit(); - return ret; + xgbe_platform_exit(); } -#endif /* CONFIG_PM */ - -#ifdef CONFIG_ACPI -static const struct acpi_device_id xgbe_acpi_match[] = { - { "AMDI8001", 0 }, - {}, -}; - -MODULE_DEVICE_TABLE(acpi, xgbe_acpi_match); -#endif - -#ifdef CONFIG_OF -static const struct of_device_id xgbe_of_match[] = { - { .compatible = "amd,xgbe-seattle-v1a", }, - {}, -}; - -MODULE_DEVICE_TABLE(of, xgbe_of_match); -#endif - -static SIMPLE_DEV_PM_OPS(xgbe_pm_ops, xgbe_suspend, xgbe_resume); - -static struct platform_driver xgbe_driver = { - .driver = { - .name = "amd-xgbe", -#ifdef CONFIG_ACPI - .acpi_match_table = xgbe_acpi_match, -#endif -#ifdef CONFIG_OF - .of_match_table = xgbe_of_match, -#endif - .pm = &xgbe_pm_ops, - }, - .probe = xgbe_probe, - .remove = xgbe_remove, -}; -module_platform_driver(xgbe_driver); +module_init(xgbe_mod_init); +module_exit(xgbe_mod_exit); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 84c5d296d13e..0ecae7045044 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -125,303 +125,284 @@ #include "xgbe.h" #include "xgbe-common.h" -static void xgbe_an_enable_kr_training(struct xgbe_prv_data *pdata) +static void xgbe_an37_clear_interrupts(struct xgbe_prv_data *pdata) { - unsigned int reg; - - reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); + int reg; - reg |= XGBE_KR_TRAINING_ENABLE; - XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT); + reg &= ~XGBE_AN_CL37_INT_MASK; + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT, reg); } -static void xgbe_an_disable_kr_training(struct xgbe_prv_data *pdata) +static void xgbe_an37_disable_interrupts(struct xgbe_prv_data *pdata) { - unsigned int reg; + int reg; - reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL); + reg &= ~XGBE_AN_CL37_INT_MASK; + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL, reg); - reg &= ~XGBE_KR_TRAINING_ENABLE; - XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL); + reg &= ~XGBE_PCS_CL37_BP; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL, reg); } -static void xgbe_pcs_power_cycle(struct xgbe_prv_data *pdata) +static void xgbe_an37_enable_interrupts(struct xgbe_prv_data *pdata) { - unsigned int reg; - - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + int reg; - reg |= MDIO_CTRL1_LPOWER; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL); + reg |= XGBE_PCS_CL37_BP; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL, reg); - usleep_range(75, 100); - - reg &= ~MDIO_CTRL1_LPOWER; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL); + reg |= XGBE_AN_CL37_INT_MASK; + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL, reg); } -static void xgbe_serdes_start_ratechange(struct xgbe_prv_data *pdata) +static void xgbe_an73_clear_interrupts(struct xgbe_prv_data *pdata) { - /* Assert Rx and Tx ratechange */ - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 1); + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); } -static void xgbe_serdes_complete_ratechange(struct xgbe_prv_data *pdata) +static void 
xgbe_an73_disable_interrupts(struct xgbe_prv_data *pdata) { - unsigned int wait; - u16 status; - - /* Release Rx and Tx ratechange */ - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 0); + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0); +} - /* Wait for Rx and Tx ready */ - wait = XGBE_RATECHANGE_COUNT; - while (wait--) { - usleep_range(50, 75); +static void xgbe_an73_enable_interrupts(struct xgbe_prv_data *pdata) +{ + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, XGBE_AN_CL73_INT_MASK); +} - status = XSIR0_IOREAD(pdata, SIR0_STATUS); - if (XSIR_GET_BITS(status, SIR0_STATUS, RX_READY) && - XSIR_GET_BITS(status, SIR0_STATUS, TX_READY)) - goto rx_reset; +static void xgbe_an_enable_interrupts(struct xgbe_prv_data *pdata) +{ + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + xgbe_an73_enable_interrupts(pdata); + break; + case XGBE_AN_MODE_CL37: + case XGBE_AN_MODE_CL37_SGMII: + xgbe_an37_enable_interrupts(pdata); + break; + default: + break; } +} - netif_dbg(pdata, link, pdata->netdev, "SerDes rx/tx not ready (%#hx)\n", - status); - -rx_reset: - /* Perform Rx reset for the DFE changes */ - XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 0); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 1); +static void xgbe_an_clear_interrupts_all(struct xgbe_prv_data *pdata) +{ + xgbe_an73_clear_interrupts(pdata); + xgbe_an37_clear_interrupts(pdata); } -static void xgbe_xgmii_mode(struct xgbe_prv_data *pdata) +static void xgbe_an73_enable_kr_training(struct xgbe_prv_data *pdata) { unsigned int reg; - /* Enable KR training */ - xgbe_an_enable_kr_training(pdata); - - /* Set MAC to 10G speed */ - pdata->hw_if.set_xgmii_speed(pdata); - - /* Set PCS to KR/10G speed */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); - reg &= ~MDIO_PCS_CTRL2_TYPE; - reg |= MDIO_PCS_CTRL2_10GBR; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg); + reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); - reg &= ~MDIO_CTRL1_SPEEDSEL; - reg |= MDIO_CTRL1_SPEED10G; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + reg |= XGBE_KR_TRAINING_ENABLE; + XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); +} - xgbe_pcs_power_cycle(pdata); +static void xgbe_an73_disable_kr_training(struct xgbe_prv_data *pdata) +{ + unsigned int reg; - /* Set SerDes to 10G speed */ - xgbe_serdes_start_ratechange(pdata); + reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_10000_RATE); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_10000_WORD); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_10000_PLL); + reg &= ~XGBE_KR_TRAINING_ENABLE; + XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); +} - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, - pdata->serdes_cdr_rate[XGBE_SPEED_10000]); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP, - pdata->serdes_tx_amp[XGBE_SPEED_10000]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA, - pdata->serdes_blwc[XGBE_SPEED_10000]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG, - pdata->serdes_pq_skew[XGBE_SPEED_10000]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG, - pdata->serdes_dfe_tap_cfg[XGBE_SPEED_10000]); - XRXTX_IOWRITE(pdata, RXTX_REG22, - pdata->serdes_dfe_tap_ena[XGBE_SPEED_10000]); +static void xgbe_kr_mode(struct xgbe_prv_data *pdata) +{ + /* Enable KR training */ + xgbe_an73_enable_kr_training(pdata); - xgbe_serdes_complete_ratechange(pdata); + /* 
Set MAC to 10G speed */ + pdata->hw_if.set_speed(pdata, SPEED_10000); - netif_dbg(pdata, link, pdata->netdev, "10GbE KR mode set\n"); + /* Call PHY implementation support to complete rate change */ + pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_KR); } -static void xgbe_gmii_2500_mode(struct xgbe_prv_data *pdata) +static void xgbe_kx_2500_mode(struct xgbe_prv_data *pdata) { - unsigned int reg; - /* Disable KR training */ - xgbe_an_disable_kr_training(pdata); + xgbe_an73_disable_kr_training(pdata); /* Set MAC to 2.5G speed */ - pdata->hw_if.set_gmii_2500_speed(pdata); + pdata->hw_if.set_speed(pdata, SPEED_2500); - /* Set PCS to KX/1G speed */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); - reg &= ~MDIO_PCS_CTRL2_TYPE; - reg |= MDIO_PCS_CTRL2_10GBX; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg); + /* Call PHY implementation support to complete rate change */ + pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_KX_2500); +} - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); - reg &= ~MDIO_CTRL1_SPEEDSEL; - reg |= MDIO_CTRL1_SPEED1G; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); +static void xgbe_kx_1000_mode(struct xgbe_prv_data *pdata) +{ + /* Disable KR training */ + xgbe_an73_disable_kr_training(pdata); - xgbe_pcs_power_cycle(pdata); + /* Set MAC to 1G speed */ + pdata->hw_if.set_speed(pdata, SPEED_1000); - /* Set SerDes to 2.5G speed */ - xgbe_serdes_start_ratechange(pdata); + /* Call PHY implementation support to complete rate change */ + pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_KX_1000); +} - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_2500_RATE); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_2500_WORD); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_2500_PLL); +static void xgbe_sfi_mode(struct xgbe_prv_data *pdata) +{ + /* If a KR re-driver is present, change to KR mode instead */ + if (pdata->kr_redrv) + return xgbe_kr_mode(pdata); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, - pdata->serdes_cdr_rate[XGBE_SPEED_2500]); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP, - pdata->serdes_tx_amp[XGBE_SPEED_2500]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA, - pdata->serdes_blwc[XGBE_SPEED_2500]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG, - pdata->serdes_pq_skew[XGBE_SPEED_2500]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG, - pdata->serdes_dfe_tap_cfg[XGBE_SPEED_2500]); - XRXTX_IOWRITE(pdata, RXTX_REG22, - pdata->serdes_dfe_tap_ena[XGBE_SPEED_2500]); + /* Disable KR training */ + xgbe_an73_disable_kr_training(pdata); - xgbe_serdes_complete_ratechange(pdata); + /* Set MAC to 10G speed */ + pdata->hw_if.set_speed(pdata, SPEED_10000); - netif_dbg(pdata, link, pdata->netdev, "2.5GbE KX mode set\n"); + /* Call PHY implementation support to complete rate change */ + pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_SFI); } -static void xgbe_gmii_mode(struct xgbe_prv_data *pdata) +static void xgbe_x_mode(struct xgbe_prv_data *pdata) { - unsigned int reg; - /* Disable KR training */ - xgbe_an_disable_kr_training(pdata); + xgbe_an73_disable_kr_training(pdata); /* Set MAC to 1G speed */ - pdata->hw_if.set_gmii_speed(pdata); + pdata->hw_if.set_speed(pdata, SPEED_1000); - /* Set PCS to KX/1G speed */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); - reg &= ~MDIO_PCS_CTRL2_TYPE; - reg |= MDIO_PCS_CTRL2_10GBX; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg); - - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); - reg &= ~MDIO_CTRL1_SPEEDSEL; - reg |= MDIO_CTRL1_SPEED1G; - XMDIO_WRITE(pdata, 
MDIO_MMD_PCS, MDIO_CTRL1, reg); + /* Call PHY implementation support to complete rate change */ + pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_X); +} - xgbe_pcs_power_cycle(pdata); +static void xgbe_sgmii_1000_mode(struct xgbe_prv_data *pdata) +{ + /* Disable KR training */ + xgbe_an73_disable_kr_training(pdata); - /* Set SerDes to 1G speed */ - xgbe_serdes_start_ratechange(pdata); + /* Set MAC to 1G speed */ + pdata->hw_if.set_speed(pdata, SPEED_1000); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_1000_RATE); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_1000_WORD); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_1000_PLL); + /* Call PHY implementation support to complete rate change */ + pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_SGMII_1000); +} - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, - pdata->serdes_cdr_rate[XGBE_SPEED_1000]); - XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP, - pdata->serdes_tx_amp[XGBE_SPEED_1000]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA, - pdata->serdes_blwc[XGBE_SPEED_1000]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG, - pdata->serdes_pq_skew[XGBE_SPEED_1000]); - XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG, - pdata->serdes_dfe_tap_cfg[XGBE_SPEED_1000]); - XRXTX_IOWRITE(pdata, RXTX_REG22, - pdata->serdes_dfe_tap_ena[XGBE_SPEED_1000]); +static void xgbe_sgmii_100_mode(struct xgbe_prv_data *pdata) +{ + /* Disable KR training */ + xgbe_an73_disable_kr_training(pdata); - xgbe_serdes_complete_ratechange(pdata); + /* Set MAC to 1G speed */ + pdata->hw_if.set_speed(pdata, SPEED_1000); - netif_dbg(pdata, link, pdata->netdev, "1GbE KX mode set\n"); + /* Call PHY implementation support to complete rate change */ + pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_SGMII_100); } -static void xgbe_cur_mode(struct xgbe_prv_data *pdata, - enum xgbe_mode *mode) +static enum xgbe_mode xgbe_cur_mode(struct xgbe_prv_data *pdata) { - unsigned int reg; - - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); - if ((reg & MDIO_PCS_CTRL2_TYPE) == MDIO_PCS_CTRL2_10GBR) - *mode = XGBE_MODE_KR; - else - *mode = XGBE_MODE_KX; + return pdata->phy_if.phy_impl.cur_mode(pdata); } static bool xgbe_in_kr_mode(struct xgbe_prv_data *pdata) { - enum xgbe_mode mode; - - xgbe_cur_mode(pdata, &mode); + return (xgbe_cur_mode(pdata) == XGBE_MODE_KR); +} - return (mode == XGBE_MODE_KR); +static void xgbe_change_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_KX_1000: + xgbe_kx_1000_mode(pdata); + break; + case XGBE_MODE_KX_2500: + xgbe_kx_2500_mode(pdata); + break; + case XGBE_MODE_KR: + xgbe_kr_mode(pdata); + break; + case XGBE_MODE_SGMII_100: + xgbe_sgmii_100_mode(pdata); + break; + case XGBE_MODE_SGMII_1000: + xgbe_sgmii_1000_mode(pdata); + break; + case XGBE_MODE_X: + xgbe_x_mode(pdata); + break; + case XGBE_MODE_SFI: + xgbe_sfi_mode(pdata); + break; + case XGBE_MODE_UNKNOWN: + break; + default: + netif_dbg(pdata, link, pdata->netdev, + "invalid operation mode requested (%u)\n", mode); + } } static void xgbe_switch_mode(struct xgbe_prv_data *pdata) { - /* If we are in KR switch to KX, and vice-versa */ - if (xgbe_in_kr_mode(pdata)) { - if (pdata->speed_set == XGBE_SPEEDSET_1000_10000) - xgbe_gmii_mode(pdata); - else - xgbe_gmii_2500_mode(pdata); - } else { - xgbe_xgmii_mode(pdata); - } + xgbe_change_mode(pdata, pdata->phy_if.phy_impl.switch_mode(pdata)); } static void xgbe_set_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) { - enum xgbe_mode cur_mode; + if (mode == 
xgbe_cur_mode(pdata)) + return; - xgbe_cur_mode(pdata, &cur_mode); - if (mode != cur_mode) - xgbe_switch_mode(pdata); + xgbe_change_mode(pdata, mode); } -static bool xgbe_use_xgmii_mode(struct xgbe_prv_data *pdata) +static bool xgbe_use_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode) { - if (pdata->phy.autoneg == AUTONEG_ENABLE) { - if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full) - return true; - } else { - if (pdata->phy.speed == SPEED_10000) - return true; - } + return pdata->phy_if.phy_impl.use_mode(pdata, mode); +} + +static void xgbe_an37_set(struct xgbe_prv_data *pdata, bool enable, + bool restart) +{ + unsigned int reg; + + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_CTRL1); + reg &= ~MDIO_VEND2_CTRL1_AN_ENABLE; - return false; + if (enable) + reg |= MDIO_VEND2_CTRL1_AN_ENABLE; + + if (restart) + reg |= MDIO_VEND2_CTRL1_AN_RESTART; + + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_CTRL1, reg); } -static bool xgbe_use_gmii_2500_mode(struct xgbe_prv_data *pdata) +static void xgbe_an37_restart(struct xgbe_prv_data *pdata) { - if (pdata->phy.autoneg == AUTONEG_ENABLE) { - if (pdata->phy.advertising & ADVERTISED_2500baseX_Full) - return true; - } else { - if (pdata->phy.speed == SPEED_2500) - return true; - } + xgbe_an37_enable_interrupts(pdata); + xgbe_an37_set(pdata, true, true); - return false; + netif_dbg(pdata, link, pdata->netdev, "CL37 AN enabled/restarted\n"); } -static bool xgbe_use_gmii_mode(struct xgbe_prv_data *pdata) +static void xgbe_an37_disable(struct xgbe_prv_data *pdata) { - if (pdata->phy.autoneg == AUTONEG_ENABLE) { - if (pdata->phy.advertising & ADVERTISED_1000baseKX_Full) - return true; - } else { - if (pdata->phy.speed == SPEED_1000) - return true; - } + xgbe_an37_set(pdata, false, false); + xgbe_an37_disable_interrupts(pdata); - return false; + netif_dbg(pdata, link, pdata->netdev, "CL37 AN disabled\n"); } -static void xgbe_set_an(struct xgbe_prv_data *pdata, bool enable, bool restart) +static void xgbe_an73_set(struct xgbe_prv_data *pdata, bool enable, + bool restart) { unsigned int reg; @@ -437,22 +418,62 @@ static void xgbe_set_an(struct xgbe_prv_data *pdata, bool enable, bool restart) XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_CTRL1, reg); } -static void xgbe_restart_an(struct xgbe_prv_data *pdata) +static void xgbe_an73_restart(struct xgbe_prv_data *pdata) +{ + xgbe_an73_enable_interrupts(pdata); + xgbe_an73_set(pdata, true, true); + + netif_dbg(pdata, link, pdata->netdev, "CL73 AN enabled/restarted\n"); +} + +static void xgbe_an73_disable(struct xgbe_prv_data *pdata) { - xgbe_set_an(pdata, true, true); + xgbe_an73_set(pdata, false, false); + xgbe_an73_disable_interrupts(pdata); - netif_dbg(pdata, link, pdata->netdev, "AN enabled/restarted\n"); + netif_dbg(pdata, link, pdata->netdev, "CL73 AN disabled\n"); +} + +static void xgbe_an_restart(struct xgbe_prv_data *pdata) +{ + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + xgbe_an73_restart(pdata); + break; + case XGBE_AN_MODE_CL37: + case XGBE_AN_MODE_CL37_SGMII: + xgbe_an37_restart(pdata); + break; + default: + break; + } } -static void xgbe_disable_an(struct xgbe_prv_data *pdata) +static void xgbe_an_disable(struct xgbe_prv_data *pdata) { - xgbe_set_an(pdata, false, false); + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + xgbe_an73_disable(pdata); + break; + case XGBE_AN_MODE_CL37: + case XGBE_AN_MODE_CL37_SGMII: + xgbe_an37_disable(pdata); + break; + default: + break; + } +} - netif_dbg(pdata, link, pdata->netdev, "AN 
disabled\n"); +static void xgbe_an_disable_all(struct xgbe_prv_data *pdata) +{ + xgbe_an73_disable(pdata); + xgbe_an37_disable(pdata); } -static enum xgbe_an xgbe_an_tx_training(struct xgbe_prv_data *pdata, - enum xgbe_rx *state) +static enum xgbe_an xgbe_an73_tx_training(struct xgbe_prv_data *pdata, + enum xgbe_rx *state) { unsigned int ad_reg, lp_reg, reg; @@ -476,13 +497,15 @@ static enum xgbe_an xgbe_an_tx_training(struct xgbe_prv_data *pdata, /* Start KR training */ reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL); if (reg & XGBE_KR_TRAINING_ENABLE) { - XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 1); + if (pdata->phy_if.phy_impl.kr_training_pre) + pdata->phy_if.phy_impl.kr_training_pre(pdata); reg |= XGBE_KR_TRAINING_START; XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); - XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 0); + if (pdata->phy_if.phy_impl.kr_training_post) + pdata->phy_if.phy_impl.kr_training_post(pdata); netif_dbg(pdata, link, pdata->netdev, "KR training initiated\n"); @@ -491,8 +514,8 @@ static enum xgbe_an xgbe_an_tx_training(struct xgbe_prv_data *pdata, return XGBE_AN_PAGE_RECEIVED; } -static enum xgbe_an xgbe_an_tx_xnp(struct xgbe_prv_data *pdata, - enum xgbe_rx *state) +static enum xgbe_an xgbe_an73_tx_xnp(struct xgbe_prv_data *pdata, + enum xgbe_rx *state) { u16 msg; @@ -508,8 +531,8 @@ static enum xgbe_an xgbe_an_tx_xnp(struct xgbe_prv_data *pdata, return XGBE_AN_PAGE_RECEIVED; } -static enum xgbe_an xgbe_an_rx_bpa(struct xgbe_prv_data *pdata, - enum xgbe_rx *state) +static enum xgbe_an xgbe_an73_rx_bpa(struct xgbe_prv_data *pdata, + enum xgbe_rx *state) { unsigned int link_support; unsigned int reg, ad_reg, lp_reg; @@ -528,12 +551,12 @@ static enum xgbe_an xgbe_an_rx_bpa(struct xgbe_prv_data *pdata, return ((ad_reg & XGBE_XNP_NP_EXCHANGE) || (lp_reg & XGBE_XNP_NP_EXCHANGE)) - ? xgbe_an_tx_xnp(pdata, state) - : xgbe_an_tx_training(pdata, state); + ? xgbe_an73_tx_xnp(pdata, state) + : xgbe_an73_tx_training(pdata, state); } -static enum xgbe_an xgbe_an_rx_xnp(struct xgbe_prv_data *pdata, - enum xgbe_rx *state) +static enum xgbe_an xgbe_an73_rx_xnp(struct xgbe_prv_data *pdata, + enum xgbe_rx *state) { unsigned int ad_reg, lp_reg; @@ -543,11 +566,11 @@ static enum xgbe_an xgbe_an_rx_xnp(struct xgbe_prv_data *pdata, return ((ad_reg & XGBE_XNP_NP_EXCHANGE) || (lp_reg & XGBE_XNP_NP_EXCHANGE)) - ? xgbe_an_tx_xnp(pdata, state) - : xgbe_an_tx_training(pdata, state); + ? xgbe_an73_tx_xnp(pdata, state) + : xgbe_an73_tx_training(pdata, state); } -static enum xgbe_an xgbe_an_page_received(struct xgbe_prv_data *pdata) +static enum xgbe_an xgbe_an73_page_received(struct xgbe_prv_data *pdata) { enum xgbe_rx *state; unsigned long an_timeout; @@ -566,20 +589,20 @@ static enum xgbe_an xgbe_an_page_received(struct xgbe_prv_data *pdata) pdata->an_start = jiffies; netif_dbg(pdata, link, pdata->netdev, - "AN timed out, resetting state\n"); + "CL73 AN timed out, resetting state\n"); } } state = xgbe_in_kr_mode(pdata) ? 
&pdata->kr_state - : &pdata->kx_state; + : &pdata->kx_state; switch (*state) { case XGBE_RX_BPA: - ret = xgbe_an_rx_bpa(pdata, state); + ret = xgbe_an73_rx_bpa(pdata, state); break; case XGBE_RX_XNP: - ret = xgbe_an_rx_xnp(pdata, state); + ret = xgbe_an73_rx_xnp(pdata, state); break; default: @@ -589,7 +612,7 @@ static enum xgbe_an xgbe_an_page_received(struct xgbe_prv_data *pdata) return ret; } -static enum xgbe_an xgbe_an_incompat_link(struct xgbe_prv_data *pdata) +static enum xgbe_an xgbe_an73_incompat_link(struct xgbe_prv_data *pdata) { /* Be sure we aren't looping trying to negotiate */ if (xgbe_in_kr_mode(pdata)) { @@ -611,23 +634,43 @@ static enum xgbe_an xgbe_an_incompat_link(struct xgbe_prv_data *pdata) return XGBE_AN_NO_LINK; } - xgbe_disable_an(pdata); + xgbe_an73_disable(pdata); xgbe_switch_mode(pdata); - xgbe_restart_an(pdata); + xgbe_an73_restart(pdata); return XGBE_AN_INCOMPAT_LINK; } -static irqreturn_t xgbe_an_isr(int irq, void *data) +static void xgbe_an37_isr(struct xgbe_prv_data *pdata) { - struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + unsigned int reg; - netif_dbg(pdata, intr, pdata->netdev, "AN interrupt received\n"); + /* Disable AN interrupts */ + xgbe_an37_disable_interrupts(pdata); + + /* Save the interrupt(s) that fired */ + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT); + pdata->an_int = reg & XGBE_AN_CL37_INT_MASK; + pdata->an_status = reg & ~XGBE_AN_CL37_INT_MASK; + if (pdata->an_int) { + /* Clear the interrupt(s) that fired and process them */ + reg &= ~XGBE_AN_CL37_INT_MASK; + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT, reg); + + queue_work(pdata->an_workqueue, &pdata->an_irq_work); + } else { + /* Enable AN interrupts */ + xgbe_an37_enable_interrupts(pdata); + } +} + +static void xgbe_an73_isr(struct xgbe_prv_data *pdata) +{ /* Disable AN interrupts */ - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0); + xgbe_an73_disable_interrupts(pdata); /* Save the interrupt(s) that fired */ pdata->an_int = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_INT); @@ -639,13 +682,37 @@ static irqreturn_t xgbe_an_isr(int irq, void *data) queue_work(pdata->an_workqueue, &pdata->an_irq_work); } else { /* Enable AN interrupts */ - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, - XGBE_AN_INT_MASK); + xgbe_an73_enable_interrupts(pdata); + } +} + +static irqreturn_t xgbe_an_isr(int irq, void *data) +{ + struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + + netif_dbg(pdata, intr, pdata->netdev, "AN interrupt received\n"); + + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + xgbe_an73_isr(pdata); + break; + case XGBE_AN_MODE_CL37: + case XGBE_AN_MODE_CL37_SGMII: + xgbe_an37_isr(pdata); + break; + default: + break; } return IRQ_HANDLED; } +static irqreturn_t xgbe_an_combined_isr(int irq, struct xgbe_prv_data *pdata) +{ + return xgbe_an_isr(irq, pdata); +} + static void xgbe_an_irq_work(struct work_struct *work) { struct xgbe_prv_data *pdata = container_of(work, @@ -679,36 +746,87 @@ static const char *xgbe_state_as_string(enum xgbe_an state) } } -static void xgbe_an_state_machine(struct work_struct *work) +static void xgbe_an37_state_machine(struct xgbe_prv_data *pdata) { - struct xgbe_prv_data *pdata = container_of(work, - struct xgbe_prv_data, - an_work); enum xgbe_an cur_state = pdata->an_state; - mutex_lock(&pdata->an_mutex); + if (!pdata->an_int) + return; + + if (pdata->an_int & XGBE_AN_CL37_INT_CMPLT) { + pdata->an_state = XGBE_AN_COMPLETE; + pdata->an_int &= ~XGBE_AN_CL37_INT_CMPLT; + + /* If 
SGMII is enabled, check the link status */ + if ((pdata->an_mode == XGBE_AN_MODE_CL37_SGMII) && + !(pdata->an_status & XGBE_SGMII_AN_LINK_STATUS)) + pdata->an_state = XGBE_AN_NO_LINK; + } + + netif_dbg(pdata, link, pdata->netdev, "CL37 AN %s\n", + xgbe_state_as_string(pdata->an_state)); + + cur_state = pdata->an_state; + + switch (pdata->an_state) { + case XGBE_AN_READY: + break; + + case XGBE_AN_COMPLETE: + netif_dbg(pdata, link, pdata->netdev, + "Auto negotiation successful\n"); + break; + + case XGBE_AN_NO_LINK: + break; + + default: + pdata->an_state = XGBE_AN_ERROR; + } + + if (pdata->an_state == XGBE_AN_ERROR) { + netdev_err(pdata->netdev, + "error during auto-negotiation, state=%u\n", + cur_state); + + pdata->an_int = 0; + xgbe_an37_clear_interrupts(pdata); + } + + if (pdata->an_state >= XGBE_AN_COMPLETE) { + pdata->an_result = pdata->an_state; + pdata->an_state = XGBE_AN_READY; + + netif_dbg(pdata, link, pdata->netdev, "CL37 AN result: %s\n", + xgbe_state_as_string(pdata->an_result)); + } + + xgbe_an37_enable_interrupts(pdata); +} + +static void xgbe_an73_state_machine(struct xgbe_prv_data *pdata) +{ + enum xgbe_an cur_state = pdata->an_state; if (!pdata->an_int) - goto out; + return; next_int: - if (pdata->an_int & XGBE_AN_PG_RCV) { + if (pdata->an_int & XGBE_AN_CL73_PG_RCV) { pdata->an_state = XGBE_AN_PAGE_RECEIVED; - pdata->an_int &= ~XGBE_AN_PG_RCV; - } else if (pdata->an_int & XGBE_AN_INC_LINK) { + pdata->an_int &= ~XGBE_AN_CL73_PG_RCV; + } else if (pdata->an_int & XGBE_AN_CL73_INC_LINK) { pdata->an_state = XGBE_AN_INCOMPAT_LINK; - pdata->an_int &= ~XGBE_AN_INC_LINK; - } else if (pdata->an_int & XGBE_AN_INT_CMPLT) { + pdata->an_int &= ~XGBE_AN_CL73_INC_LINK; + } else if (pdata->an_int & XGBE_AN_CL73_INT_CMPLT) { pdata->an_state = XGBE_AN_COMPLETE; - pdata->an_int &= ~XGBE_AN_INT_CMPLT; + pdata->an_int &= ~XGBE_AN_CL73_INT_CMPLT; } else { pdata->an_state = XGBE_AN_ERROR; } - pdata->an_result = pdata->an_state; - again: - netif_dbg(pdata, link, pdata->netdev, "AN %s\n", + netif_dbg(pdata, link, pdata->netdev, "CL73 AN %s\n", xgbe_state_as_string(pdata->an_state)); cur_state = pdata->an_state; @@ -719,14 +837,14 @@ again: break; case XGBE_AN_PAGE_RECEIVED: - pdata->an_state = xgbe_an_page_received(pdata); + pdata->an_state = xgbe_an73_page_received(pdata); pdata->an_supported++; break; case XGBE_AN_INCOMPAT_LINK: pdata->an_supported = 0; pdata->parallel_detect = 0; - pdata->an_state = xgbe_an_incompat_link(pdata); + pdata->an_state = xgbe_an73_incompat_link(pdata); break; case XGBE_AN_COMPLETE: @@ -745,14 +863,14 @@ again: if (pdata->an_state == XGBE_AN_NO_LINK) { pdata->an_int = 0; - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); + xgbe_an73_clear_interrupts(pdata); } else if (pdata->an_state == XGBE_AN_ERROR) { netdev_err(pdata->netdev, "error during auto-negotiation, state=%u\n", cur_state); pdata->an_int = 0; - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); + xgbe_an73_clear_interrupts(pdata); } if (pdata->an_state >= XGBE_AN_COMPLETE) { @@ -762,7 +880,7 @@ again: pdata->kx_state = XGBE_RX_BPA; pdata->an_start = 0; - netif_dbg(pdata, link, pdata->netdev, "AN result: %s\n", + netif_dbg(pdata, link, pdata->netdev, "CL73 AN result: %s\n", xgbe_state_as_string(pdata->an_result)); } @@ -772,20 +890,88 @@ again: if (pdata->an_int) goto next_int; -out: - /* Enable AN interrupts on the way out */ - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, XGBE_AN_INT_MASK); + xgbe_an73_enable_interrupts(pdata); +} + +static void xgbe_an_state_machine(struct work_struct *work) +{ + struct 
xgbe_prv_data *pdata = container_of(work, + struct xgbe_prv_data, + an_work); + + mutex_lock(&pdata->an_mutex); + + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + xgbe_an73_state_machine(pdata); + break; + case XGBE_AN_MODE_CL37: + case XGBE_AN_MODE_CL37_SGMII: + xgbe_an37_state_machine(pdata); + break; + default: + break; + } mutex_unlock(&pdata->an_mutex); } -static void xgbe_an_init(struct xgbe_prv_data *pdata) +static void xgbe_an37_init(struct xgbe_prv_data *pdata) { - unsigned int reg; + unsigned int advertising, reg; + + advertising = pdata->phy_if.phy_impl.an_advertising(pdata); + + /* Set up Advertisement register */ + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_ADVERTISE); + if (advertising & ADVERTISED_Pause) + reg |= 0x100; + else + reg &= ~0x100; + + if (advertising & ADVERTISED_Asym_Pause) + reg |= 0x80; + else + reg &= ~0x80; + + /* Full duplex, but not half */ + reg |= XGBE_AN_CL37_FD_MASK; + reg &= ~XGBE_AN_CL37_HD_MASK; + + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_ADVERTISE, reg); + + /* Set up the Control register */ + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL); + reg &= XGBE_AN_CL37_TX_CONFIG_MASK; + reg &= XGBE_AN_CL37_PCS_MODE_MASK; + + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL37: + reg |= XGBE_AN_CL37_PCS_MODE_BASEX; + break; + case XGBE_AN_MODE_CL37_SGMII: + reg |= XGBE_AN_CL37_PCS_MODE_SGMII; + break; + default: + break; + } + + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL, reg); + + netif_dbg(pdata, link, pdata->netdev, "CL37 AN (%s) initialized\n", + (pdata->an_mode == XGBE_AN_MODE_CL37) ? "BaseX" : "SGMII"); +} + +static void xgbe_an73_init(struct xgbe_prv_data *pdata) +{ + unsigned int advertising, reg; + + advertising = pdata->phy_if.phy_impl.an_advertising(pdata); /* Set up Advertisement register 3 first */ reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); - if (pdata->phy.advertising & ADVERTISED_10000baseR_FEC) + if (advertising & ADVERTISED_10000baseR_FEC) reg |= 0xc000; else reg &= ~0xc000; @@ -794,13 +980,13 @@ static void xgbe_an_init(struct xgbe_prv_data *pdata) /* Set up Advertisement register 2 next */ reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1); - if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full) + if (advertising & ADVERTISED_10000baseKR_Full) reg |= 0x80; else reg &= ~0x80; - if ((pdata->phy.advertising & ADVERTISED_1000baseKX_Full) || - (pdata->phy.advertising & ADVERTISED_2500baseX_Full)) + if ((advertising & ADVERTISED_1000baseKX_Full) || + (advertising & ADVERTISED_2500baseX_Full)) reg |= 0x20; else reg &= ~0x20; @@ -809,12 +995,12 @@ static void xgbe_an_init(struct xgbe_prv_data *pdata) /* Set up Advertisement register 1 last */ reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE); - if (pdata->phy.advertising & ADVERTISED_Pause) + if (advertising & ADVERTISED_Pause) reg |= 0x400; else reg &= ~0x400; - if (pdata->phy.advertising & ADVERTISED_Asym_Pause) + if (advertising & ADVERTISED_Asym_Pause) reg |= 0x800; else reg &= ~0x800; @@ -824,7 +1010,25 @@ static void xgbe_an_init(struct xgbe_prv_data *pdata) XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE, reg); - netif_dbg(pdata, link, pdata->netdev, "AN initialized\n"); + netif_dbg(pdata, link, pdata->netdev, "CL73 AN initialized\n"); +} + +static void xgbe_an_init(struct xgbe_prv_data *pdata) +{ + /* Set up advertisement registers based on current settings */ + pdata->an_mode = pdata->phy_if.phy_impl.an_mode(pdata); + switch (pdata->an_mode) { + case 
XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + xgbe_an73_init(pdata); + break; + case XGBE_AN_MODE_CL37: + case XGBE_AN_MODE_CL37_SGMII: + xgbe_an37_init(pdata); + break; + default: + break; + } } static const char *xgbe_phy_fc_string(struct xgbe_prv_data *pdata) @@ -842,6 +1046,8 @@ static const char *xgbe_phy_fc_string(struct xgbe_prv_data *pdata) static const char *xgbe_phy_speed_string(int speed) { switch (speed) { + case SPEED_100: + return "100Mbps"; case SPEED_1000: return "1Gbps"; case SPEED_2500: @@ -907,24 +1113,32 @@ static void xgbe_phy_adjust_link(struct xgbe_prv_data *pdata) xgbe_phy_print_status(pdata); } +static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed) +{ + return pdata->phy_if.phy_impl.valid_speed(pdata, speed); +} + static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata) { + enum xgbe_mode mode; + netif_dbg(pdata, link, pdata->netdev, "fixed PHY configuration\n"); /* Disable auto-negotiation */ - xgbe_disable_an(pdata); - - /* Validate/Set specified speed */ - switch (pdata->phy.speed) { - case SPEED_10000: - xgbe_set_mode(pdata, XGBE_MODE_KR); + xgbe_an_disable(pdata); + + /* Set specified mode for specified speed */ + mode = pdata->phy_if.phy_impl.get_mode(pdata, pdata->phy.speed); + switch (mode) { + case XGBE_MODE_KX_1000: + case XGBE_MODE_KX_2500: + case XGBE_MODE_KR: + case XGBE_MODE_SGMII_100: + case XGBE_MODE_SGMII_1000: + case XGBE_MODE_X: + case XGBE_MODE_SFI: break; - - case SPEED_2500: - case SPEED_1000: - xgbe_set_mode(pdata, XGBE_MODE_KX); - break; - + case XGBE_MODE_UNKNOWN: default: return -EINVAL; } @@ -933,38 +1147,60 @@ static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata) if (pdata->phy.duplex != DUPLEX_FULL) return -EINVAL; + xgbe_set_mode(pdata, mode); + return 0; } static int __xgbe_phy_config_aneg(struct xgbe_prv_data *pdata) { + int ret; + set_bit(XGBE_LINK_INIT, &pdata->dev_state); pdata->link_check = jiffies; - if (pdata->phy.autoneg != AUTONEG_ENABLE) - return xgbe_phy_config_fixed(pdata); + ret = pdata->phy_if.phy_impl.an_config(pdata); + if (ret) + return ret; + + if (pdata->phy.autoneg != AUTONEG_ENABLE) { + ret = xgbe_phy_config_fixed(pdata); + if (ret || !pdata->kr_redrv) + return ret; - netif_dbg(pdata, link, pdata->netdev, "AN PHY configuration\n"); + netif_dbg(pdata, link, pdata->netdev, "AN redriver support\n"); + } else { + netif_dbg(pdata, link, pdata->netdev, "AN PHY configuration\n"); + } /* Disable auto-negotiation interrupt */ disable_irq(pdata->an_irq); /* Start auto-negotiation in a supported mode */ - if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full) { + if (xgbe_use_mode(pdata, XGBE_MODE_KR)) { xgbe_set_mode(pdata, XGBE_MODE_KR); - } else if ((pdata->phy.advertising & ADVERTISED_1000baseKX_Full) || - (pdata->phy.advertising & ADVERTISED_2500baseX_Full)) { - xgbe_set_mode(pdata, XGBE_MODE_KX); + } else if (xgbe_use_mode(pdata, XGBE_MODE_KX_2500)) { + xgbe_set_mode(pdata, XGBE_MODE_KX_2500); + } else if (xgbe_use_mode(pdata, XGBE_MODE_KX_1000)) { + xgbe_set_mode(pdata, XGBE_MODE_KX_1000); + } else if (xgbe_use_mode(pdata, XGBE_MODE_SFI)) { + xgbe_set_mode(pdata, XGBE_MODE_SFI); + } else if (xgbe_use_mode(pdata, XGBE_MODE_X)) { + xgbe_set_mode(pdata, XGBE_MODE_X); + } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_1000)) { + xgbe_set_mode(pdata, XGBE_MODE_SGMII_1000); + } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_100)) { + xgbe_set_mode(pdata, XGBE_MODE_SGMII_100); } else { enable_irq(pdata->an_irq); return -EINVAL; } /* Disable and stop any in progress auto-negotiation */ 
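
The if/else ladder just above picks the first mode that xgbe_use_mode() accepts, in strict fastest-first order: KR, then KX 2.5G, KX 1G, SFI, X, SGMII 1G, and finally SGMII 100M. The same selection could be written table-driven; a sketch assuming the driver's xgbe.h declarations (the array and helper are illustrative only, and the error path omits the enable_irq() done in the real ladder):

/* Illustrative preference table; order matches the ladder above */
static const enum xgbe_mode xgbe_aneg_mode_prefs[] = {
        XGBE_MODE_KR,
        XGBE_MODE_KX_2500,
        XGBE_MODE_KX_1000,
        XGBE_MODE_SFI,
        XGBE_MODE_X,
        XGBE_MODE_SGMII_1000,
        XGBE_MODE_SGMII_100,
};

static int xgbe_set_first_usable_mode(struct xgbe_prv_data *pdata)
{
        unsigned int i;

        for (i = 0; i < ARRAY_SIZE(xgbe_aneg_mode_prefs); i++) {
                if (!xgbe_use_mode(pdata, xgbe_aneg_mode_prefs[i]))
                        continue;
                xgbe_set_mode(pdata, xgbe_aneg_mode_prefs[i]);
                return 0;
        }

        return -EINVAL; /* nothing advertised/supported matched */
}
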
- xgbe_disable_an(pdata); + xgbe_an_disable_all(pdata); /* Clear any auto-negotitation interrupts */ - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); + xgbe_an_clear_interrupts_all(pdata); pdata->an_result = XGBE_AN_READY; pdata->an_state = XGBE_AN_READY; @@ -974,11 +1210,8 @@ static int __xgbe_phy_config_aneg(struct xgbe_prv_data *pdata) /* Re-enable auto-negotiation interrupt */ enable_irq(pdata->an_irq); - /* Set up advertisement registers based on current settings */ xgbe_an_init(pdata); - - /* Enable and start auto-negotiation */ - xgbe_restart_an(pdata); + xgbe_an_restart(pdata); return 0; } @@ -1016,108 +1249,52 @@ static void xgbe_check_link_timeout(struct xgbe_prv_data *pdata) } } -static void xgbe_phy_status_force(struct xgbe_prv_data *pdata) +static enum xgbe_mode xgbe_phy_status_aneg(struct xgbe_prv_data *pdata) { - if (xgbe_in_kr_mode(pdata)) { - pdata->phy.speed = SPEED_10000; - } else { - switch (pdata->speed_set) { - case XGBE_SPEEDSET_1000_10000: - pdata->phy.speed = SPEED_1000; - break; - - case XGBE_SPEEDSET_2500_10000: - pdata->phy.speed = SPEED_2500; - break; - } - } - pdata->phy.duplex = DUPLEX_FULL; + return pdata->phy_if.phy_impl.an_outcome(pdata); } -static void xgbe_phy_status_aneg(struct xgbe_prv_data *pdata) +static void xgbe_phy_status_result(struct xgbe_prv_data *pdata) { - unsigned int ad_reg, lp_reg; + enum xgbe_mode mode; pdata->phy.lp_advertising = 0; if ((pdata->phy.autoneg != AUTONEG_ENABLE) || pdata->parallel_detect) - return xgbe_phy_status_force(pdata); - - pdata->phy.lp_advertising |= ADVERTISED_Autoneg; - pdata->phy.lp_advertising |= ADVERTISED_Backplane; - - /* Compare Advertisement and Link Partner register 1 */ - ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE); - lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA); - if (lp_reg & 0x400) - pdata->phy.lp_advertising |= ADVERTISED_Pause; - if (lp_reg & 0x800) - pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause; - - if (pdata->phy.pause_autoneg) { - /* Set flow control based on auto-negotiation result */ - pdata->phy.tx_pause = 0; - pdata->phy.rx_pause = 0; - - if (ad_reg & lp_reg & 0x400) { - pdata->phy.tx_pause = 1; - pdata->phy.rx_pause = 1; - } else if (ad_reg & lp_reg & 0x800) { - if (ad_reg & 0x400) - pdata->phy.rx_pause = 1; - else if (lp_reg & 0x400) - pdata->phy.tx_pause = 1; - } - } - - /* Compare Advertisement and Link Partner register 2 */ - ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1); - lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1); - if (lp_reg & 0x80) - pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full; - if (lp_reg & 0x20) { - switch (pdata->speed_set) { - case XGBE_SPEEDSET_1000_10000: - pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full; - break; - case XGBE_SPEEDSET_2500_10000: - pdata->phy.lp_advertising |= ADVERTISED_2500baseX_Full; - break; - } - } + mode = xgbe_cur_mode(pdata); + else + mode = xgbe_phy_status_aneg(pdata); - ad_reg &= lp_reg; - if (ad_reg & 0x80) { + switch (mode) { + case XGBE_MODE_SGMII_100: + pdata->phy.speed = SPEED_100; + break; + case XGBE_MODE_X: + case XGBE_MODE_KX_1000: + case XGBE_MODE_SGMII_1000: + pdata->phy.speed = SPEED_1000; + break; + case XGBE_MODE_KX_2500: + pdata->phy.speed = SPEED_2500; + break; + case XGBE_MODE_KR: + case XGBE_MODE_SFI: pdata->phy.speed = SPEED_10000; - xgbe_set_mode(pdata, XGBE_MODE_KR); - } else if (ad_reg & 0x20) { - switch (pdata->speed_set) { - case XGBE_SPEEDSET_1000_10000: - pdata->phy.speed = SPEED_1000; - break; - - case XGBE_SPEEDSET_2500_10000: - 
pdata->phy.speed = SPEED_2500; - break; - } - - xgbe_set_mode(pdata, XGBE_MODE_KX); - } else { + break; + case XGBE_MODE_UNKNOWN: + default: pdata->phy.speed = SPEED_UNKNOWN; } - /* Compare Advertisement and Link Partner register 3 */ - ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); - lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2); - if (lp_reg & 0xc000) - pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC; - pdata->phy.duplex = DUPLEX_FULL; + + xgbe_set_mode(pdata, mode); } static void xgbe_phy_status(struct xgbe_prv_data *pdata) { - unsigned int reg, link_aneg; + unsigned int link_aneg; + int an_restart; if (test_bit(XGBE_LINK_ERR, &pdata->dev_state)) { netif_carrier_off(pdata->netdev); @@ -1128,12 +1305,12 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata) link_aneg = (pdata->phy.autoneg == AUTONEG_ENABLE); - /* Get the link status. Link status is latched low, so read - * once to clear and then read again to get current state - */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); - pdata->phy.link = (reg & MDIO_STAT1_LSTATUS) ? 1 : 0; + pdata->phy.link = pdata->phy_if.phy_impl.link_status(pdata, + &an_restart); + if (an_restart) { + xgbe_phy_config_aneg(pdata); + return; + } if (pdata->phy.link) { if (link_aneg && !xgbe_phy_aneg_done(pdata)) { @@ -1141,7 +1318,7 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata) return; } - xgbe_phy_status_aneg(pdata); + xgbe_phy_status_result(pdata); if (test_bit(XGBE_LINK_INIT, &pdata->dev_state)) clear_bit(XGBE_LINK_INIT, &pdata->dev_state); @@ -1155,7 +1332,7 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata) return; } - xgbe_phy_status_aneg(pdata); + xgbe_phy_status_result(pdata); netif_carrier_off(pdata->netdev); } @@ -1168,13 +1345,19 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata) { netif_dbg(pdata, link, pdata->netdev, "stopping PHY\n"); + if (!pdata->phy_started) + return; + + /* Indicate the PHY is down */ + pdata->phy_started = 0; + /* Disable auto-negotiation */ - xgbe_disable_an(pdata); + xgbe_an_disable_all(pdata); - /* Disable auto-negotiation interrupts */ - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0); + if (pdata->dev_irq != pdata->an_irq) + devm_free_irq(pdata->dev, pdata->an_irq, pdata); - devm_free_irq(pdata->dev, pdata->an_irq, pdata); + pdata->phy_if.phy_impl.stop(pdata); pdata->phy.link = 0; netif_carrier_off(pdata->netdev); @@ -1189,64 +1372,74 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata) netif_dbg(pdata, link, pdata->netdev, "starting PHY\n"); - ret = devm_request_irq(pdata->dev, pdata->an_irq, - xgbe_an_isr, 0, pdata->an_name, - pdata); - if (ret) { - netdev_err(netdev, "phy irq request failed\n"); + ret = pdata->phy_if.phy_impl.start(pdata); + if (ret) return ret; + + /* If we have a separate AN irq, enable it */ + if (pdata->dev_irq != pdata->an_irq) { + ret = devm_request_irq(pdata->dev, pdata->an_irq, + xgbe_an_isr, 0, pdata->an_name, + pdata); + if (ret) { + netdev_err(netdev, "phy irq request failed\n"); + goto err_stop; + } } /* Set initial mode - call the mode setting routines * directly to insure we are properly configured */ - if (xgbe_use_xgmii_mode(pdata)) { - xgbe_xgmii_mode(pdata); - } else if (xgbe_use_gmii_mode(pdata)) { - xgbe_gmii_mode(pdata); - } else if (xgbe_use_gmii_2500_mode(pdata)) { - xgbe_gmii_2500_mode(pdata); + if (xgbe_use_mode(pdata, XGBE_MODE_KR)) { + xgbe_kr_mode(pdata); + } else if (xgbe_use_mode(pdata, XGBE_MODE_KX_2500)) { + xgbe_kx_2500_mode(pdata); 
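
For reference, the new status path a few hunks above (xgbe_phy_status_result) reduces each operating mode to a link speed, and the same fixed mapping underlies the mode ladder being set up here. It is compact enough to restate as a pure helper; a sketch assuming the driver's mode enum (the helper itself is not in the patch, SPEED_* come from the ethtool UAPI):

/* Illustrative restatement of the mode-to-speed mapping used by
 * xgbe_phy_status_result(); not part of the driver itself.
 */
static int xgbe_mode_to_speed(enum xgbe_mode mode)
{
        switch (mode) {
        case XGBE_MODE_SGMII_100:
                return SPEED_100;
        case XGBE_MODE_X:
        case XGBE_MODE_KX_1000:
        case XGBE_MODE_SGMII_1000:
                return SPEED_1000;
        case XGBE_MODE_KX_2500:
                return SPEED_2500;
        case XGBE_MODE_KR:
        case XGBE_MODE_SFI:
                return SPEED_10000;
        default:
                return SPEED_UNKNOWN;
        }
}
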
+ } else if (xgbe_use_mode(pdata, XGBE_MODE_KX_1000)) { + xgbe_kx_1000_mode(pdata); + } else if (xgbe_use_mode(pdata, XGBE_MODE_SFI)) { + xgbe_sfi_mode(pdata); + } else if (xgbe_use_mode(pdata, XGBE_MODE_X)) { + xgbe_x_mode(pdata); + } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_1000)) { + xgbe_sgmii_1000_mode(pdata); + } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_100)) { + xgbe_sgmii_100_mode(pdata); } else { ret = -EINVAL; goto err_irq; } - /* Set up advertisement registers based on current settings */ - xgbe_an_init(pdata); + /* Indicate the PHY is up and running */ + pdata->phy_started = 1; - /* Enable auto-negotiation interrupts */ - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0x07); + xgbe_an_init(pdata); + xgbe_an_enable_interrupts(pdata); return xgbe_phy_config_aneg(pdata); err_irq: - devm_free_irq(pdata->dev, pdata->an_irq, pdata); + if (pdata->dev_irq != pdata->an_irq) + devm_free_irq(pdata->dev, pdata->an_irq, pdata); + +err_stop: + pdata->phy_if.phy_impl.stop(pdata); return ret; } static int xgbe_phy_reset(struct xgbe_prv_data *pdata) { - unsigned int count, reg; - - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); - reg |= MDIO_CTRL1_RESET; - XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); - - count = 50; - do { - msleep(20); - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); - } while ((reg & MDIO_CTRL1_RESET) && --count); + int ret; - if (reg & MDIO_CTRL1_RESET) - return -ETIMEDOUT; + ret = pdata->phy_if.phy_impl.reset(pdata); + if (ret) + return ret; /* Disable auto-negotiation for now */ - xgbe_disable_an(pdata); + xgbe_an_disable_all(pdata); /* Clear auto-negotiation interrupts */ - XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0); + xgbe_an_clear_interrupts_all(pdata); return 0; } @@ -1257,74 +1450,96 @@ static void xgbe_dump_phy_registers(struct xgbe_prv_data *pdata) dev_dbg(dev, "\n************* PHY Reg dump **********************\n"); - dev_dbg(dev, "PCS Control Reg (%#04x) = %#04x\n", MDIO_CTRL1, + dev_dbg(dev, "PCS Control Reg (%#06x) = %#06x\n", MDIO_CTRL1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1)); - dev_dbg(dev, "PCS Status Reg (%#04x) = %#04x\n", MDIO_STAT1, + dev_dbg(dev, "PCS Status Reg (%#06x) = %#06x\n", MDIO_STAT1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1)); - dev_dbg(dev, "Phy Id (PHYS ID 1 %#04x)= %#04x\n", MDIO_DEVID1, + dev_dbg(dev, "Phy Id (PHYS ID 1 %#06x)= %#06x\n", MDIO_DEVID1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID1)); - dev_dbg(dev, "Phy Id (PHYS ID 2 %#04x)= %#04x\n", MDIO_DEVID2, + dev_dbg(dev, "Phy Id (PHYS ID 2 %#06x)= %#06x\n", MDIO_DEVID2, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID2)); - dev_dbg(dev, "Devices in Package (%#04x)= %#04x\n", MDIO_DEVS1, + dev_dbg(dev, "Devices in Package (%#06x)= %#06x\n", MDIO_DEVS1, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS1)); - dev_dbg(dev, "Devices in Package (%#04x)= %#04x\n", MDIO_DEVS2, + dev_dbg(dev, "Devices in Package (%#06x)= %#06x\n", MDIO_DEVS2, XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS2)); - dev_dbg(dev, "Auto-Neg Control Reg (%#04x) = %#04x\n", MDIO_CTRL1, + dev_dbg(dev, "Auto-Neg Control Reg (%#06x) = %#06x\n", MDIO_CTRL1, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1)); - dev_dbg(dev, "Auto-Neg Status Reg (%#04x) = %#04x\n", MDIO_STAT1, + dev_dbg(dev, "Auto-Neg Status Reg (%#06x) = %#06x\n", MDIO_STAT1, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_STAT1)); - dev_dbg(dev, "Auto-Neg Ad Reg 1 (%#04x) = %#04x\n", + dev_dbg(dev, "Auto-Neg Ad Reg 1 (%#06x) = %#06x\n", MDIO_AN_ADVERTISE, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE)); - dev_dbg(dev, "Auto-Neg Ad Reg 2 (%#04x) = 
%#04x\n", + dev_dbg(dev, "Auto-Neg Ad Reg 2 (%#06x) = %#06x\n", MDIO_AN_ADVERTISE + 1, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1)); - dev_dbg(dev, "Auto-Neg Ad Reg 3 (%#04x) = %#04x\n", + dev_dbg(dev, "Auto-Neg Ad Reg 3 (%#06x) = %#06x\n", MDIO_AN_ADVERTISE + 2, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2)); - dev_dbg(dev, "Auto-Neg Completion Reg (%#04x) = %#04x\n", + dev_dbg(dev, "Auto-Neg Completion Reg (%#06x) = %#06x\n", MDIO_AN_COMP_STAT, XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_COMP_STAT)); dev_dbg(dev, "\n*************************************************\n"); } -static void xgbe_phy_init(struct xgbe_prv_data *pdata) +static int xgbe_phy_best_advertised_speed(struct xgbe_prv_data *pdata) { + if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full) + return SPEED_10000; + else if (pdata->phy.advertising & ADVERTISED_10000baseT_Full) + return SPEED_10000; + else if (pdata->phy.advertising & ADVERTISED_2500baseX_Full) + return SPEED_2500; + else if (pdata->phy.advertising & ADVERTISED_1000baseKX_Full) + return SPEED_1000; + else if (pdata->phy.advertising & ADVERTISED_1000baseT_Full) + return SPEED_1000; + else if (pdata->phy.advertising & ADVERTISED_100baseT_Full) + return SPEED_100; + + return SPEED_UNKNOWN; +} + +static void xgbe_phy_exit(struct xgbe_prv_data *pdata) +{ + xgbe_phy_stop(pdata); + + pdata->phy_if.phy_impl.exit(pdata); +} + +static int xgbe_phy_init(struct xgbe_prv_data *pdata) +{ + int ret; + mutex_init(&pdata->an_mutex); INIT_WORK(&pdata->an_irq_work, xgbe_an_irq_work); INIT_WORK(&pdata->an_work, xgbe_an_state_machine); pdata->mdio_mmd = MDIO_MMD_PCS; - /* Initialize supported features */ - pdata->phy.supported = SUPPORTED_Autoneg; - pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; - pdata->phy.supported |= SUPPORTED_Backplane; - pdata->phy.supported |= SUPPORTED_10000baseKR_Full; - switch (pdata->speed_set) { - case XGBE_SPEEDSET_1000_10000: - pdata->phy.supported |= SUPPORTED_1000baseKX_Full; - break; - case XGBE_SPEEDSET_2500_10000: - pdata->phy.supported |= SUPPORTED_2500baseX_Full; - break; - } - + /* Check for FEC support */ pdata->fec_ability = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_FECABLE); pdata->fec_ability &= (MDIO_PMA_10GBR_FECABLE_ABLE | MDIO_PMA_10GBR_FECABLE_ERRABLE); - if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE) - pdata->phy.supported |= SUPPORTED_10000baseR_FEC; + /* Setup the phy (including supported features) */ + ret = pdata->phy_if.phy_impl.init(pdata); + if (ret) + return ret; pdata->phy.advertising = pdata->phy.supported; pdata->phy.address = 0; - pdata->phy.autoneg = AUTONEG_ENABLE; - pdata->phy.speed = SPEED_UNKNOWN; - pdata->phy.duplex = DUPLEX_UNKNOWN; + if (pdata->phy.advertising & ADVERTISED_Autoneg) { + pdata->phy.autoneg = AUTONEG_ENABLE; + pdata->phy.speed = SPEED_UNKNOWN; + pdata->phy.duplex = DUPLEX_UNKNOWN; + } else { + pdata->phy.autoneg = AUTONEG_DISABLE; + pdata->phy.speed = xgbe_phy_best_advertised_speed(pdata); + pdata->phy.duplex = DUPLEX_FULL; + } pdata->phy.link = 0; @@ -1346,11 +1561,14 @@ static void xgbe_phy_init(struct xgbe_prv_data *pdata) if (netif_msg_drv(pdata)) xgbe_dump_phy_registers(pdata); + + return 0; } void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *phy_if) { phy_if->phy_init = xgbe_phy_init; + phy_if->phy_exit = xgbe_phy_exit; phy_if->phy_reset = xgbe_phy_reset; phy_if->phy_start = xgbe_phy_start; @@ -1358,4 +1576,8 @@ void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *phy_if) phy_if->phy_status = xgbe_phy_status; phy_if->phy_config_aneg = 
xgbe_phy_config_aneg; + + phy_if->phy_valid_speed = xgbe_phy_valid_speed; + + phy_if->an_isr = xgbe_an_combined_isr; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c new file mode 100644 index 000000000000..e76b7f65b805 --- /dev/null +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -0,0 +1,529 @@ +/* + * AMD 10Gb Ethernet driver + * + * This file is available to you under your choice of the following two + * licenses: + * + * License 1: GPLv2 + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * + * This file is free software; you may copy, redistribute and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or (at + * your option) any later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * + * License 2: Modified BSD + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Advanced Micro Devices, Inc. nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/log2.h> + +#include "xgbe.h" +#include "xgbe-common.h" + +static int xgbe_config_msi(struct xgbe_prv_data *pdata) +{ + unsigned int msi_count; + unsigned int i, j; + int ret; + + msi_count = XGBE_MSIX_BASE_COUNT; + msi_count += max(pdata->rx_ring_count, + pdata->tx_ring_count); + msi_count = roundup_pow_of_two(msi_count); + + ret = pci_enable_msi_exact(pdata->pcidev, msi_count); + if (ret < 0) { + dev_info(pdata->dev, "MSI request for %u interrupts failed\n", + msi_count); + + ret = pci_enable_msi(pdata->pcidev); + if (ret < 0) { + dev_info(pdata->dev, "MSI enablement failed\n"); + return ret; + } + + msi_count = 1; + } + + pdata->irq_count = msi_count; + + pdata->dev_irq = pdata->pcidev->irq; + + if (msi_count > 1) { + pdata->ecc_irq = pdata->pcidev->irq + 1; + pdata->i2c_irq = pdata->pcidev->irq + 2; + pdata->an_irq = pdata->pcidev->irq + 3; + + for (i = XGBE_MSIX_BASE_COUNT, j = 0; + (i < msi_count) && (j < XGBE_MAX_DMA_CHANNELS); + i++, j++) + pdata->channel_irq[j] = pdata->pcidev->irq + i; + pdata->channel_irq_count = j; + + pdata->per_channel_irq = 1; + pdata->channel_irq_mode = XGBE_IRQ_MODE_LEVEL; + } else { + pdata->ecc_irq = pdata->pcidev->irq; + pdata->i2c_irq = pdata->pcidev->irq; + pdata->an_irq = pdata->pcidev->irq; + } + + if (netif_msg_probe(pdata)) + dev_dbg(pdata->dev, "MSI interrupts enabled\n"); + + return 0; +} + +static int xgbe_config_msix(struct xgbe_prv_data *pdata) +{ + unsigned int msix_count; + unsigned int i, j; + int ret; + + msix_count = XGBE_MSIX_BASE_COUNT; + msix_count += max(pdata->rx_ring_count, + pdata->tx_ring_count); + + pdata->msix_entries = devm_kcalloc(pdata->dev, msix_count, + sizeof(struct msix_entry), + GFP_KERNEL); + if (!pdata->msix_entries) + return -ENOMEM; + + for (i = 0; i < msix_count; i++) + pdata->msix_entries[i].entry = i; + + ret = pci_enable_msix_range(pdata->pcidev, pdata->msix_entries, + XGBE_MSIX_MIN_COUNT, msix_count); + if (ret < 0) { + dev_info(pdata->dev, "MSI-X enablement failed\n"); + devm_kfree(pdata->dev, pdata->msix_entries); + pdata->msix_entries = NULL; + return ret; + } + + pdata->irq_count = ret; + + pdata->dev_irq = pdata->msix_entries[0].vector; + pdata->ecc_irq = pdata->msix_entries[1].vector; + pdata->i2c_irq = pdata->msix_entries[2].vector; + pdata->an_irq = pdata->msix_entries[3].vector; + + for (i = XGBE_MSIX_BASE_COUNT, j = 0; i < ret; i++, j++) + pdata->channel_irq[j] = pdata->msix_entries[i].vector; + pdata->channel_irq_count = j; + + pdata->per_channel_irq = 1; + pdata->channel_irq_mode = XGBE_IRQ_MODE_LEVEL; + + if (netif_msg_probe(pdata)) + dev_dbg(pdata->dev, "MSI-X interrupts enabled\n"); + + return 0; +} + +static int xgbe_config_irqs(struct xgbe_prv_data *pdata) +{ + int ret; + + ret = xgbe_config_msix(pdata); + if (!ret) + goto out; + + ret = xgbe_config_msi(pdata); + if (!ret) + goto out; + + pdata->irq_count = 1; + pdata->irq_shared = 1; + + pdata->dev_irq = pdata->pcidev->irq; + pdata->ecc_irq = pdata->pcidev->irq; + pdata->i2c_irq = pdata->pcidev->irq; + pdata->an_irq = pdata->pcidev->irq; + +out: + if (netif_msg_probe(pdata)) { + unsigned int i; + + dev_dbg(pdata->dev, " dev irq=%d\n", pdata->dev_irq); + dev_dbg(pdata->dev, " ecc irq=%d\n", pdata->ecc_irq); + dev_dbg(pdata->dev, " i2c irq=%d\n", pdata->i2c_irq); + dev_dbg(pdata->dev, " an irq=%d\n", pdata->an_irq); + for (i = 0; i < pdata->channel_irq_count; i++) + dev_dbg(pdata->dev, " dma%u irq=%d\n", + i, pdata->channel_irq[i]); + } + + 
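/* Interrupt setup is complete at this point: MSI-X, MSI or, + * failing both, a single shared legacy interrupt used for all + * interrupt sources. + */ + 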
return 0; +} + +static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct xgbe_prv_data *pdata; + struct device *dev = &pdev->dev; + void __iomem * const *iomap_table; + unsigned int ma_lo, ma_hi; + unsigned int reg; + int bar_mask; + int ret; + + pdata = xgbe_alloc_pdata(dev); + if (IS_ERR(pdata)) { + ret = PTR_ERR(pdata); + goto err_alloc; + } + + pdata->pcidev = pdev; + pci_set_drvdata(pdev, pdata); + + /* Get the version data */ + pdata->vdata = (struct xgbe_version_data *)id->driver_data; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(dev, "pcim_enable_device failed\n"); + goto err_pci_enable; + } + + /* Obtain the mmio areas for the device */ + bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); + ret = pcim_iomap_regions(pdev, bar_mask, XGBE_DRV_NAME); + if (ret) { + dev_err(dev, "pcim_iomap_regions failed\n"); + goto err_pci_enable; + } + + iomap_table = pcim_iomap_table(pdev); + if (!iomap_table) { + dev_err(dev, "pcim_iomap_table failed\n"); + ret = -ENOMEM; + goto err_pci_enable; + } + + pdata->xgmac_regs = iomap_table[XGBE_XGMAC_BAR]; + if (!pdata->xgmac_regs) { + dev_err(dev, "xgmac ioremap failed\n"); + ret = -ENOMEM; + goto err_pci_enable; + } + pdata->xprop_regs = pdata->xgmac_regs + XGBE_MAC_PROP_OFFSET; + pdata->xi2c_regs = pdata->xgmac_regs + XGBE_I2C_CTRL_OFFSET; + if (netif_msg_probe(pdata)) { + dev_dbg(dev, "xgmac_regs = %p\n", pdata->xgmac_regs); + dev_dbg(dev, "xprop_regs = %p\n", pdata->xprop_regs); + dev_dbg(dev, "xi2c_regs = %p\n", pdata->xi2c_regs); + } + + pdata->xpcs_regs = iomap_table[XGBE_XPCS_BAR]; + if (!pdata->xpcs_regs) { + dev_err(dev, "xpcs ioremap failed\n"); + ret = -ENOMEM; + goto err_pci_enable; + } + if (netif_msg_probe(pdata)) + dev_dbg(dev, "xpcs_regs = %p\n", pdata->xpcs_regs); + + /* Configure the PCS indirect addressing support */ + reg = XPCS32_IOREAD(pdata, PCS_V2_WINDOW_DEF); + pdata->xpcs_window = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, OFFSET); + pdata->xpcs_window <<= 6; + pdata->xpcs_window_size = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, SIZE); + pdata->xpcs_window_size = 1 << (pdata->xpcs_window_size + 7); + pdata->xpcs_window_mask = pdata->xpcs_window_size - 1; + if (netif_msg_probe(pdata)) { + dev_dbg(dev, "xpcs window = %#010x\n", + pdata->xpcs_window); + dev_dbg(dev, "xpcs window size = %#010x\n", + pdata->xpcs_window_size); + dev_dbg(dev, "xpcs window mask = %#010x\n", + pdata->xpcs_window_mask); + } + + pci_set_master(pdev); + + /* Enable all interrupts in the hardware */ + XP_IOWRITE(pdata, XP_INT_EN, 0x1fffff); + + /* Retrieve the MAC address */ + ma_lo = XP_IOREAD(pdata, XP_MAC_ADDR_LO); + ma_hi = XP_IOREAD(pdata, XP_MAC_ADDR_HI); + pdata->mac_addr[0] = ma_lo & 0xff; + pdata->mac_addr[1] = (ma_lo >> 8) & 0xff; + pdata->mac_addr[2] = (ma_lo >> 16) & 0xff; + pdata->mac_addr[3] = (ma_lo >> 24) & 0xff; + pdata->mac_addr[4] = ma_hi & 0xff; + pdata->mac_addr[5] = (ma_hi >> 8) & 0xff; + if (!XP_GET_BITS(ma_hi, XP_MAC_ADDR_HI, VALID) || + !is_valid_ether_addr(pdata->mac_addr)) { + dev_err(dev, "invalid mac address\n"); + ret = -EINVAL; + goto err_pci_enable; + } + + /* Clock settings */ + pdata->sysclk_rate = XGBE_V2_DMA_CLOCK_FREQ; + pdata->ptpclk_rate = XGBE_V2_PTP_CLOCK_FREQ; + + /* Set the DMA coherency values */ + pdata->coherent = 1; + pdata->axdomain = XGBE_DMA_OS_AXDOMAIN; + pdata->arcache = XGBE_DMA_OS_ARCACHE; + pdata->awcache = XGBE_DMA_OS_AWCACHE; + + /* Set the maximum channels and queues */ + reg = XP_IOREAD(pdata, XP_PROP_1); + pdata->tx_max_channel_count = XP_GET_BITS(reg, 
XP_PROP_1, MAX_TX_DMA); + pdata->rx_max_channel_count = XP_GET_BITS(reg, XP_PROP_1, MAX_RX_DMA); + pdata->tx_max_q_count = XP_GET_BITS(reg, XP_PROP_1, MAX_TX_QUEUES); + pdata->rx_max_q_count = XP_GET_BITS(reg, XP_PROP_1, MAX_RX_QUEUES); + if (netif_msg_probe(pdata)) { + dev_dbg(dev, "max tx/rx channel count = %u/%u\n", + pdata->tx_max_channel_count, + pdata->rx_max_channel_count); + dev_dbg(dev, "max tx/rx hw queue count = %u/%u\n", + pdata->tx_max_q_count, pdata->rx_max_q_count); + } + + /* Set the hardware channel and queue counts */ + xgbe_set_counts(pdata); + + /* Set the maximum fifo amounts */ + reg = XP_IOREAD(pdata, XP_PROP_2); + pdata->tx_max_fifo_size = XP_GET_BITS(reg, XP_PROP_2, TX_FIFO_SIZE); + pdata->tx_max_fifo_size *= 16384; + pdata->tx_max_fifo_size = min(pdata->tx_max_fifo_size, + pdata->vdata->tx_max_fifo_size); + pdata->rx_max_fifo_size = XP_GET_BITS(reg, XP_PROP_2, RX_FIFO_SIZE); + pdata->rx_max_fifo_size *= 16384; + pdata->rx_max_fifo_size = min(pdata->rx_max_fifo_size, + pdata->vdata->rx_max_fifo_size); + if (netif_msg_probe(pdata)) + dev_dbg(dev, "max tx/rx fifo size = %u/%u\n", + pdata->tx_max_fifo_size, pdata->rx_max_fifo_size); + + /* Configure interrupt support */ + ret = xgbe_config_irqs(pdata); + if (ret) + goto err_pci_enable; + + /* Configure the netdev resource */ + ret = xgbe_config_netdev(pdata); + if (ret) + goto err_pci_enable; + + netdev_notice(pdata->netdev, "net device enabled\n"); + + return 0; + +err_pci_enable: + xgbe_free_pdata(pdata); + +err_alloc: + dev_notice(dev, "net device not enabled\n"); + + return ret; +} + +static void xgbe_pci_remove(struct pci_dev *pdev) +{ + struct xgbe_prv_data *pdata = pci_get_drvdata(pdev); + + xgbe_deconfig_netdev(pdata); + + xgbe_free_pdata(pdata); +} + +#ifdef CONFIG_PM +static int xgbe_pci_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct xgbe_prv_data *pdata = pci_get_drvdata(pdev); + struct net_device *netdev = pdata->netdev; + int ret = 0; + + if (netif_running(netdev)) + ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT); + + pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); + + return ret; +} + +static int xgbe_pci_resume(struct pci_dev *pdev) +{ + struct xgbe_prv_data *pdata = pci_get_drvdata(pdev); + struct net_device *netdev = pdata->netdev; + int ret = 0; + + pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); + + if (netif_running(netdev)) { + ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT); + + /* Schedule a restart in case the link or phy state changed + * while we were powered down.
+ */ + schedule_work(&pdata->restart_work); + } + + return ret; +} +#endif /* CONFIG_PM */ + +static const struct xgbe_version_data xgbe_v2a = { + .init_function_ptrs_phy_impl = xgbe_init_function_ptrs_phy_v2, + .xpcs_access = XGBE_XPCS_ACCESS_V2, + .mmc_64bit = 1, + .tx_max_fifo_size = 229376, + .rx_max_fifo_size = 229376, + .tx_tstamp_workaround = 1, + .ecc_support = 1, + .i2c_support = 1, +}; + +static const struct xgbe_version_data xgbe_v2b = { + .init_function_ptrs_phy_impl = xgbe_init_function_ptrs_phy_v2, + .xpcs_access = XGBE_XPCS_ACCESS_V2, + .mmc_64bit = 1, + .tx_max_fifo_size = 65536, + .rx_max_fifo_size = 65536, + .tx_tstamp_workaround = 1, + .ecc_support = 1, + .i2c_support = 1, +}; + +static const struct pci_device_id xgbe_pci_table[] = { + { PCI_VDEVICE(AMD, 0x1458), + .driver_data = (kernel_ulong_t)&xgbe_v2a }, + { PCI_VDEVICE(AMD, 0x1459), + .driver_data = (kernel_ulong_t)&xgbe_v2b }, + /* Last entry must be zero */ + { 0, } +}; +MODULE_DEVICE_TABLE(pci, xgbe_pci_table); + +static struct pci_driver xgbe_driver = { + .name = XGBE_DRV_NAME, + .id_table = xgbe_pci_table, + .probe = xgbe_pci_probe, + .remove = xgbe_pci_remove, +#ifdef CONFIG_PM + .suspend = xgbe_pci_suspend, + .resume = xgbe_pci_resume, +#endif +}; + +int xgbe_pci_init(void) +{ + return pci_register_driver(&xgbe_driver); +} + +void xgbe_pci_exit(void) +{ + pci_unregister_driver(&xgbe_driver); +} diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c new file mode 100644 index 000000000000..c75edcac5e0a --- /dev/null +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c @@ -0,0 +1,845 @@ +/* + * AMD 10Gb Ethernet driver + * + * This file is available to you under your choice of the following two + * licenses: + * + * License 1: GPLv2 + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * + * This file is free software; you may copy, redistribute and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or (at + * your option) any later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. 
Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * + * License 2: Modified BSD + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Advanced Micro Devices, Inc. nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. 
Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/kmod.h> +#include <linux/device.h> +#include <linux/property.h> +#include <linux/mdio.h> +#include <linux/phy.h> + +#include "xgbe.h" +#include "xgbe-common.h" + +#define XGBE_BLWC_PROPERTY "amd,serdes-blwc" +#define XGBE_CDR_RATE_PROPERTY "amd,serdes-cdr-rate" +#define XGBE_PQ_SKEW_PROPERTY "amd,serdes-pq-skew" +#define XGBE_TX_AMP_PROPERTY "amd,serdes-tx-amp" +#define XGBE_DFE_CFG_PROPERTY "amd,serdes-dfe-tap-config" +#define XGBE_DFE_ENA_PROPERTY "amd,serdes-dfe-tap-enable" + +/* Default SerDes settings */ +#define XGBE_SPEED_1000_BLWC 1 +#define XGBE_SPEED_1000_CDR 0x2 +#define XGBE_SPEED_1000_PLL 0x0 +#define XGBE_SPEED_1000_PQ 0xa +#define XGBE_SPEED_1000_RATE 0x3 +#define XGBE_SPEED_1000_TXAMP 0xf +#define XGBE_SPEED_1000_WORD 0x1 +#define XGBE_SPEED_1000_DFE_TAP_CONFIG 0x3 +#define XGBE_SPEED_1000_DFE_TAP_ENABLE 0x0 + +#define XGBE_SPEED_2500_BLWC 1 +#define XGBE_SPEED_2500_CDR 0x2 +#define XGBE_SPEED_2500_PLL 0x0 +#define XGBE_SPEED_2500_PQ 0xa +#define XGBE_SPEED_2500_RATE 0x1 +#define XGBE_SPEED_2500_TXAMP 0xf +#define XGBE_SPEED_2500_WORD 0x1 +#define XGBE_SPEED_2500_DFE_TAP_CONFIG 0x3 +#define XGBE_SPEED_2500_DFE_TAP_ENABLE 0x0 + +#define XGBE_SPEED_10000_BLWC 0 +#define XGBE_SPEED_10000_CDR 0x7 +#define XGBE_SPEED_10000_PLL 0x1 +#define XGBE_SPEED_10000_PQ 0x12 +#define XGBE_SPEED_10000_RATE 0x0 +#define XGBE_SPEED_10000_TXAMP 0xa +#define XGBE_SPEED_10000_WORD 0x7 +#define XGBE_SPEED_10000_DFE_TAP_CONFIG 0x1 +#define XGBE_SPEED_10000_DFE_TAP_ENABLE 0x7f + +/* Rate-change complete wait/retry count */ +#define XGBE_RATECHANGE_COUNT 500 + +static const u32 xgbe_phy_blwc[] = { + XGBE_SPEED_1000_BLWC, + XGBE_SPEED_2500_BLWC, + XGBE_SPEED_10000_BLWC, +}; + +static const u32 xgbe_phy_cdr_rate[] = { + XGBE_SPEED_1000_CDR, + XGBE_SPEED_2500_CDR, + XGBE_SPEED_10000_CDR, +}; + +static const u32 xgbe_phy_pq_skew[] = { + XGBE_SPEED_1000_PQ, + XGBE_SPEED_2500_PQ, + XGBE_SPEED_10000_PQ, +}; + +static const u32 xgbe_phy_tx_amp[] = { + XGBE_SPEED_1000_TXAMP, + XGBE_SPEED_2500_TXAMP, + XGBE_SPEED_10000_TXAMP, +}; + +static const u32 xgbe_phy_dfe_tap_cfg[] = { + XGBE_SPEED_1000_DFE_TAP_CONFIG, + 
XGBE_SPEED_2500_DFE_TAP_CONFIG, + XGBE_SPEED_10000_DFE_TAP_CONFIG, +}; + +static const u32 xgbe_phy_dfe_tap_ena[] = { + XGBE_SPEED_1000_DFE_TAP_ENABLE, + XGBE_SPEED_2500_DFE_TAP_ENABLE, + XGBE_SPEED_10000_DFE_TAP_ENABLE, +}; + +struct xgbe_phy_data { + /* 1000/10000 vs 2500/10000 indicator */ + unsigned int speed_set; + + /* SerDes UEFI configurable settings. + * Switching between modes/speeds requires new values for some + * SerDes settings. The values can be supplied as device + * properties in array format. The first array entry is for + * 1GbE, second for 2.5GbE and third for 10GbE. + */ + u32 blwc[XGBE_SPEEDS]; + u32 cdr_rate[XGBE_SPEEDS]; + u32 pq_skew[XGBE_SPEEDS]; + u32 tx_amp[XGBE_SPEEDS]; + u32 dfe_tap_cfg[XGBE_SPEEDS]; + u32 dfe_tap_ena[XGBE_SPEEDS]; +}; + +static void xgbe_phy_kr_training_pre(struct xgbe_prv_data *pdata) +{ + XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 1); +} + +static void xgbe_phy_kr_training_post(struct xgbe_prv_data *pdata) +{ + XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 0); +} + +static enum xgbe_mode xgbe_phy_an_outcome(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + enum xgbe_mode mode; + unsigned int ad_reg, lp_reg; + + pdata->phy.lp_advertising |= ADVERTISED_Autoneg; + pdata->phy.lp_advertising |= ADVERTISED_Backplane; + + /* Compare Advertisement and Link Partner register 1 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA); + if (lp_reg & 0x400) + pdata->phy.lp_advertising |= ADVERTISED_Pause; + if (lp_reg & 0x800) + pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause; + + if (pdata->phy.pause_autoneg) { + /* Set flow control based on auto-negotiation result */ + pdata->phy.tx_pause = 0; + pdata->phy.rx_pause = 0; + + if (ad_reg & lp_reg & 0x400) { + pdata->phy.tx_pause = 1; + pdata->phy.rx_pause = 1; + } else if (ad_reg & lp_reg & 0x800) { + if (ad_reg & 0x400) + pdata->phy.rx_pause = 1; + else if (lp_reg & 0x400) + pdata->phy.tx_pause = 1; + } + } + + /* Compare Advertisement and Link Partner register 2 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1); + if (lp_reg & 0x80) + pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full; + if (lp_reg & 0x20) { + if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000) + pdata->phy.lp_advertising |= ADVERTISED_2500baseX_Full; + else + pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full; + } + + ad_reg &= lp_reg; + if (ad_reg & 0x80) { + mode = XGBE_MODE_KR; + } else if (ad_reg & 0x20) { + if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000) + mode = XGBE_MODE_KX_2500; + else + mode = XGBE_MODE_KX_1000; + } else { + mode = XGBE_MODE_UNKNOWN; + } + + /* Compare Advertisement and Link Partner register 3 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2); + if (lp_reg & 0xc000) + pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC; + + return mode; +} + +static unsigned int xgbe_phy_an_advertising(struct xgbe_prv_data *pdata) +{ + return pdata->phy.advertising; +} + +static int xgbe_phy_an_config(struct xgbe_prv_data *pdata) +{ + /* Nothing uniquely required for AN (auto-negotiation) configuration */ + return 0; +} + +static enum xgbe_an_mode xgbe_phy_an_mode(struct xgbe_prv_data *pdata) +{ + return XGBE_AN_MODE_CL73; +} + +static void xgbe_phy_pcs_power_cycle(struct xgbe_prv_data *pdata) +{ + unsigned int reg; + + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, 
MDIO_CTRL1); + + reg |= MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + + usleep_range(75, 100); + + reg &= ~MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); +} + +static void xgbe_phy_start_ratechange(struct xgbe_prv_data *pdata) +{ + /* Assert Rx and Tx ratechange */ + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 1); +} + +static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata) +{ + unsigned int wait; + u16 status; + + /* Release Rx and Tx ratechange */ + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 0); + + /* Wait for Rx and Tx ready */ + wait = XGBE_RATECHANGE_COUNT; + while (wait--) { + usleep_range(50, 75); + + status = XSIR0_IOREAD(pdata, SIR0_STATUS); + if (XSIR_GET_BITS(status, SIR0_STATUS, RX_READY) && + XSIR_GET_BITS(status, SIR0_STATUS, TX_READY)) + goto rx_reset; + } + + netif_dbg(pdata, link, pdata->netdev, "SerDes rx/tx not ready (%#hx)\n", + status); + +rx_reset: + /* Perform Rx reset for the DFE changes */ + XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 0); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 1); +} + +static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int reg; + + /* Set PCS to KR/10G speed */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); + reg &= ~MDIO_PCS_CTRL2_TYPE; + reg |= MDIO_PCS_CTRL2_10GBR; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg); + + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + reg &= ~MDIO_CTRL1_SPEEDSEL; + reg |= MDIO_CTRL1_SPEED10G; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + + xgbe_phy_pcs_power_cycle(pdata); + + /* Set SerDes to 10G speed */ + xgbe_phy_start_ratechange(pdata); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_10000_RATE); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_10000_WORD); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_10000_PLL); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, + phy_data->cdr_rate[XGBE_SPEED_10000]); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP, + phy_data->tx_amp[XGBE_SPEED_10000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA, + phy_data->blwc[XGBE_SPEED_10000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG, + phy_data->pq_skew[XGBE_SPEED_10000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG, + phy_data->dfe_tap_cfg[XGBE_SPEED_10000]); + XRXTX_IOWRITE(pdata, RXTX_REG22, + phy_data->dfe_tap_ena[XGBE_SPEED_10000]); + + xgbe_phy_complete_ratechange(pdata); + + netif_dbg(pdata, link, pdata->netdev, "10GbE KR mode set\n"); +} + +static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int reg; + + /* Set PCS to KX/1G speed */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); + reg &= ~MDIO_PCS_CTRL2_TYPE; + reg |= MDIO_PCS_CTRL2_10GBX; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg); + + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + reg &= ~MDIO_CTRL1_SPEEDSEL; + reg |= MDIO_CTRL1_SPEED1G; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + + xgbe_phy_pcs_power_cycle(pdata); + + /* Set SerDes to 2.5G speed */ + xgbe_phy_start_ratechange(pdata); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_2500_RATE); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_2500_WORD); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_2500_PLL); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, + phy_data->cdr_rate[XGBE_SPEED_2500]); + 
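/* The remaining writes apply the per-speed SerDes tuning values + * (TX amplitude, baseline wander correction, PQ skew and DFE taps) + */ + 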
XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP, + phy_data->tx_amp[XGBE_SPEED_2500]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA, + phy_data->blwc[XGBE_SPEED_2500]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG, + phy_data->pq_skew[XGBE_SPEED_2500]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG, + phy_data->dfe_tap_cfg[XGBE_SPEED_2500]); + XRXTX_IOWRITE(pdata, RXTX_REG22, + phy_data->dfe_tap_ena[XGBE_SPEED_2500]); + + xgbe_phy_complete_ratechange(pdata); + + netif_dbg(pdata, link, pdata->netdev, "2.5GbE KX mode set\n"); +} + +static void xgbe_phy_kx_1000_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int reg; + + /* Set PCS to KX/1G speed */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); + reg &= ~MDIO_PCS_CTRL2_TYPE; + reg |= MDIO_PCS_CTRL2_10GBX; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg); + + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + reg &= ~MDIO_CTRL1_SPEEDSEL; + reg |= MDIO_CTRL1_SPEED1G; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + + xgbe_phy_pcs_power_cycle(pdata); + + /* Set SerDes to 1G speed */ + xgbe_phy_start_ratechange(pdata); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_1000_RATE); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_1000_WORD); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_1000_PLL); + + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE, + phy_data->cdr_rate[XGBE_SPEED_1000]); + XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP, + phy_data->tx_amp[XGBE_SPEED_1000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA, + phy_data->blwc[XGBE_SPEED_1000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG, + phy_data->pq_skew[XGBE_SPEED_1000]); + XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG, + phy_data->dfe_tap_cfg[XGBE_SPEED_1000]); + XRXTX_IOWRITE(pdata, RXTX_REG22, + phy_data->dfe_tap_ena[XGBE_SPEED_1000]); + + xgbe_phy_complete_ratechange(pdata); + + netif_dbg(pdata, link, pdata->netdev, "1GbE KX mode set\n"); +} + +static enum xgbe_mode xgbe_phy_cur_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + enum xgbe_mode mode; + unsigned int reg; + + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2); + reg &= MDIO_PCS_CTRL2_TYPE; + + if (reg == MDIO_PCS_CTRL2_10GBR) { + mode = XGBE_MODE_KR; + } else { + if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000) + mode = XGBE_MODE_KX_2500; + else + mode = XGBE_MODE_KX_1000; + } + + return mode; +} + +static enum xgbe_mode xgbe_phy_switch_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + enum xgbe_mode mode; + + /* If we are in KR switch to KX, and vice-versa */ + if (xgbe_phy_cur_mode(pdata) == XGBE_MODE_KR) { + if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000) + mode = XGBE_MODE_KX_2500; + else + mode = XGBE_MODE_KX_1000; + } else { + mode = XGBE_MODE_KR; + } + + return mode; +} + +static enum xgbe_mode xgbe_phy_get_mode(struct xgbe_prv_data *pdata, + int speed) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (speed) { + case SPEED_1000: + return (phy_data->speed_set == XGBE_SPEEDSET_1000_10000) + ? XGBE_MODE_KX_1000 : XGBE_MODE_UNKNOWN; + case SPEED_2500: + return (phy_data->speed_set == XGBE_SPEEDSET_2500_10000) + ? 
XGBE_MODE_KX_2500 : XGBE_MODE_UNKNOWN; + case SPEED_10000: + return XGBE_MODE_KR; + default: + return XGBE_MODE_UNKNOWN; + } +} + +static void xgbe_phy_set_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_KX_1000: + xgbe_phy_kx_1000_mode(pdata); + break; + case XGBE_MODE_KX_2500: + xgbe_phy_kx_2500_mode(pdata); + break; + case XGBE_MODE_KR: + xgbe_phy_kr_mode(pdata); + break; + default: + break; + } +} + +static bool xgbe_phy_check_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode, u32 advert) +{ + if (pdata->phy.autoneg == AUTONEG_ENABLE) { + if (pdata->phy.advertising & advert) + return true; + } else { + enum xgbe_mode cur_mode; + + cur_mode = xgbe_phy_get_mode(pdata, pdata->phy.speed); + if (cur_mode == mode) + return true; + } + + return false; +} + +static bool xgbe_phy_use_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_KX_1000: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_1000baseKX_Full); + case XGBE_MODE_KX_2500: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_2500baseX_Full); + case XGBE_MODE_KR: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_10000baseKR_Full); + default: + return false; + } +} + +static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (speed) { + case SPEED_1000: + if (phy_data->speed_set != XGBE_SPEEDSET_1000_10000) + return false; + return true; + case SPEED_2500: + if (phy_data->speed_set != XGBE_SPEEDSET_2500_10000) + return false; + return true; + case SPEED_10000: + return true; + default: + return false; + } +} + +static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) +{ + unsigned int reg; + + *an_restart = 0; + + /* Link status is latched low, so read once to clear + * and then read again to get current state + */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + + return (reg & MDIO_STAT1_LSTATUS) ? 
1 : 0; +} + +static void xgbe_phy_stop(struct xgbe_prv_data *pdata) +{ + /* Nothing uniquely required for stop */ +} + +static int xgbe_phy_start(struct xgbe_prv_data *pdata) +{ + /* Nothing uniquely required for start */ + return 0; +} + +static int xgbe_phy_reset(struct xgbe_prv_data *pdata) +{ + unsigned int reg, count; + + /* Perform a software reset of the PCS */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + reg |= MDIO_CTRL1_RESET; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg); + + count = 50; + do { + msleep(20); + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + } while ((reg & MDIO_CTRL1_RESET) && --count); + + if (reg & MDIO_CTRL1_RESET) + return -ETIMEDOUT; + + return 0; +} + +static void xgbe_phy_exit(struct xgbe_prv_data *pdata) +{ + /* Nothing uniquely required for exit */ +} + +static int xgbe_phy_init(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data; + int ret; + + phy_data = devm_kzalloc(pdata->dev, sizeof(*phy_data), GFP_KERNEL); + if (!phy_data) + return -ENOMEM; + + /* Retrieve the PHY speedset */ + ret = device_property_read_u32(pdata->phy_dev, XGBE_SPEEDSET_PROPERTY, + &phy_data->speed_set); + if (ret) { + dev_err(pdata->dev, "invalid %s property\n", + XGBE_SPEEDSET_PROPERTY); + return ret; + } + + switch (phy_data->speed_set) { + case XGBE_SPEEDSET_1000_10000: + case XGBE_SPEEDSET_2500_10000: + break; + default: + dev_err(pdata->dev, "invalid %s property\n", + XGBE_SPEEDSET_PROPERTY); + return -EINVAL; + } + + /* Retrieve the PHY configuration properties */ + if (device_property_present(pdata->phy_dev, XGBE_BLWC_PROPERTY)) { + ret = device_property_read_u32_array(pdata->phy_dev, + XGBE_BLWC_PROPERTY, + phy_data->blwc, + XGBE_SPEEDS); + if (ret) { + dev_err(pdata->dev, "invalid %s property\n", + XGBE_BLWC_PROPERTY); + return ret; + } + } else { + memcpy(phy_data->blwc, xgbe_phy_blwc, + sizeof(phy_data->blwc)); + } + + if (device_property_present(pdata->phy_dev, XGBE_CDR_RATE_PROPERTY)) { + ret = device_property_read_u32_array(pdata->phy_dev, + XGBE_CDR_RATE_PROPERTY, + phy_data->cdr_rate, + XGBE_SPEEDS); + if (ret) { + dev_err(pdata->dev, "invalid %s property\n", + XGBE_CDR_RATE_PROPERTY); + return ret; + } + } else { + memcpy(phy_data->cdr_rate, xgbe_phy_cdr_rate, + sizeof(phy_data->cdr_rate)); + } + + if (device_property_present(pdata->phy_dev, XGBE_PQ_SKEW_PROPERTY)) { + ret = device_property_read_u32_array(pdata->phy_dev, + XGBE_PQ_SKEW_PROPERTY, + phy_data->pq_skew, + XGBE_SPEEDS); + if (ret) { + dev_err(pdata->dev, "invalid %s property\n", + XGBE_PQ_SKEW_PROPERTY); + return ret; + } + } else { + memcpy(phy_data->pq_skew, xgbe_phy_pq_skew, + sizeof(phy_data->pq_skew)); + } + + if (device_property_present(pdata->phy_dev, XGBE_TX_AMP_PROPERTY)) { + ret = device_property_read_u32_array(pdata->phy_dev, + XGBE_TX_AMP_PROPERTY, + phy_data->tx_amp, + XGBE_SPEEDS); + if (ret) { + dev_err(pdata->dev, "invalid %s property\n", + XGBE_TX_AMP_PROPERTY); + return ret; + } + } else { + memcpy(phy_data->tx_amp, xgbe_phy_tx_amp, + sizeof(phy_data->tx_amp)); + } + + if (device_property_present(pdata->phy_dev, XGBE_DFE_CFG_PROPERTY)) { + ret = device_property_read_u32_array(pdata->phy_dev, + XGBE_DFE_CFG_PROPERTY, + phy_data->dfe_tap_cfg, + XGBE_SPEEDS); + if (ret) { + dev_err(pdata->dev, "invalid %s property\n", + XGBE_DFE_CFG_PROPERTY); + return ret; + } + } else { + memcpy(phy_data->dfe_tap_cfg, xgbe_phy_dfe_tap_cfg, + sizeof(phy_data->dfe_tap_cfg)); + } + + if (device_property_present(pdata->phy_dev, XGBE_DFE_ENA_PROPERTY)) { + ret = 
device_property_read_u32_array(pdata->phy_dev, + XGBE_DFE_ENA_PROPERTY, + phy_data->dfe_tap_ena, + XGBE_SPEEDS); + if (ret) { + dev_err(pdata->dev, "invalid %s property\n", + XGBE_DFE_ENA_PROPERTY); + return ret; + } + } else { + memcpy(phy_data->dfe_tap_ena, xgbe_phy_dfe_tap_ena, + sizeof(phy_data->dfe_tap_ena)); + } + + /* Initialize supported features */ + pdata->phy.supported = SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_Backplane; + pdata->phy.supported |= SUPPORTED_10000baseKR_Full; + switch (phy_data->speed_set) { + case XGBE_SPEEDSET_1000_10000: + pdata->phy.supported |= SUPPORTED_1000baseKX_Full; + break; + case XGBE_SPEEDSET_2500_10000: + pdata->phy.supported |= SUPPORTED_2500baseX_Full; + break; + } + + if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE) + pdata->phy.supported |= SUPPORTED_10000baseR_FEC; + + pdata->phy_data = phy_data; + + return 0; +} + +void xgbe_init_function_ptrs_phy_v1(struct xgbe_phy_if *phy_if) +{ + struct xgbe_phy_impl_if *phy_impl = &phy_if->phy_impl; + + phy_impl->init = xgbe_phy_init; + phy_impl->exit = xgbe_phy_exit; + + phy_impl->reset = xgbe_phy_reset; + phy_impl->start = xgbe_phy_start; + phy_impl->stop = xgbe_phy_stop; + + phy_impl->link_status = xgbe_phy_link_status; + + phy_impl->valid_speed = xgbe_phy_valid_speed; + + phy_impl->use_mode = xgbe_phy_use_mode; + phy_impl->set_mode = xgbe_phy_set_mode; + phy_impl->get_mode = xgbe_phy_get_mode; + phy_impl->switch_mode = xgbe_phy_switch_mode; + phy_impl->cur_mode = xgbe_phy_cur_mode; + + phy_impl->an_mode = xgbe_phy_an_mode; + + phy_impl->an_config = xgbe_phy_an_config; + + phy_impl->an_advertising = xgbe_phy_an_advertising; + + phy_impl->an_outcome = xgbe_phy_an_outcome; + + phy_impl->kr_training_pre = xgbe_phy_kr_training_pre; + phy_impl->kr_training_post = xgbe_phy_kr_training_post; +} diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c new file mode 100644 index 000000000000..4ba43328d99e --- /dev/null +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -0,0 +1,3083 @@ +/* + * AMD 10Gb Ethernet driver + * + * This file is available to you under your choice of the following two + * licenses: + * + * License 1: GPLv2 + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * + * This file is free software; you may copy, redistribute and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or (at + * your option) any later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. 
Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + * + * + * License 2: Modified BSD + * + * Copyright (c) 2016 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Advanced Micro Devices, Inc. nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. 
Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/kmod.h> +#include <linux/mdio.h> +#include <linux/phy.h> + +#include "xgbe.h" +#include "xgbe-common.h" + +#define XGBE_PHY_PORT_SPEED_100 BIT(0) +#define XGBE_PHY_PORT_SPEED_1000 BIT(1) +#define XGBE_PHY_PORT_SPEED_2500 BIT(2) +#define XGBE_PHY_PORT_SPEED_10000 BIT(3) + +#define XGBE_MUTEX_RELEASE 0x80000000 + +#define XGBE_SFP_DIRECT 7 + +/* I2C target addresses */ +#define XGBE_SFP_SERIAL_ID_ADDRESS 0x50 +#define XGBE_SFP_DIAG_INFO_ADDRESS 0x51 +#define XGBE_SFP_PHY_ADDRESS 0x56 +#define XGBE_GPIO_ADDRESS_PCA9555 0x20 + +/* SFP sideband signal indicators */ +#define XGBE_GPIO_NO_TX_FAULT BIT(0) +#define XGBE_GPIO_NO_RATE_SELECT BIT(1) +#define XGBE_GPIO_NO_MOD_ABSENT BIT(2) +#define XGBE_GPIO_NO_RX_LOS BIT(3) + +/* Rate-change complete wait/retry count */ +#define XGBE_RATECHANGE_COUNT 500 + +enum xgbe_port_mode { + XGBE_PORT_MODE_RSVD = 0, + XGBE_PORT_MODE_BACKPLANE, + XGBE_PORT_MODE_BACKPLANE_2500, + XGBE_PORT_MODE_1000BASE_T, + XGBE_PORT_MODE_1000BASE_X, + XGBE_PORT_MODE_NBASE_T, + XGBE_PORT_MODE_10GBASE_T, + XGBE_PORT_MODE_10GBASE_R, + XGBE_PORT_MODE_SFP, + XGBE_PORT_MODE_MAX, +}; + +enum xgbe_conn_type { + XGBE_CONN_TYPE_NONE = 0, + XGBE_CONN_TYPE_SFP, + XGBE_CONN_TYPE_MDIO, + XGBE_CONN_TYPE_BACKPLANE, + XGBE_CONN_TYPE_MAX, +}; + +/* SFP/SFP+ related definitions */ +enum xgbe_sfp_comm { + XGBE_SFP_COMM_DIRECT = 0, + XGBE_SFP_COMM_PCA9545, +}; + +enum xgbe_sfp_cable { + XGBE_SFP_CABLE_UNKNOWN = 0, + XGBE_SFP_CABLE_ACTIVE, + XGBE_SFP_CABLE_PASSIVE, +}; + +enum xgbe_sfp_base { + XGBE_SFP_BASE_UNKNOWN = 0, + XGBE_SFP_BASE_1000_T, + XGBE_SFP_BASE_1000_SX, + XGBE_SFP_BASE_1000_LX, + XGBE_SFP_BASE_1000_CX, + XGBE_SFP_BASE_10000_SR, + XGBE_SFP_BASE_10000_LR, + XGBE_SFP_BASE_10000_LRM, + XGBE_SFP_BASE_10000_ER, + XGBE_SFP_BASE_10000_CR, +}; + +enum xgbe_sfp_speed { + XGBE_SFP_SPEED_UNKNOWN = 0, + XGBE_SFP_SPEED_100_1000, + XGBE_SFP_SPEED_1000, + XGBE_SFP_SPEED_10000, +}; + +/* SFP Serial ID Base ID values relative to an offset of 0 */ +#define XGBE_SFP_BASE_ID 0 +#define XGBE_SFP_ID_SFP 0x03 + +#define XGBE_SFP_BASE_EXT_ID 1 +#define XGBE_SFP_EXT_ID_SFP 0x04 + +#define 
XGBE_SFP_BASE_10GBE_CC 3 +#define XGBE_SFP_BASE_10GBE_CC_SR BIT(4) +#define XGBE_SFP_BASE_10GBE_CC_LR BIT(5) +#define XGBE_SFP_BASE_10GBE_CC_LRM BIT(6) +#define XGBE_SFP_BASE_10GBE_CC_ER BIT(7) + +#define XGBE_SFP_BASE_1GBE_CC 6 +#define XGBE_SFP_BASE_1GBE_CC_SX BIT(0) +#define XGBE_SFP_BASE_1GBE_CC_LX BIT(1) +#define XGBE_SFP_BASE_1GBE_CC_CX BIT(2) +#define XGBE_SFP_BASE_1GBE_CC_T BIT(3) + +#define XGBE_SFP_BASE_CABLE 8 +#define XGBE_SFP_BASE_CABLE_PASSIVE BIT(2) +#define XGBE_SFP_BASE_CABLE_ACTIVE BIT(3) + +#define XGBE_SFP_BASE_BR 12 +#define XGBE_SFP_BASE_BR_1GBE_MIN 0x0a +#define XGBE_SFP_BASE_BR_1GBE_MAX 0x0d +#define XGBE_SFP_BASE_BR_10GBE_MIN 0x64 +#define XGBE_SFP_BASE_BR_10GBE_MAX 0x68 + +#define XGBE_SFP_BASE_CU_CABLE_LEN 18 + +#define XGBE_SFP_BASE_VENDOR_NAME 20 +#define XGBE_SFP_BASE_VENDOR_NAME_LEN 16 +#define XGBE_SFP_BASE_VENDOR_PN 40 +#define XGBE_SFP_BASE_VENDOR_PN_LEN 16 +#define XGBE_SFP_BASE_VENDOR_REV 56 +#define XGBE_SFP_BASE_VENDOR_REV_LEN 4 + +#define XGBE_SFP_BASE_CC 63 + +/* SFP Serial ID Extended ID values relative to an offset of 64 */ +#define XGBE_SFP_BASE_VENDOR_SN 4 +#define XGBE_SFP_BASE_VENDOR_SN_LEN 16 + +#define XGBE_SFP_EXTD_DIAG 28 +#define XGBE_SFP_EXTD_DIAG_ADDR_CHANGE BIT(2) + +#define XGBE_SFP_EXTD_SFF_8472 30 + +#define XGBE_SFP_EXTD_CC 31 + +struct xgbe_sfp_eeprom { + u8 base[64]; + u8 extd[32]; + u8 vendor[32]; +}; + +#define XGBE_BEL_FUSE_VENDOR "BEL-FUSE " +#define XGBE_BEL_FUSE_PARTNO "1GBT-SFP06 " + +struct xgbe_sfp_ascii { + union { + char vendor[XGBE_SFP_BASE_VENDOR_NAME_LEN + 1]; + char partno[XGBE_SFP_BASE_VENDOR_PN_LEN + 1]; + char rev[XGBE_SFP_BASE_VENDOR_REV_LEN + 1]; + char serno[XGBE_SFP_BASE_VENDOR_SN_LEN + 1]; + } u; +}; + +/* MDIO PHY reset types */ +enum xgbe_mdio_reset { + XGBE_MDIO_RESET_NONE = 0, + XGBE_MDIO_RESET_I2C_GPIO, + XGBE_MDIO_RESET_INT_GPIO, + XGBE_MDIO_RESET_MAX, +}; + +/* Re-driver related definitions */ +enum xgbe_phy_redrv_if { + XGBE_PHY_REDRV_IF_MDIO = 0, + XGBE_PHY_REDRV_IF_I2C, + XGBE_PHY_REDRV_IF_MAX, +}; + +enum xgbe_phy_redrv_model { + XGBE_PHY_REDRV_MODEL_4223 = 0, + XGBE_PHY_REDRV_MODEL_4227, + XGBE_PHY_REDRV_MODEL_MAX, +}; + +enum xgbe_phy_redrv_mode { + XGBE_PHY_REDRV_MODE_CX = 5, + XGBE_PHY_REDRV_MODE_SR = 9, +}; + +#define XGBE_PHY_REDRV_MODE_REG 0x12b0 + +/* PHY related configuration information */ +struct xgbe_phy_data { + enum xgbe_port_mode port_mode; + + unsigned int port_id; + + unsigned int port_speeds; + + enum xgbe_conn_type conn_type; + + enum xgbe_mode cur_mode; + enum xgbe_mode start_mode; + + unsigned int rrc_count; + + unsigned int mdio_addr; + + unsigned int comm_owned; + + /* SFP Support */ + enum xgbe_sfp_comm sfp_comm; + unsigned int sfp_mux_address; + unsigned int sfp_mux_channel; + + unsigned int sfp_gpio_address; + unsigned int sfp_gpio_mask; + unsigned int sfp_gpio_rx_los; + unsigned int sfp_gpio_tx_fault; + unsigned int sfp_gpio_mod_absent; + unsigned int sfp_gpio_rate_select; + + unsigned int sfp_rx_los; + unsigned int sfp_tx_fault; + unsigned int sfp_mod_absent; + unsigned int sfp_diags; + unsigned int sfp_changed; + unsigned int sfp_phy_avail; + unsigned int sfp_cable_len; + enum xgbe_sfp_base sfp_base; + enum xgbe_sfp_cable sfp_cable; + enum xgbe_sfp_speed sfp_speed; + struct xgbe_sfp_eeprom sfp_eeprom; + + /* External PHY support */ + enum xgbe_mdio_mode phydev_mode; + struct mii_bus *mii; + struct phy_device *phydev; + enum xgbe_mdio_reset mdio_reset; + unsigned int mdio_reset_addr; + unsigned int mdio_reset_gpio; + + /* Re-driver support */ + unsigned int redrv; + 
unsigned int redrv_if; + unsigned int redrv_addr; + unsigned int redrv_lane; + unsigned int redrv_model; +}; + +/* I2C, MDIO and GPIO lines are muxed, so only one device at a time */ +static DEFINE_MUTEX(xgbe_phy_comm_lock); + +static enum xgbe_an_mode xgbe_phy_an_mode(struct xgbe_prv_data *pdata); + +static int xgbe_phy_i2c_xfer(struct xgbe_prv_data *pdata, + struct xgbe_i2c_op *i2c_op) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + /* Be sure we own the bus */ + if (WARN_ON(!phy_data->comm_owned)) + return -EIO; + + return pdata->i2c_if.i2c_xfer(pdata, i2c_op); +} + +static int xgbe_phy_redrv_write(struct xgbe_prv_data *pdata, unsigned int reg, + unsigned int val) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + struct xgbe_i2c_op i2c_op; + __be16 *redrv_val; + u8 redrv_data[5], csum; + unsigned int i, retry; + int ret; + + /* High byte of register contains read/write indicator */ + redrv_data[0] = ((reg >> 8) & 0xff) << 1; + redrv_data[1] = reg & 0xff; + redrv_val = (__be16 *)&redrv_data[2]; + *redrv_val = cpu_to_be16(val); + + /* Calculate 1 byte checksum */ + csum = 0; + for (i = 0; i < 4; i++) { + csum += redrv_data[i]; + if (redrv_data[i] > csum) + csum++; + } + redrv_data[4] = ~csum; + + retry = 1; +again1: + i2c_op.cmd = XGBE_I2C_CMD_WRITE; + i2c_op.target = phy_data->redrv_addr; + i2c_op.len = sizeof(redrv_data); + i2c_op.buf = redrv_data; + ret = xgbe_phy_i2c_xfer(pdata, &i2c_op); + if (ret) { + if ((ret == -EAGAIN) && retry--) + goto again1; + + return ret; + } + + retry = 1; +again2: + i2c_op.cmd = XGBE_I2C_CMD_READ; + i2c_op.target = phy_data->redrv_addr; + i2c_op.len = 1; + i2c_op.buf = redrv_data; + ret = xgbe_phy_i2c_xfer(pdata, &i2c_op); + if (ret) { + if ((ret == -EAGAIN) && retry--) + goto again2; + + return ret; + } + + if (redrv_data[0] != 0xff) { + netif_dbg(pdata, drv, pdata->netdev, + "Redriver write checksum error\n"); + ret = -EIO; + } + + return ret; +} + +static int xgbe_phy_i2c_write(struct xgbe_prv_data *pdata, unsigned int target, + void *val, unsigned int val_len) +{ + struct xgbe_i2c_op i2c_op; + int retry, ret; + + retry = 1; +again: + /* Write the specified register */ + i2c_op.cmd = XGBE_I2C_CMD_WRITE; + i2c_op.target = target; + i2c_op.len = val_len; + i2c_op.buf = val; + ret = xgbe_phy_i2c_xfer(pdata, &i2c_op); + if ((ret == -EAGAIN) && retry--) + goto again; + + return ret; +} + +static int xgbe_phy_i2c_read(struct xgbe_prv_data *pdata, unsigned int target, + void *reg, unsigned int reg_len, + void *val, unsigned int val_len) +{ + struct xgbe_i2c_op i2c_op; + int retry, ret; + + retry = 1; +again1: + /* Set the specified register to read */ + i2c_op.cmd = XGBE_I2C_CMD_WRITE; + i2c_op.target = target; + i2c_op.len = reg_len; + i2c_op.buf = reg; + ret = xgbe_phy_i2c_xfer(pdata, &i2c_op); + if (ret) { + if ((ret == -EAGAIN) && retry--) + goto again1; + + return ret; + } + + retry = 1; +again2: + /* Read the specified register */ + i2c_op.cmd = XGBE_I2C_CMD_READ; + i2c_op.target = target; + i2c_op.len = val_len; + i2c_op.buf = val; + ret = xgbe_phy_i2c_xfer(pdata, &i2c_op); + if ((ret == -EAGAIN) && retry--) + goto again2; + + return ret; +} + +static int xgbe_phy_sfp_put_mux(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + struct xgbe_i2c_op i2c_op; + u8 mux_channel; + + if (phy_data->sfp_comm == XGBE_SFP_COMM_DIRECT) + return 0; + + /* Select no mux channels */ + mux_channel = 0; + i2c_op.cmd = XGBE_I2C_CMD_WRITE; + i2c_op.target = phy_data->sfp_mux_address; + i2c_op.len = sizeof(mux_channel); + 
i2c_op.buf = &mux_channel; + + return xgbe_phy_i2c_xfer(pdata, &i2c_op); +} + +static int xgbe_phy_sfp_get_mux(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + struct xgbe_i2c_op i2c_op; + u8 mux_channel; + + if (phy_data->sfp_comm == XGBE_SFP_COMM_DIRECT) + return 0; + + /* Select desired mux channel */ + mux_channel = 1 << phy_data->sfp_mux_channel; + i2c_op.cmd = XGBE_I2C_CMD_WRITE; + i2c_op.target = phy_data->sfp_mux_address; + i2c_op.len = sizeof(mux_channel); + i2c_op.buf = &mux_channel; + + return xgbe_phy_i2c_xfer(pdata, &i2c_op); +} + +static void xgbe_phy_put_comm_ownership(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + phy_data->comm_owned = 0; + + mutex_unlock(&xgbe_phy_comm_lock); +} + +static int xgbe_phy_get_comm_ownership(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned long timeout; + unsigned int mutex_id; + + if (phy_data->comm_owned) + return 0; + + /* The I2C and MDIO/GPIO bus is multiplexed between multiple devices, + * the driver needs to take the software mutex and then the hardware + * mutexes before being able to use the busses. + */ + mutex_lock(&xgbe_phy_comm_lock); + + /* Clear the mutexes */ + XP_IOWRITE(pdata, XP_I2C_MUTEX, XGBE_MUTEX_RELEASE); + XP_IOWRITE(pdata, XP_MDIO_MUTEX, XGBE_MUTEX_RELEASE); + + /* Mutex formats are the same for I2C and MDIO/GPIO */ + mutex_id = 0; + XP_SET_BITS(mutex_id, XP_I2C_MUTEX, ID, phy_data->port_id); + XP_SET_BITS(mutex_id, XP_I2C_MUTEX, ACTIVE, 1); + + timeout = jiffies + (5 * HZ); + while (time_before(jiffies, timeout)) { + /* Must be all zeroes in order to obtain the mutex */ + if (XP_IOREAD(pdata, XP_I2C_MUTEX) || + XP_IOREAD(pdata, XP_MDIO_MUTEX)) { + usleep_range(100, 200); + continue; + } + + /* Obtain the mutex */ + XP_IOWRITE(pdata, XP_I2C_MUTEX, mutex_id); + XP_IOWRITE(pdata, XP_MDIO_MUTEX, mutex_id); + + phy_data->comm_owned = 1; + return 0; + } + + mutex_unlock(&xgbe_phy_comm_lock); + + netdev_err(pdata->netdev, "unable to obtain hardware mutexes\n"); + + return -ETIMEDOUT; +} + +static int xgbe_phy_mdio_mii_write(struct xgbe_prv_data *pdata, int addr, + int reg, u16 val) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + if (reg & MII_ADDR_C45) { + if (phy_data->phydev_mode != XGBE_MDIO_MODE_CL45) + return -ENOTSUPP; + } else { + if (phy_data->phydev_mode != XGBE_MDIO_MODE_CL22) + return -ENOTSUPP; + } + + return pdata->hw_if.write_ext_mii_regs(pdata, addr, reg, val); +} + +static int xgbe_phy_i2c_mii_write(struct xgbe_prv_data *pdata, int reg, u16 val) +{ + __be16 *mii_val; + u8 mii_data[3]; + int ret; + + ret = xgbe_phy_sfp_get_mux(pdata); + if (ret) + return ret; + + mii_data[0] = reg & 0xff; + mii_val = (__be16 *)&mii_data[1]; + *mii_val = cpu_to_be16(val); + + ret = xgbe_phy_i2c_write(pdata, XGBE_SFP_PHY_ADDRESS, + mii_data, sizeof(mii_data)); + + xgbe_phy_sfp_put_mux(pdata); + + return ret; +} + +static int xgbe_phy_mii_write(struct mii_bus *mii, int addr, int reg, u16 val) +{ + struct xgbe_prv_data *pdata = mii->priv; + struct xgbe_phy_data *phy_data = pdata->phy_data; + int ret; + + ret = xgbe_phy_get_comm_ownership(pdata); + if (ret) + return ret; + + if (phy_data->conn_type == XGBE_CONN_TYPE_SFP) + ret = xgbe_phy_i2c_mii_write(pdata, reg, val); + else if (phy_data->conn_type & XGBE_CONN_TYPE_MDIO) + ret = xgbe_phy_mdio_mii_write(pdata, addr, reg, val); + else + ret = -ENOTSUPP; + + xgbe_phy_put_comm_ownership(pdata); + + return ret; +} + +static int 
xgbe_phy_mdio_mii_read(struct xgbe_prv_data *pdata, int addr, + int reg) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + if (reg & MII_ADDR_C45) { + if (phy_data->phydev_mode != XGBE_MDIO_MODE_CL45) + return -ENOTSUPP; + } else { + if (phy_data->phydev_mode != XGBE_MDIO_MODE_CL22) + return -ENOTSUPP; + } + + return pdata->hw_if.read_ext_mii_regs(pdata, addr, reg); +} + +static int xgbe_phy_i2c_mii_read(struct xgbe_prv_data *pdata, int reg) +{ + __be16 mii_val; + u8 mii_reg; + int ret; + + ret = xgbe_phy_sfp_get_mux(pdata); + if (ret) + return ret; + + mii_reg = reg; + ret = xgbe_phy_i2c_read(pdata, XGBE_SFP_PHY_ADDRESS, + &mii_reg, sizeof(mii_reg), + &mii_val, sizeof(mii_val)); + if (!ret) + ret = be16_to_cpu(mii_val); + + xgbe_phy_sfp_put_mux(pdata); + + return ret; +} + +static int xgbe_phy_mii_read(struct mii_bus *mii, int addr, int reg) +{ + struct xgbe_prv_data *pdata = mii->priv; + struct xgbe_phy_data *phy_data = pdata->phy_data; + int ret; + + ret = xgbe_phy_get_comm_ownership(pdata); + if (ret) + return ret; + + if (phy_data->conn_type == XGBE_CONN_TYPE_SFP) + ret = xgbe_phy_i2c_mii_read(pdata, reg); + else if (phy_data->conn_type & XGBE_CONN_TYPE_MDIO) + ret = xgbe_phy_mdio_mii_read(pdata, addr, reg); + else + ret = -ENOTSUPP; + + xgbe_phy_put_comm_ownership(pdata); + + return ret; +} + +static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + if (phy_data->sfp_mod_absent) { + pdata->phy.speed = SPEED_UNKNOWN; + pdata->phy.duplex = DUPLEX_UNKNOWN; + pdata->phy.autoneg = AUTONEG_ENABLE; + pdata->phy.advertising = pdata->phy.supported; + } + + pdata->phy.advertising &= ~ADVERTISED_Autoneg; + pdata->phy.advertising &= ~ADVERTISED_TP; + pdata->phy.advertising &= ~ADVERTISED_FIBRE; + pdata->phy.advertising &= ~ADVERTISED_100baseT_Full; + pdata->phy.advertising &= ~ADVERTISED_1000baseT_Full; + pdata->phy.advertising &= ~ADVERTISED_10000baseT_Full; + pdata->phy.advertising &= ~ADVERTISED_10000baseR_FEC; + + switch (phy_data->sfp_base) { + case XGBE_SFP_BASE_1000_T: + case XGBE_SFP_BASE_1000_SX: + case XGBE_SFP_BASE_1000_LX: + case XGBE_SFP_BASE_1000_CX: + pdata->phy.speed = SPEED_UNKNOWN; + pdata->phy.duplex = DUPLEX_UNKNOWN; + pdata->phy.autoneg = AUTONEG_ENABLE; + pdata->phy.advertising |= ADVERTISED_Autoneg; + break; + case XGBE_SFP_BASE_10000_SR: + case XGBE_SFP_BASE_10000_LR: + case XGBE_SFP_BASE_10000_LRM: + case XGBE_SFP_BASE_10000_ER: + case XGBE_SFP_BASE_10000_CR: + default: + pdata->phy.speed = SPEED_10000; + pdata->phy.duplex = DUPLEX_FULL; + pdata->phy.autoneg = AUTONEG_DISABLE; + break; + } + + switch (phy_data->sfp_base) { + case XGBE_SFP_BASE_1000_T: + case XGBE_SFP_BASE_1000_CX: + case XGBE_SFP_BASE_10000_CR: + pdata->phy.advertising |= ADVERTISED_TP; + break; + default: + pdata->phy.advertising |= ADVERTISED_FIBRE; + } + + switch (phy_data->sfp_speed) { + case XGBE_SFP_SPEED_100_1000: + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) + pdata->phy.advertising |= ADVERTISED_100baseT_Full; + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) + pdata->phy.advertising |= ADVERTISED_1000baseT_Full; + break; + case XGBE_SFP_SPEED_1000: + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) + pdata->phy.advertising |= ADVERTISED_1000baseT_Full; + break; + case XGBE_SFP_SPEED_10000: + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) + pdata->phy.advertising |= ADVERTISED_10000baseT_Full; + break; + default: + /* Choose the fastest supported speed */ + if (phy_data->port_speeds & 
XGBE_PHY_PORT_SPEED_10000) + pdata->phy.advertising |= ADVERTISED_10000baseT_Full; + else if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) + pdata->phy.advertising |= ADVERTISED_1000baseT_Full; + else if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) + pdata->phy.advertising |= ADVERTISED_100baseT_Full; + } +} + +static bool xgbe_phy_sfp_bit_rate(struct xgbe_sfp_eeprom *sfp_eeprom, + enum xgbe_sfp_speed sfp_speed) +{ + u8 *sfp_base, min, max; + + sfp_base = sfp_eeprom->base; + + switch (sfp_speed) { + case XGBE_SFP_SPEED_1000: + min = XGBE_SFP_BASE_BR_1GBE_MIN; + max = XGBE_SFP_BASE_BR_1GBE_MAX; + break; + case XGBE_SFP_SPEED_10000: + min = XGBE_SFP_BASE_BR_10GBE_MIN; + max = XGBE_SFP_BASE_BR_10GBE_MAX; + break; + default: + return false; + } + + return ((sfp_base[XGBE_SFP_BASE_BR] >= min) && + (sfp_base[XGBE_SFP_BASE_BR] <= max)); +} + +static void xgbe_phy_free_phy_device(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + if (phy_data->phydev) { + phy_detach(phy_data->phydev); + phy_device_remove(phy_data->phydev); + phy_device_free(phy_data->phydev); + phy_data->phydev = NULL; + } +} + +static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int phy_id = phy_data->phydev->phy_id; + + if ((phy_id & 0xfffffff0) != 0x01ff0cc0) + return false; + + /* Enable Base-T AN */ + phy_write(phy_data->phydev, 0x16, 0x0001); + phy_write(phy_data->phydev, 0x00, 0x9140); + phy_write(phy_data->phydev, 0x16, 0x0000); + + /* Enable SGMII at 100Base-T/1000Base-T Full Duplex */ + phy_write(phy_data->phydev, 0x1b, 0x9084); + phy_write(phy_data->phydev, 0x09, 0x0e00); + phy_write(phy_data->phydev, 0x00, 0x8140); + phy_write(phy_data->phydev, 0x04, 0x0d01); + phy_write(phy_data->phydev, 0x00, 0x9140); + + phy_data->phydev->supported = PHY_GBIT_FEATURES; + phy_data->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + phy_data->phydev->advertising = phy_data->phydev->supported; + + netif_dbg(pdata, drv, pdata->netdev, + "Finisar PHY quirk in place\n"); + + return true; +} + +static void xgbe_phy_external_phy_quirks(struct xgbe_prv_data *pdata) +{ + if (xgbe_phy_finisar_phy_quirks(pdata)) + return; +} + +static int xgbe_phy_find_phy_device(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + struct phy_device *phydev; + int ret; + + /* If we already have a PHY, just return */ + if (phy_data->phydev) + return 0; + + /* Check for the use of an external PHY */ + if (phy_data->phydev_mode == XGBE_MDIO_MODE_NONE) + return 0; + + /* For SFP, only use an external PHY if available */ + if ((phy_data->port_mode == XGBE_PORT_MODE_SFP) && + !phy_data->sfp_phy_avail) + return 0; + + /* Create and connect to the PHY device */ + phydev = get_phy_device(phy_data->mii, phy_data->mdio_addr, + (phy_data->phydev_mode == XGBE_MDIO_MODE_CL45)); + if (IS_ERR(phydev)) { + netdev_err(pdata->netdev, "get_phy_device failed\n"); + return -ENODEV; + } + netif_dbg(pdata, drv, pdata->netdev, "external PHY id is %#010x\n", + phydev->phy_id); + + /*TODO: If c45, add request_module based on one of the MMD ids? 
*/ + + ret = phy_device_register(phydev); + if (ret) { + netdev_err(pdata->netdev, "phy_device_register failed\n"); + phy_device_free(phydev); + return ret; + } + + ret = phy_attach_direct(pdata->netdev, phydev, phydev->dev_flags, + PHY_INTERFACE_MODE_SGMII); + if (ret) { + netdev_err(pdata->netdev, "phy_attach_direct failed\n"); + phy_device_remove(phydev); + phy_device_free(phydev); + return ret; + } + phy_data->phydev = phydev; + + xgbe_phy_external_phy_quirks(pdata); + phydev->advertising &= pdata->phy.advertising; + + phy_start_aneg(phy_data->phydev); + + return 0; +} + +static void xgbe_phy_sfp_external_phy(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + int ret; + + if (!phy_data->sfp_changed) + return; + + phy_data->sfp_phy_avail = 0; + + if (phy_data->sfp_base != XGBE_SFP_BASE_1000_T) + return; + + /* Check access to the PHY by reading CTRL1 */ + ret = xgbe_phy_i2c_mii_read(pdata, MII_BMCR); + if (ret < 0) + return; + + /* Successfully accessed the PHY */ + phy_data->sfp_phy_avail = 1; +} + +static bool xgbe_phy_belfuse_parse_quirks(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + struct xgbe_sfp_eeprom *sfp_eeprom = &phy_data->sfp_eeprom; + + if (memcmp(&sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_NAME], + XGBE_BEL_FUSE_VENDOR, XGBE_SFP_BASE_VENDOR_NAME_LEN)) + return false; + + if (!memcmp(&sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_PN], + XGBE_BEL_FUSE_PARTNO, XGBE_SFP_BASE_VENDOR_PN_LEN)) { + phy_data->sfp_base = XGBE_SFP_BASE_1000_SX; + phy_data->sfp_cable = XGBE_SFP_CABLE_ACTIVE; + phy_data->sfp_speed = XGBE_SFP_SPEED_1000; + if (phy_data->sfp_changed) + netif_dbg(pdata, drv, pdata->netdev, + "Bel-Fuse SFP quirk in place\n"); + return true; + } + + return false; +} + +static bool xgbe_phy_sfp_parse_quirks(struct xgbe_prv_data *pdata) +{ + if (xgbe_phy_belfuse_parse_quirks(pdata)) + return true; + + return false; +} + +static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + struct xgbe_sfp_eeprom *sfp_eeprom = &phy_data->sfp_eeprom; + u8 *sfp_base; + + sfp_base = sfp_eeprom->base; + + if (sfp_base[XGBE_SFP_BASE_ID] != XGBE_SFP_ID_SFP) + return; + + if (sfp_base[XGBE_SFP_BASE_EXT_ID] != XGBE_SFP_EXT_ID_SFP) + return; + + if (xgbe_phy_sfp_parse_quirks(pdata)) + return; + + /* Assume ACTIVE cable unless told it is PASSIVE */ + if (sfp_base[XGBE_SFP_BASE_CABLE] & XGBE_SFP_BASE_CABLE_PASSIVE) { + phy_data->sfp_cable = XGBE_SFP_CABLE_PASSIVE; + phy_data->sfp_cable_len = sfp_base[XGBE_SFP_BASE_CU_CABLE_LEN]; + } else { + phy_data->sfp_cable = XGBE_SFP_CABLE_ACTIVE; + } + + /* Determine the type of SFP */ + if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_SR) + phy_data->sfp_base = XGBE_SFP_BASE_10000_SR; + else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_LR) + phy_data->sfp_base = XGBE_SFP_BASE_10000_LR; + else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_LRM) + phy_data->sfp_base = XGBE_SFP_BASE_10000_LRM; + else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_ER) + phy_data->sfp_base = XGBE_SFP_BASE_10000_ER; + else if (sfp_base[XGBE_SFP_BASE_1GBE_CC] & XGBE_SFP_BASE_1GBE_CC_SX) + phy_data->sfp_base = XGBE_SFP_BASE_1000_SX; + else if (sfp_base[XGBE_SFP_BASE_1GBE_CC] & XGBE_SFP_BASE_1GBE_CC_LX) + phy_data->sfp_base = XGBE_SFP_BASE_1000_LX; + else if (sfp_base[XGBE_SFP_BASE_1GBE_CC] & XGBE_SFP_BASE_1GBE_CC_CX) + phy_data->sfp_base = XGBE_SFP_BASE_1000_CX; + else if 
(sfp_base[XGBE_SFP_BASE_1GBE_CC] & XGBE_SFP_BASE_1GBE_CC_T) + phy_data->sfp_base = XGBE_SFP_BASE_1000_T; + else if ((phy_data->sfp_cable == XGBE_SFP_CABLE_PASSIVE) && + xgbe_phy_sfp_bit_rate(sfp_eeprom, XGBE_SFP_SPEED_10000)) + phy_data->sfp_base = XGBE_SFP_BASE_10000_CR; + + switch (phy_data->sfp_base) { + case XGBE_SFP_BASE_1000_T: + phy_data->sfp_speed = XGBE_SFP_SPEED_100_1000; + break; + case XGBE_SFP_BASE_1000_SX: + case XGBE_SFP_BASE_1000_LX: + case XGBE_SFP_BASE_1000_CX: + phy_data->sfp_speed = XGBE_SFP_SPEED_1000; + break; + case XGBE_SFP_BASE_10000_SR: + case XGBE_SFP_BASE_10000_LR: + case XGBE_SFP_BASE_10000_LRM: + case XGBE_SFP_BASE_10000_ER: + case XGBE_SFP_BASE_10000_CR: + phy_data->sfp_speed = XGBE_SFP_SPEED_10000; + break; + default: + break; + } +} + +static void xgbe_phy_sfp_eeprom_info(struct xgbe_prv_data *pdata, + struct xgbe_sfp_eeprom *sfp_eeprom) +{ + struct xgbe_sfp_ascii sfp_ascii; + char *sfp_data = (char *)&sfp_ascii; + + netif_dbg(pdata, drv, pdata->netdev, "SFP detected:\n"); + memcpy(sfp_data, &sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_NAME], + XGBE_SFP_BASE_VENDOR_NAME_LEN); + sfp_data[XGBE_SFP_BASE_VENDOR_NAME_LEN] = '\0'; + netif_dbg(pdata, drv, pdata->netdev, " vendor: %s\n", + sfp_data); + + memcpy(sfp_data, &sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_PN], + XGBE_SFP_BASE_VENDOR_PN_LEN); + sfp_data[XGBE_SFP_BASE_VENDOR_PN_LEN] = '\0'; + netif_dbg(pdata, drv, pdata->netdev, " part number: %s\n", + sfp_data); + + memcpy(sfp_data, &sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_REV], + XGBE_SFP_BASE_VENDOR_REV_LEN); + sfp_data[XGBE_SFP_BASE_VENDOR_REV_LEN] = '\0'; + netif_dbg(pdata, drv, pdata->netdev, " revision level: %s\n", + sfp_data); + + memcpy(sfp_data, &sfp_eeprom->extd[XGBE_SFP_BASE_VENDOR_SN], + XGBE_SFP_BASE_VENDOR_SN_LEN); + sfp_data[XGBE_SFP_BASE_VENDOR_SN_LEN] = '\0'; + netif_dbg(pdata, drv, pdata->netdev, " serial number: %s\n", + sfp_data); +} + +static bool xgbe_phy_sfp_verify_eeprom(u8 cc_in, u8 *buf, unsigned int len) +{ + u8 cc; + + for (cc = 0; len; buf++, len--) + cc += *buf; + + return (cc == cc_in) ? 
true : false; +} + +static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + struct xgbe_sfp_eeprom sfp_eeprom; + u8 eeprom_addr; + int ret; + + ret = xgbe_phy_sfp_get_mux(pdata); + if (ret) { + netdev_err(pdata->netdev, "I2C error setting SFP MUX\n"); + return ret; + } + + /* Read the SFP serial ID eeprom */ + eeprom_addr = 0; + ret = xgbe_phy_i2c_read(pdata, XGBE_SFP_SERIAL_ID_ADDRESS, + &eeprom_addr, sizeof(eeprom_addr), + &sfp_eeprom, sizeof(sfp_eeprom)); + if (ret) { + netdev_err(pdata->netdev, "I2C error reading SFP EEPROM\n"); + goto put; + } + + /* Validate the contents read */ + if (!xgbe_phy_sfp_verify_eeprom(sfp_eeprom.base[XGBE_SFP_BASE_CC], + sfp_eeprom.base, + sizeof(sfp_eeprom.base) - 1)) { + ret = -EINVAL; + goto put; + } + + if (!xgbe_phy_sfp_verify_eeprom(sfp_eeprom.extd[XGBE_SFP_EXTD_CC], + sfp_eeprom.extd, + sizeof(sfp_eeprom.extd) - 1)) { + ret = -EINVAL; + goto put; + } + + /* Check for an added or changed SFP */ + if (memcmp(&phy_data->sfp_eeprom, &sfp_eeprom, sizeof(sfp_eeprom))) { + phy_data->sfp_changed = 1; + + if (netif_msg_drv(pdata)) + xgbe_phy_sfp_eeprom_info(pdata, &sfp_eeprom); + + memcpy(&phy_data->sfp_eeprom, &sfp_eeprom, sizeof(sfp_eeprom)); + + if (sfp_eeprom.extd[XGBE_SFP_EXTD_SFF_8472]) { + u8 diag_type = sfp_eeprom.extd[XGBE_SFP_EXTD_DIAG]; + + if (!(diag_type & XGBE_SFP_EXTD_DIAG_ADDR_CHANGE)) + phy_data->sfp_diags = 1; + } + + xgbe_phy_free_phy_device(pdata); + } else { + phy_data->sfp_changed = 0; + } + +put: + xgbe_phy_sfp_put_mux(pdata); + + return ret; +} + +static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int gpio_input; + u8 gpio_reg, gpio_ports[2]; + int ret; + + /* Read the input port registers */ + gpio_reg = 0; + ret = xgbe_phy_i2c_read(pdata, phy_data->sfp_gpio_address, + &gpio_reg, sizeof(gpio_reg), + gpio_ports, sizeof(gpio_ports)); + if (ret) { + netdev_err(pdata->netdev, "I2C error reading SFP GPIOs\n"); + return; + } + + gpio_input = (gpio_ports[1] << 8) | gpio_ports[0]; + + if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_MOD_ABSENT) { + /* No GPIO, just assume the module is present for now */ + phy_data->sfp_mod_absent = 0; + } else { + if (!(gpio_input & (1 << phy_data->sfp_gpio_mod_absent))) + phy_data->sfp_mod_absent = 0; + } + + if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_RX_LOS) && + (gpio_input & (1 << phy_data->sfp_gpio_rx_los))) + phy_data->sfp_rx_los = 1; + + if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_TX_FAULT) && + (gpio_input & (1 << phy_data->sfp_gpio_tx_fault))) + phy_data->sfp_tx_fault = 1; +} + +static void xgbe_phy_sfp_mod_absent(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + xgbe_phy_free_phy_device(pdata); + + phy_data->sfp_mod_absent = 1; + phy_data->sfp_phy_avail = 0; + memset(&phy_data->sfp_eeprom, 0, sizeof(phy_data->sfp_eeprom)); +} + +static void xgbe_phy_sfp_reset(struct xgbe_phy_data *phy_data) +{ + phy_data->sfp_rx_los = 0; + phy_data->sfp_tx_fault = 0; + phy_data->sfp_mod_absent = 1; + phy_data->sfp_diags = 0; + phy_data->sfp_base = XGBE_SFP_BASE_UNKNOWN; + phy_data->sfp_cable = XGBE_SFP_CABLE_UNKNOWN; + phy_data->sfp_speed = XGBE_SFP_SPEED_UNKNOWN; +} + +static void xgbe_phy_sfp_detect(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + int ret; + + /* Reset the SFP signals and info */ + xgbe_phy_sfp_reset(phy_data); + + ret = xgbe_phy_get_comm_ownership(pdata); + if (ret) + return; + + /* 
Read the SFP signals and check for module presence */ + xgbe_phy_sfp_signals(pdata); + if (phy_data->sfp_mod_absent) { + xgbe_phy_sfp_mod_absent(pdata); + goto put; + } + + ret = xgbe_phy_sfp_read_eeprom(pdata); + if (ret) { + /* Treat any error as if there isn't an SFP plugged in */ + xgbe_phy_sfp_reset(phy_data); + xgbe_phy_sfp_mod_absent(pdata); + goto put; + } + + xgbe_phy_sfp_parse_eeprom(pdata); + + xgbe_phy_sfp_external_phy(pdata); + +put: + xgbe_phy_sfp_phy_settings(pdata); + + xgbe_phy_put_comm_ownership(pdata); +} + +static void xgbe_phy_phydev_flowctrl(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + u16 lcl_adv = 0, rmt_adv = 0; + u8 fc; + + pdata->phy.tx_pause = 0; + pdata->phy.rx_pause = 0; + + if (!phy_data->phydev) + return; + + if (phy_data->phydev->advertising & ADVERTISED_Pause) + lcl_adv |= ADVERTISE_PAUSE_CAP; + if (phy_data->phydev->advertising & ADVERTISED_Asym_Pause) + lcl_adv |= ADVERTISE_PAUSE_ASYM; + + if (phy_data->phydev->pause) { + pdata->phy.lp_advertising |= ADVERTISED_Pause; + rmt_adv |= LPA_PAUSE_CAP; + } + if (phy_data->phydev->asym_pause) { + pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause; + rmt_adv |= LPA_PAUSE_ASYM; + } + + fc = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); + if (fc & FLOW_CTRL_TX) + pdata->phy.tx_pause = 1; + if (fc & FLOW_CTRL_RX) + pdata->phy.rx_pause = 1; +} + +static enum xgbe_mode xgbe_phy_an37_sgmii_outcome(struct xgbe_prv_data *pdata) +{ + enum xgbe_mode mode; + + pdata->phy.lp_advertising |= ADVERTISED_Autoneg; + pdata->phy.lp_advertising |= ADVERTISED_TP; + + /* Use external PHY to determine flow control */ + if (pdata->phy.pause_autoneg) + xgbe_phy_phydev_flowctrl(pdata); + + switch (pdata->an_status & XGBE_SGMII_AN_LINK_SPEED) { + case XGBE_SGMII_AN_LINK_SPEED_100: + if (pdata->an_status & XGBE_SGMII_AN_LINK_DUPLEX) { + pdata->phy.lp_advertising |= ADVERTISED_100baseT_Full; + mode = XGBE_MODE_SGMII_100; + } else { + /* Half-duplex not supported */ + pdata->phy.lp_advertising |= ADVERTISED_100baseT_Half; + mode = XGBE_MODE_UNKNOWN; + } + break; + case XGBE_SGMII_AN_LINK_SPEED_1000: + if (pdata->an_status & XGBE_SGMII_AN_LINK_DUPLEX) { + pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Full; + mode = XGBE_MODE_SGMII_1000; + } else { + /* Half-duplex not supported */ + pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Half; + mode = XGBE_MODE_UNKNOWN; + } + break; + default: + mode = XGBE_MODE_UNKNOWN; + } + + return mode; +} + +static enum xgbe_mode xgbe_phy_an37_outcome(struct xgbe_prv_data *pdata) +{ + enum xgbe_mode mode; + unsigned int ad_reg, lp_reg; + + pdata->phy.lp_advertising |= ADVERTISED_Autoneg; + pdata->phy.lp_advertising |= ADVERTISED_FIBRE; + + /* Compare Advertisement and Link Partner register */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_ADVERTISE); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_LP_ABILITY); + if (lp_reg & 0x100) + pdata->phy.lp_advertising |= ADVERTISED_Pause; + if (lp_reg & 0x80) + pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause; + + if (pdata->phy.pause_autoneg) { + /* Set flow control based on auto-negotiation result */ + pdata->phy.tx_pause = 0; + pdata->phy.rx_pause = 0; + + if (ad_reg & lp_reg & 0x100) { + pdata->phy.tx_pause = 1; + pdata->phy.rx_pause = 1; + } else if (ad_reg & lp_reg & 0x80) { + if (ad_reg & 0x100) + pdata->phy.rx_pause = 1; + else if (lp_reg & 0x100) + pdata->phy.tx_pause = 1; + } + } + + if (lp_reg & 0x40) + pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Half; + if (lp_reg & 0x20) + 
pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Full; + + /* Half duplex is not supported */ + ad_reg &= lp_reg; + mode = (ad_reg & 0x20) ? XGBE_MODE_X : XGBE_MODE_UNKNOWN; + + return mode; +} + +static enum xgbe_mode xgbe_phy_an73_redrv_outcome(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + enum xgbe_mode mode; + unsigned int ad_reg, lp_reg; + + pdata->phy.lp_advertising |= ADVERTISED_Autoneg; + pdata->phy.lp_advertising |= ADVERTISED_Backplane; + + /* Use external PHY to determine flow control */ + if (pdata->phy.pause_autoneg) + xgbe_phy_phydev_flowctrl(pdata); + + /* Compare Advertisement and Link Partner register 2 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1); + if (lp_reg & 0x80) + pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full; + if (lp_reg & 0x20) + pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full; + + ad_reg &= lp_reg; + if (ad_reg & 0x80) { + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + mode = XGBE_MODE_KR; + break; + default: + mode = XGBE_MODE_SFI; + break; + } + } else if (ad_reg & 0x20) { + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + mode = XGBE_MODE_KX_1000; + break; + case XGBE_PORT_MODE_1000BASE_X: + mode = XGBE_MODE_X; + break; + case XGBE_PORT_MODE_SFP: + switch (phy_data->sfp_base) { + case XGBE_SFP_BASE_1000_T: + if (phy_data->phydev && + (phy_data->phydev->speed == SPEED_100)) + mode = XGBE_MODE_SGMII_100; + else + mode = XGBE_MODE_SGMII_1000; + break; + case XGBE_SFP_BASE_1000_SX: + case XGBE_SFP_BASE_1000_LX: + case XGBE_SFP_BASE_1000_CX: + default: + mode = XGBE_MODE_X; + break; + } + break; + default: + if (phy_data->phydev && + (phy_data->phydev->speed == SPEED_100)) + mode = XGBE_MODE_SGMII_100; + else + mode = XGBE_MODE_SGMII_1000; + break; + } + } else { + mode = XGBE_MODE_UNKNOWN; + } + + /* Compare Advertisement and Link Partner register 3 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2); + if (lp_reg & 0xc000) + pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC; + + return mode; +} + +static enum xgbe_mode xgbe_phy_an73_outcome(struct xgbe_prv_data *pdata) +{ + enum xgbe_mode mode; + unsigned int ad_reg, lp_reg; + + pdata->phy.lp_advertising |= ADVERTISED_Autoneg; + pdata->phy.lp_advertising |= ADVERTISED_Backplane; + + /* Compare Advertisement and Link Partner register 1 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA); + if (lp_reg & 0x400) + pdata->phy.lp_advertising |= ADVERTISED_Pause; + if (lp_reg & 0x800) + pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause; + + if (pdata->phy.pause_autoneg) { + /* Set flow control based on auto-negotiation result */ + pdata->phy.tx_pause = 0; + pdata->phy.rx_pause = 0; + + if (ad_reg & lp_reg & 0x400) { + pdata->phy.tx_pause = 1; + pdata->phy.rx_pause = 1; + } else if (ad_reg & lp_reg & 0x800) { + if (ad_reg & 0x400) + pdata->phy.rx_pause = 1; + else if (lp_reg & 0x400) + pdata->phy.tx_pause = 1; + } + } + + /* Compare Advertisement and Link Partner register 2 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1); + if (lp_reg & 0x80) + pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full; + if (lp_reg & 0x20) + pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full; + + ad_reg &= lp_reg; + if (ad_reg & 
0x80) + mode = XGBE_MODE_KR; + else if (ad_reg & 0x20) + mode = XGBE_MODE_KX_1000; + else + mode = XGBE_MODE_UNKNOWN; + + /* Compare Advertisement and Link Partner register 3 */ + ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2); + lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2); + if (lp_reg & 0xc000) + pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC; + + return mode; +} + +static enum xgbe_mode xgbe_phy_an_outcome(struct xgbe_prv_data *pdata) +{ + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + return xgbe_phy_an73_outcome(pdata); + case XGBE_AN_MODE_CL73_REDRV: + return xgbe_phy_an73_redrv_outcome(pdata); + case XGBE_AN_MODE_CL37: + return xgbe_phy_an37_outcome(pdata); + case XGBE_AN_MODE_CL37_SGMII: + return xgbe_phy_an37_sgmii_outcome(pdata); + default: + return XGBE_MODE_UNKNOWN; + } +} + +static unsigned int xgbe_phy_an_advertising(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int advertising; + + /* Without a re-driver, just return current advertising */ + if (!phy_data->redrv) + return pdata->phy.advertising; + + /* With the KR re-driver we need to advertise a single speed */ + advertising = pdata->phy.advertising; + advertising &= ~ADVERTISED_1000baseKX_Full; + advertising &= ~ADVERTISED_10000baseKR_Full; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + advertising |= ADVERTISED_10000baseKR_Full; + break; + case XGBE_PORT_MODE_BACKPLANE_2500: + advertising |= ADVERTISED_1000baseKX_Full; + break; + case XGBE_PORT_MODE_1000BASE_T: + case XGBE_PORT_MODE_1000BASE_X: + case XGBE_PORT_MODE_NBASE_T: + advertising |= ADVERTISED_1000baseKX_Full; + break; + case XGBE_PORT_MODE_10GBASE_T: + if (phy_data->phydev && + (phy_data->phydev->speed == SPEED_10000)) + advertising |= ADVERTISED_10000baseKR_Full; + else + advertising |= ADVERTISED_1000baseKX_Full; + break; + case XGBE_PORT_MODE_10GBASE_R: + advertising |= ADVERTISED_10000baseKR_Full; + break; + case XGBE_PORT_MODE_SFP: + switch (phy_data->sfp_base) { + case XGBE_SFP_BASE_1000_T: + case XGBE_SFP_BASE_1000_SX: + case XGBE_SFP_BASE_1000_LX: + case XGBE_SFP_BASE_1000_CX: + advertising |= ADVERTISED_1000baseKX_Full; + break; + default: + advertising |= ADVERTISED_10000baseKR_Full; + break; + } + break; + default: + advertising |= ADVERTISED_10000baseKR_Full; + break; + } + + return advertising; +} + +static int xgbe_phy_an_config(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + int ret; + + ret = xgbe_phy_find_phy_device(pdata); + if (ret) + return ret; + + if (!phy_data->phydev) + return 0; + + phy_data->phydev->autoneg = pdata->phy.autoneg; + phy_data->phydev->advertising = phy_data->phydev->supported & + pdata->phy.advertising; + + if (pdata->phy.autoneg != AUTONEG_ENABLE) { + phy_data->phydev->speed = pdata->phy.speed; + phy_data->phydev->duplex = pdata->phy.duplex; + } + + ret = phy_start_aneg(phy_data->phydev); + + return ret; +} + +static enum xgbe_an_mode xgbe_phy_an_sfp_mode(struct xgbe_phy_data *phy_data) +{ + switch (phy_data->sfp_base) { + case XGBE_SFP_BASE_1000_T: + return XGBE_AN_MODE_CL37_SGMII; + case XGBE_SFP_BASE_1000_SX: + case XGBE_SFP_BASE_1000_LX: + case XGBE_SFP_BASE_1000_CX: + return XGBE_AN_MODE_CL37; + default: + return XGBE_AN_MODE_NONE; + } +} + +static enum xgbe_an_mode xgbe_phy_an_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + /* A KR re-driver will always require CL73 AN */ + if (phy_data->redrv) + return 
XGBE_AN_MODE_CL73_REDRV; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + return XGBE_AN_MODE_CL73; + case XGBE_PORT_MODE_BACKPLANE_2500: + return XGBE_AN_MODE_NONE; + case XGBE_PORT_MODE_1000BASE_T: + return XGBE_AN_MODE_CL37_SGMII; + case XGBE_PORT_MODE_1000BASE_X: + return XGBE_AN_MODE_CL37; + case XGBE_PORT_MODE_NBASE_T: + return XGBE_AN_MODE_CL37_SGMII; + case XGBE_PORT_MODE_10GBASE_T: + return XGBE_AN_MODE_CL73; + case XGBE_PORT_MODE_10GBASE_R: + return XGBE_AN_MODE_NONE; + case XGBE_PORT_MODE_SFP: + return xgbe_phy_an_sfp_mode(phy_data); + default: + return XGBE_AN_MODE_NONE; + } +} + +static int xgbe_phy_set_redrv_mode_mdio(struct xgbe_prv_data *pdata, + enum xgbe_phy_redrv_mode mode) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + u16 redrv_reg, redrv_val; + + redrv_reg = XGBE_PHY_REDRV_MODE_REG + (phy_data->redrv_lane * 0x1000); + redrv_val = (u16)mode; + + return pdata->hw_if.write_ext_mii_regs(pdata, phy_data->redrv_addr, + redrv_reg, redrv_val); +} + +static int xgbe_phy_set_redrv_mode_i2c(struct xgbe_prv_data *pdata, + enum xgbe_phy_redrv_mode mode) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int redrv_reg; + int ret; + + /* Calculate the register to write */ + redrv_reg = XGBE_PHY_REDRV_MODE_REG + (phy_data->redrv_lane * 0x1000); + + ret = xgbe_phy_redrv_write(pdata, redrv_reg, mode); + + return ret; +} + +static void xgbe_phy_set_redrv_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + enum xgbe_phy_redrv_mode mode; + int ret; + + if (!phy_data->redrv) + return; + + mode = XGBE_PHY_REDRV_MODE_CX; + if ((phy_data->port_mode == XGBE_PORT_MODE_SFP) && + (phy_data->sfp_base != XGBE_SFP_BASE_1000_CX) && + (phy_data->sfp_base != XGBE_SFP_BASE_10000_CR)) + mode = XGBE_PHY_REDRV_MODE_SR; + + ret = xgbe_phy_get_comm_ownership(pdata); + if (ret) + return; + + if (phy_data->redrv_if) + xgbe_phy_set_redrv_mode_i2c(pdata, mode); + else + xgbe_phy_set_redrv_mode_mdio(pdata, mode); + + xgbe_phy_put_comm_ownership(pdata); +} + +static void xgbe_phy_start_ratechange(struct xgbe_prv_data *pdata) +{ + if (!XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) + return; + + /* Log if a previous command did not complete */ + netif_dbg(pdata, link, pdata->netdev, + "firmware mailbox not ready for command\n"); +} + +static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata) +{ + unsigned int wait; + + /* Wait for command to complete */ + wait = XGBE_RATECHANGE_COUNT; + while (wait--) { + if (!XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) + return; + + usleep_range(1000, 2000); + } + + netif_dbg(pdata, link, pdata->netdev, + "firmware mailbox command did not complete\n"); +} + +static void xgbe_phy_rrc(struct xgbe_prv_data *pdata) +{ + unsigned int s0; + + xgbe_phy_start_ratechange(pdata); + + /* Receiver Reset Cycle */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 5); + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + netif_dbg(pdata, link, pdata->netdev, "receiver reset complete\n"); +} + +static void xgbe_phy_power_off(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + xgbe_phy_start_ratechange(pdata); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, 0); + XP_IOWRITE(pdata, 
XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_UNKNOWN; + + netif_dbg(pdata, link, pdata->netdev, "phy powered off\n"); +} + +static void xgbe_phy_sfi_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int s0; + + xgbe_phy_set_redrv_mode(pdata); + + xgbe_phy_start_ratechange(pdata); + + /* 10G/SFI */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 3); + if (phy_data->sfp_cable != XGBE_SFP_CABLE_PASSIVE) { + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0); + } else { + if (phy_data->sfp_cable_len <= 1) + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1); + else if (phy_data->sfp_cable_len <= 3) + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2); + else if (phy_data->sfp_cable_len <= 5) + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3); + else + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3); + } + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_SFI; + + netif_dbg(pdata, link, pdata->netdev, "10GbE SFI mode set\n"); +} + +static void xgbe_phy_x_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int s0; + + xgbe_phy_set_redrv_mode(pdata); + + xgbe_phy_start_ratechange(pdata); + + /* 1G/X */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1); + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_X; + + netif_dbg(pdata, link, pdata->netdev, "1GbE X mode set\n"); +} + +static void xgbe_phy_sgmii_1000_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int s0; + + xgbe_phy_set_redrv_mode(pdata); + + xgbe_phy_start_ratechange(pdata); + + /* 1G/SGMII */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1); + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_SGMII_1000; + + netif_dbg(pdata, link, pdata->netdev, "1GbE SGMII mode set\n"); +} + +static void xgbe_phy_sgmii_100_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int s0; + + xgbe_phy_set_redrv_mode(pdata); + + xgbe_phy_start_ratechange(pdata); + + /* 1G/SGMII */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1); + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_SGMII_100; + + netif_dbg(pdata, link, pdata->netdev, "100MbE SGMII mode set\n"); +} + +static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int s0; + + 
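/* As in the other xgbe_phy_*_mode() helpers: program the re-driver first + * (if present), then hand the rate change to firmware through the mailbox - + * COMMAND/SUB_COMMAND in the driver scratch registers, a write of the + * request bit, then a poll of the status bit until the change completes. + */ +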
xgbe_phy_set_redrv_mode(pdata); + + xgbe_phy_start_ratechange(pdata); + + /* 10G/KR */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 4); + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_KR; + + netif_dbg(pdata, link, pdata->netdev, "10GbE KR mode set\n"); +} + +static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int s0; + + xgbe_phy_set_redrv_mode(pdata); + + xgbe_phy_start_ratechange(pdata); + + /* 2.5G/KX */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 2); + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_KX_2500; + + netif_dbg(pdata, link, pdata->netdev, "2.5GbE KX mode set\n"); +} + +static void xgbe_phy_kx_1000_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int s0; + + xgbe_phy_set_redrv_mode(pdata); + + xgbe_phy_start_ratechange(pdata); + + /* 1G/KX */ + s0 = 0; + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1); + XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3); + + /* Call FW to make the change */ + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0); + XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0); + XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1); + + xgbe_phy_complete_ratechange(pdata); + + phy_data->cur_mode = XGBE_MODE_KX_1000; + + netif_dbg(pdata, link, pdata->netdev, "1GbE KX mode set\n"); +} + +static enum xgbe_mode xgbe_phy_cur_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + return phy_data->cur_mode; +} + +static enum xgbe_mode xgbe_phy_switch_baset_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + /* No switching if not 10GBase-T */ + if (phy_data->port_mode != XGBE_PORT_MODE_10GBASE_T) + return xgbe_phy_cur_mode(pdata); + + switch (xgbe_phy_cur_mode(pdata)) { + case XGBE_MODE_SGMII_100: + case XGBE_MODE_SGMII_1000: + return XGBE_MODE_KR; + case XGBE_MODE_KR: + default: + return XGBE_MODE_SGMII_1000; + } +} + +static enum xgbe_mode xgbe_phy_switch_bp_2500_mode(struct xgbe_prv_data *pdata) +{ + return XGBE_MODE_KX_2500; +} + +static enum xgbe_mode xgbe_phy_switch_bp_mode(struct xgbe_prv_data *pdata) +{ + /* If we are in KR switch to KX, and vice-versa */ + switch (xgbe_phy_cur_mode(pdata)) { + case XGBE_MODE_KX_1000: + return XGBE_MODE_KR; + case XGBE_MODE_KR: + default: + return XGBE_MODE_KX_1000; + } +} + +static enum xgbe_mode xgbe_phy_switch_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + return xgbe_phy_switch_bp_mode(pdata); + case XGBE_PORT_MODE_BACKPLANE_2500: + return xgbe_phy_switch_bp_2500_mode(pdata); + case XGBE_PORT_MODE_1000BASE_T: + case XGBE_PORT_MODE_NBASE_T: + case XGBE_PORT_MODE_10GBASE_T: + return xgbe_phy_switch_baset_mode(pdata); + case XGBE_PORT_MODE_1000BASE_X: + case XGBE_PORT_MODE_10GBASE_R: + case XGBE_PORT_MODE_SFP: + /* No switching, so just return current mode */ + return 
xgbe_phy_cur_mode(pdata); + default: + return XGBE_MODE_UNKNOWN; + } +} + +static enum xgbe_mode xgbe_phy_get_basex_mode(struct xgbe_phy_data *phy_data, + int speed) +{ + switch (speed) { + case SPEED_1000: + return XGBE_MODE_X; + case SPEED_10000: + return XGBE_MODE_KR; + default: + return XGBE_MODE_UNKNOWN; + } +} + +static enum xgbe_mode xgbe_phy_get_baset_mode(struct xgbe_phy_data *phy_data, + int speed) +{ + switch (speed) { + case SPEED_100: + return XGBE_MODE_SGMII_100; + case SPEED_1000: + return XGBE_MODE_SGMII_1000; + case SPEED_10000: + return XGBE_MODE_KR; + default: + return XGBE_MODE_UNKNOWN; + } +} + +static enum xgbe_mode xgbe_phy_get_sfp_mode(struct xgbe_phy_data *phy_data, + int speed) +{ + switch (speed) { + case SPEED_100: + return XGBE_MODE_SGMII_100; + case SPEED_1000: + if (phy_data->sfp_base == XGBE_SFP_BASE_1000_T) + return XGBE_MODE_SGMII_1000; + else + return XGBE_MODE_X; + case SPEED_10000: + case SPEED_UNKNOWN: + return XGBE_MODE_SFI; + default: + return XGBE_MODE_UNKNOWN; + } +} + +static enum xgbe_mode xgbe_phy_get_bp_2500_mode(int speed) +{ + switch (speed) { + case SPEED_2500: + return XGBE_MODE_KX_2500; + default: + return XGBE_MODE_UNKNOWN; + } +} + +static enum xgbe_mode xgbe_phy_get_bp_mode(int speed) +{ + switch (speed) { + case SPEED_1000: + return XGBE_MODE_KX_1000; + case SPEED_10000: + return XGBE_MODE_KR; + default: + return XGBE_MODE_UNKNOWN; + } +} + +static enum xgbe_mode xgbe_phy_get_mode(struct xgbe_prv_data *pdata, + int speed) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + return xgbe_phy_get_bp_mode(speed); + case XGBE_PORT_MODE_BACKPLANE_2500: + return xgbe_phy_get_bp_2500_mode(speed); + case XGBE_PORT_MODE_1000BASE_T: + case XGBE_PORT_MODE_NBASE_T: + case XGBE_PORT_MODE_10GBASE_T: + return xgbe_phy_get_baset_mode(phy_data, speed); + case XGBE_PORT_MODE_1000BASE_X: + case XGBE_PORT_MODE_10GBASE_R: + return xgbe_phy_get_basex_mode(phy_data, speed); + case XGBE_PORT_MODE_SFP: + return xgbe_phy_get_sfp_mode(phy_data, speed); + default: + return XGBE_MODE_UNKNOWN; + } +} + +static void xgbe_phy_set_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_KX_1000: + xgbe_phy_kx_1000_mode(pdata); + break; + case XGBE_MODE_KX_2500: + xgbe_phy_kx_2500_mode(pdata); + break; + case XGBE_MODE_KR: + xgbe_phy_kr_mode(pdata); + break; + case XGBE_MODE_SGMII_100: + xgbe_phy_sgmii_100_mode(pdata); + break; + case XGBE_MODE_SGMII_1000: + xgbe_phy_sgmii_1000_mode(pdata); + break; + case XGBE_MODE_X: + xgbe_phy_x_mode(pdata); + break; + case XGBE_MODE_SFI: + xgbe_phy_sfi_mode(pdata); + break; + default: + break; + } +} + +static bool xgbe_phy_check_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode, u32 advert) +{ + if (pdata->phy.autoneg == AUTONEG_ENABLE) { + if (pdata->phy.advertising & advert) + return true; + } else { + enum xgbe_mode cur_mode; + + cur_mode = xgbe_phy_get_mode(pdata, pdata->phy.speed); + if (cur_mode == mode) + return true; + } + + return false; +} + +static bool xgbe_phy_use_basex_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_X: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_1000baseT_Full); + case XGBE_MODE_KR: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_10000baseT_Full); + default: + return false; + } +} + +static bool xgbe_phy_use_baset_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_SGMII_100: + 
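/* A mode is usable only if the advertisement flag it satisfies is set + * (autoneg enabled), or if it matches the configured speed (autoneg + * disabled); see xgbe_phy_check_mode(). + */ +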
return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_100baseT_Full); + case XGBE_MODE_SGMII_1000: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_1000baseT_Full); + case XGBE_MODE_KR: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_10000baseT_Full); + default: + return false; + } +} + +static bool xgbe_phy_use_sfp_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (mode) { + case XGBE_MODE_X: + if (phy_data->sfp_base == XGBE_SFP_BASE_1000_T) + return false; + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_1000baseT_Full); + case XGBE_MODE_SGMII_100: + if (phy_data->sfp_base != XGBE_SFP_BASE_1000_T) + return false; + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_100baseT_Full); + case XGBE_MODE_SGMII_1000: + if (phy_data->sfp_base != XGBE_SFP_BASE_1000_T) + return false; + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_1000baseT_Full); + case XGBE_MODE_SFI: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_10000baseT_Full); + default: + return false; + } +} + +static bool xgbe_phy_use_bp_2500_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_KX_2500: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_2500baseX_Full); + default: + return false; + } +} + +static bool xgbe_phy_use_bp_mode(struct xgbe_prv_data *pdata, + enum xgbe_mode mode) +{ + switch (mode) { + case XGBE_MODE_KX_1000: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_1000baseKX_Full); + case XGBE_MODE_KR: + return xgbe_phy_check_mode(pdata, mode, + ADVERTISED_10000baseKR_Full); + default: + return false; + } +} + +static bool xgbe_phy_use_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + return xgbe_phy_use_bp_mode(pdata, mode); + case XGBE_PORT_MODE_BACKPLANE_2500: + return xgbe_phy_use_bp_2500_mode(pdata, mode); + case XGBE_PORT_MODE_1000BASE_T: + case XGBE_PORT_MODE_NBASE_T: + case XGBE_PORT_MODE_10GBASE_T: + return xgbe_phy_use_baset_mode(pdata, mode); + case XGBE_PORT_MODE_1000BASE_X: + case XGBE_PORT_MODE_10GBASE_R: + return xgbe_phy_use_basex_mode(pdata, mode); + case XGBE_PORT_MODE_SFP: + return xgbe_phy_use_sfp_mode(pdata, mode); + default: + return false; + } +} + +static bool xgbe_phy_valid_speed_basex_mode(struct xgbe_phy_data *phy_data, + int speed) +{ + switch (speed) { + case SPEED_1000: + return (phy_data->port_mode == XGBE_PORT_MODE_1000BASE_X); + case SPEED_10000: + return (phy_data->port_mode == XGBE_PORT_MODE_10GBASE_R); + default: + return false; + } +} + +static bool xgbe_phy_valid_speed_baset_mode(struct xgbe_phy_data *phy_data, + int speed) +{ + switch (speed) { + case SPEED_100: + case SPEED_1000: + return true; + case SPEED_10000: + return (phy_data->port_mode == XGBE_PORT_MODE_10GBASE_T); + default: + return false; + } +} + +static bool xgbe_phy_valid_speed_sfp_mode(struct xgbe_phy_data *phy_data, + int speed) +{ + switch (speed) { + case SPEED_100: + return (phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000); + case SPEED_1000: + return ((phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000) || + (phy_data->sfp_speed == XGBE_SFP_SPEED_1000)); + case SPEED_10000: + return (phy_data->sfp_speed == XGBE_SFP_SPEED_10000); + default: + return false; + } +} + +static bool xgbe_phy_valid_speed_bp_2500_mode(int speed) +{ + switch (speed) { + case SPEED_2500: + return true; + default: + return false; + } +} + +static bool 
xgbe_phy_valid_speed_bp_mode(int speed) +{ + switch (speed) { + case SPEED_1000: + case SPEED_10000: + return true; + default: + return false; + } +} + +static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + return xgbe_phy_valid_speed_bp_mode(speed); + case XGBE_PORT_MODE_BACKPLANE_2500: + return xgbe_phy_valid_speed_bp_2500_mode(speed); + case XGBE_PORT_MODE_1000BASE_T: + case XGBE_PORT_MODE_NBASE_T: + case XGBE_PORT_MODE_10GBASE_T: + return xgbe_phy_valid_speed_baset_mode(phy_data, speed); + case XGBE_PORT_MODE_1000BASE_X: + case XGBE_PORT_MODE_10GBASE_R: + return xgbe_phy_valid_speed_basex_mode(phy_data, speed); + case XGBE_PORT_MODE_SFP: + return xgbe_phy_valid_speed_sfp_mode(phy_data, speed); + default: + return false; + } +} + +static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int reg; + int ret; + + *an_restart = 0; + + if (phy_data->port_mode == XGBE_PORT_MODE_SFP) { + /* Check SFP signals */ + xgbe_phy_sfp_detect(pdata); + + if (phy_data->sfp_changed) { + *an_restart = 1; + return 0; + } + + if (phy_data->sfp_mod_absent || phy_data->sfp_rx_los) + return 0; + } + + if (phy_data->phydev) { + /* Check external PHY */ + ret = phy_read_status(phy_data->phydev); + if (ret < 0) + return 0; + + if ((pdata->phy.autoneg == AUTONEG_ENABLE) && + !phy_aneg_done(phy_data->phydev)) + return 0; + + if (!phy_data->phydev->link) + return 0; + } + + /* Link status is latched low, so read once to clear + * and then read again to get current state + */ + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + if (reg & MDIO_STAT1_LSTATUS) + return 1; + + /* No link, attempt a receiver reset cycle */ + if (phy_data->rrc_count++) { + phy_data->rrc_count = 0; + xgbe_phy_rrc(pdata); + } + + return 0; +} + +static void xgbe_phy_sfp_gpio_setup(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int reg; + + reg = XP_IOREAD(pdata, XP_PROP_3); + + phy_data->sfp_gpio_address = XGBE_GPIO_ADDRESS_PCA9555 + + XP_GET_BITS(reg, XP_PROP_3, GPIO_ADDR); + + phy_data->sfp_gpio_mask = XP_GET_BITS(reg, XP_PROP_3, GPIO_MASK); + + phy_data->sfp_gpio_rx_los = XP_GET_BITS(reg, XP_PROP_3, + GPIO_RX_LOS); + phy_data->sfp_gpio_tx_fault = XP_GET_BITS(reg, XP_PROP_3, + GPIO_TX_FAULT); + phy_data->sfp_gpio_mod_absent = XP_GET_BITS(reg, XP_PROP_3, + GPIO_MOD_ABS); + phy_data->sfp_gpio_rate_select = XP_GET_BITS(reg, XP_PROP_3, + GPIO_RATE_SELECT); + + if (netif_msg_probe(pdata)) { + dev_dbg(pdata->dev, "SFP: gpio_address=%#x\n", + phy_data->sfp_gpio_address); + dev_dbg(pdata->dev, "SFP: gpio_mask=%#x\n", + phy_data->sfp_gpio_mask); + dev_dbg(pdata->dev, "SFP: gpio_rx_los=%u\n", + phy_data->sfp_gpio_rx_los); + dev_dbg(pdata->dev, "SFP: gpio_tx_fault=%u\n", + phy_data->sfp_gpio_tx_fault); + dev_dbg(pdata->dev, "SFP: gpio_mod_absent=%u\n", + phy_data->sfp_gpio_mod_absent); + dev_dbg(pdata->dev, "SFP: gpio_rate_select=%u\n", + phy_data->sfp_gpio_rate_select); + } +} + +static void xgbe_phy_sfp_comm_setup(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int reg, mux_addr_hi, mux_addr_lo; + + reg = XP_IOREAD(pdata, XP_PROP_4); + + mux_addr_hi = XP_GET_BITS(reg, XP_PROP_4, MUX_ADDR_HI); + mux_addr_lo = XP_GET_BITS(reg, XP_PROP_4, MUX_ADDR_LO); + if (mux_addr_lo == 
XGBE_SFP_DIRECT) + return; + + phy_data->sfp_comm = XGBE_SFP_COMM_PCA9545; + phy_data->sfp_mux_address = (mux_addr_hi << 2) + mux_addr_lo; + phy_data->sfp_mux_channel = XP_GET_BITS(reg, XP_PROP_4, MUX_CHAN); + + if (netif_msg_probe(pdata)) { + dev_dbg(pdata->dev, "SFP: mux_address=%#x\n", + phy_data->sfp_mux_address); + dev_dbg(pdata->dev, "SFP: mux_channel=%u\n", + phy_data->sfp_mux_channel); + } +} + +static void xgbe_phy_sfp_setup(struct xgbe_prv_data *pdata) +{ + xgbe_phy_sfp_comm_setup(pdata); + xgbe_phy_sfp_gpio_setup(pdata); +} + +static int xgbe_phy_int_mdio_reset(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int ret; + + ret = pdata->hw_if.set_gpio(pdata, phy_data->mdio_reset_gpio); + if (ret) + return ret; + + ret = pdata->hw_if.clr_gpio(pdata, phy_data->mdio_reset_gpio); + + return ret; +} + +static int xgbe_phy_i2c_mdio_reset(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + u8 gpio_reg, gpio_ports[2], gpio_data[3]; + int ret; + + /* Read the output port registers */ + gpio_reg = 2; + ret = xgbe_phy_i2c_read(pdata, phy_data->mdio_reset_addr, + &gpio_reg, sizeof(gpio_reg), + gpio_ports, sizeof(gpio_ports)); + if (ret) + return ret; + + /* Prepare to write the GPIO data */ + gpio_data[0] = 2; + gpio_data[1] = gpio_ports[0]; + gpio_data[2] = gpio_ports[1]; + + /* Set the GPIO pin */ + if (phy_data->mdio_reset_gpio < 8) + gpio_data[1] |= (1 << (phy_data->mdio_reset_gpio % 8)); + else + gpio_data[2] |= (1 << (phy_data->mdio_reset_gpio % 8)); + + /* Write the output port registers */ + ret = xgbe_phy_i2c_write(pdata, phy_data->mdio_reset_addr, + gpio_data, sizeof(gpio_data)); + if (ret) + return ret; + + /* Clear the GPIO pin */ + if (phy_data->mdio_reset_gpio < 8) + gpio_data[1] &= ~(1 << (phy_data->mdio_reset_gpio % 8)); + else + gpio_data[2] &= ~(1 << (phy_data->mdio_reset_gpio % 8)); + + /* Write the output port registers */ + ret = xgbe_phy_i2c_write(pdata, phy_data->mdio_reset_addr, + gpio_data, sizeof(gpio_data)); + + return ret; +} + +static int xgbe_phy_mdio_reset(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + int ret; + + if (phy_data->conn_type != XGBE_CONN_TYPE_MDIO) + return 0; + + ret = xgbe_phy_get_comm_ownership(pdata); + if (ret) + return ret; + + if (phy_data->mdio_reset == XGBE_MDIO_RESET_I2C_GPIO) + ret = xgbe_phy_i2c_mdio_reset(pdata); + else if (phy_data->mdio_reset == XGBE_MDIO_RESET_INT_GPIO) + ret = xgbe_phy_int_mdio_reset(pdata); + + xgbe_phy_put_comm_ownership(pdata); + + return ret; +} + +static bool xgbe_phy_redrv_error(struct xgbe_phy_data *phy_data) +{ + if (!phy_data->redrv) + return false; + + if (phy_data->redrv_if >= XGBE_PHY_REDRV_IF_MAX) + return true; + + switch (phy_data->redrv_model) { + case XGBE_PHY_REDRV_MODEL_4223: + if (phy_data->redrv_lane > 3) + return true; + break; + case XGBE_PHY_REDRV_MODEL_4227: + if (phy_data->redrv_lane > 1) + return true; + break; + default: + return true; + } + + return false; +} + +static int xgbe_phy_mdio_reset_setup(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int reg; + + if (phy_data->conn_type != XGBE_CONN_TYPE_MDIO) + return 0; + + reg = XP_IOREAD(pdata, XP_PROP_3); + phy_data->mdio_reset = XP_GET_BITS(reg, XP_PROP_3, MDIO_RESET); + switch (phy_data->mdio_reset) { + case XGBE_MDIO_RESET_NONE: + case XGBE_MDIO_RESET_I2C_GPIO: + case XGBE_MDIO_RESET_INT_GPIO: + break; + default: + dev_err(pdata->dev, "unsupported MDIO reset (%#x)\n", + 
phy_data->mdio_reset); + return -EINVAL; + } + + if (phy_data->mdio_reset == XGBE_MDIO_RESET_I2C_GPIO) { + phy_data->mdio_reset_addr = XGBE_GPIO_ADDRESS_PCA9555 + + XP_GET_BITS(reg, XP_PROP_3, + MDIO_RESET_I2C_ADDR); + phy_data->mdio_reset_gpio = XP_GET_BITS(reg, XP_PROP_3, + MDIO_RESET_I2C_GPIO); + } else if (phy_data->mdio_reset == XGBE_MDIO_RESET_INT_GPIO) { + phy_data->mdio_reset_gpio = XP_GET_BITS(reg, XP_PROP_3, + MDIO_RESET_INT_GPIO); + } + + return 0; +} + +static bool xgbe_phy_port_mode_mismatch(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)) + return false; + break; + case XGBE_PORT_MODE_BACKPLANE_2500: + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_2500) + return false; + break; + case XGBE_PORT_MODE_1000BASE_T: + if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)) + return false; + break; + case XGBE_PORT_MODE_1000BASE_X: + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) + return false; + break; + case XGBE_PORT_MODE_NBASE_T: + if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_2500)) + return false; + break; + case XGBE_PORT_MODE_10GBASE_T: + if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)) + return false; + break; + case XGBE_PORT_MODE_10GBASE_R: + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) + return false; + break; + case XGBE_PORT_MODE_SFP: + if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)) + return false; + break; + default: + break; + } + + return true; +} + +static bool xgbe_phy_conn_type_mismatch(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_BACKPLANE: + case XGBE_PORT_MODE_BACKPLANE_2500: + if (phy_data->conn_type == XGBE_CONN_TYPE_BACKPLANE) + return false; + break; + case XGBE_PORT_MODE_1000BASE_T: + case XGBE_PORT_MODE_1000BASE_X: + case XGBE_PORT_MODE_NBASE_T: + case XGBE_PORT_MODE_10GBASE_T: + case XGBE_PORT_MODE_10GBASE_R: + if (phy_data->conn_type == XGBE_CONN_TYPE_MDIO) + return false; + break; + case XGBE_PORT_MODE_SFP: + if (phy_data->conn_type == XGBE_CONN_TYPE_SFP) + return false; + break; + default: + break; + } + + return true; +} + +static bool xgbe_phy_port_enabled(struct xgbe_prv_data *pdata) +{ + unsigned int reg; + + reg = XP_IOREAD(pdata, XP_PROP_0); + if (!XP_GET_BITS(reg, XP_PROP_0, PORT_SPEEDS)) + return false; + if (!XP_GET_BITS(reg, XP_PROP_0, CONN_TYPE)) + return false; + + return true; +} + +static void xgbe_phy_stop(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + /* If we have an external PHY, free it */ + xgbe_phy_free_phy_device(pdata); + + /* Reset SFP data */ + xgbe_phy_sfp_reset(phy_data); + xgbe_phy_sfp_mod_absent(pdata); + + /* Power off the PHY */ + xgbe_phy_power_off(pdata); + + /* Stop the I2C controller */ + pdata->i2c_if.i2c_stop(pdata); +} + +static int xgbe_phy_start(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + int ret; + 
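/* Ordering matters here: the I2C controller must be running before any + * SFP detection, since module presence, EEPROM contents and GPIO state + * are all read over the muxed I2C bus. + */ +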
+ /* Start the I2C controller */ + ret = pdata->i2c_if.i2c_start(pdata); + if (ret) + return ret; + + /* Start in highest supported mode */ + xgbe_phy_set_mode(pdata, phy_data->start_mode); + + /* After starting the I2C controller, we can check for an SFP */ + switch (phy_data->port_mode) { + case XGBE_PORT_MODE_SFP: + xgbe_phy_sfp_detect(pdata); + break; + default: + break; + } + + /* If we have an external PHY, start it */ + ret = xgbe_phy_find_phy_device(pdata); + if (ret) + goto err_i2c; + + return 0; + +err_i2c: + pdata->i2c_if.i2c_stop(pdata); + + return ret; +} + +static int xgbe_phy_reset(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + enum xgbe_mode cur_mode; + int ret; + + /* Reset by power cycling the PHY */ + cur_mode = phy_data->cur_mode; + xgbe_phy_power_off(pdata); + xgbe_phy_set_mode(pdata, cur_mode); + + if (!phy_data->phydev) + return 0; + + /* Reset the external PHY */ + ret = xgbe_phy_mdio_reset(pdata); + if (ret) + return ret; + + return phy_init_hw(phy_data->phydev); +} + +static void xgbe_phy_exit(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + /* Unregister for driving external PHYs */ + mdiobus_unregister(phy_data->mii); +} + +static int xgbe_phy_init(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data; + struct mii_bus *mii; + unsigned int reg; + int ret; + + /* Check if enabled */ + if (!xgbe_phy_port_enabled(pdata)) { + dev_info(pdata->dev, "device is not enabled\n"); + return -ENODEV; + } + + /* Initialize the I2C controller */ + ret = pdata->i2c_if.i2c_init(pdata); + if (ret) + return ret; + + phy_data = devm_kzalloc(pdata->dev, sizeof(*phy_data), GFP_KERNEL); + if (!phy_data) + return -ENOMEM; + pdata->phy_data = phy_data; + + reg = XP_IOREAD(pdata, XP_PROP_0); + phy_data->port_mode = XP_GET_BITS(reg, XP_PROP_0, PORT_MODE); + phy_data->port_id = XP_GET_BITS(reg, XP_PROP_0, PORT_ID); + phy_data->port_speeds = XP_GET_BITS(reg, XP_PROP_0, PORT_SPEEDS); + phy_data->conn_type = XP_GET_BITS(reg, XP_PROP_0, CONN_TYPE); + phy_data->mdio_addr = XP_GET_BITS(reg, XP_PROP_0, MDIO_ADDR); + if (netif_msg_probe(pdata)) { + dev_dbg(pdata->dev, "port mode=%u\n", phy_data->port_mode); + dev_dbg(pdata->dev, "port id=%u\n", phy_data->port_id); + dev_dbg(pdata->dev, "port speeds=%#x\n", phy_data->port_speeds); + dev_dbg(pdata->dev, "conn type=%u\n", phy_data->conn_type); + dev_dbg(pdata->dev, "mdio addr=%u\n", phy_data->mdio_addr); + } + + reg = XP_IOREAD(pdata, XP_PROP_4); + phy_data->redrv = XP_GET_BITS(reg, XP_PROP_4, REDRV_PRESENT); + phy_data->redrv_if = XP_GET_BITS(reg, XP_PROP_4, REDRV_IF); + phy_data->redrv_addr = XP_GET_BITS(reg, XP_PROP_4, REDRV_ADDR); + phy_data->redrv_lane = XP_GET_BITS(reg, XP_PROP_4, REDRV_LANE); + phy_data->redrv_model = XP_GET_BITS(reg, XP_PROP_4, REDRV_MODEL); + if (phy_data->redrv && netif_msg_probe(pdata)) { + dev_dbg(pdata->dev, "redrv present\n"); + dev_dbg(pdata->dev, "redrv i/f=%u\n", phy_data->redrv_if); + dev_dbg(pdata->dev, "redrv addr=%#x\n", phy_data->redrv_addr); + dev_dbg(pdata->dev, "redrv lane=%u\n", phy_data->redrv_lane); + dev_dbg(pdata->dev, "redrv model=%u\n", phy_data->redrv_model); + } + + /* Validate the connection requested */ + if (xgbe_phy_conn_type_mismatch(pdata)) { + dev_err(pdata->dev, "phy mode/connection mismatch (%#x/%#x)\n", + phy_data->port_mode, phy_data->conn_type); + return -EINVAL; + } + + /* Validate the mode requested */ + if (xgbe_phy_port_mode_mismatch(pdata)) { + dev_err(pdata->dev, "phy mode/speed mismatch (%#x/%#x)\n", + 
phy_data->port_mode, phy_data->port_speeds); + return -EINVAL; + } + + /* Check for and validate MDIO reset support */ + ret = xgbe_phy_mdio_reset_setup(pdata); + if (ret) + return ret; + + /* Validate the re-driver information */ + if (xgbe_phy_redrv_error(phy_data)) { + dev_err(pdata->dev, "phy re-driver settings error\n"); + return -EINVAL; + } + pdata->kr_redrv = phy_data->redrv; + + /* Indicate current mode is unknown */ + phy_data->cur_mode = XGBE_MODE_UNKNOWN; + + /* Initialize supported features */ + pdata->phy.supported = 0; + + switch (phy_data->port_mode) { + /* Backplane support */ + case XGBE_PORT_MODE_BACKPLANE: + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_Backplane; + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) { + pdata->phy.supported |= SUPPORTED_1000baseKX_Full; + phy_data->start_mode = XGBE_MODE_KX_1000; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) { + pdata->phy.supported |= SUPPORTED_10000baseKR_Full; + if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE) + pdata->phy.supported |= + SUPPORTED_10000baseR_FEC; + phy_data->start_mode = XGBE_MODE_KR; + } + + phy_data->phydev_mode = XGBE_MDIO_MODE_NONE; + break; + case XGBE_PORT_MODE_BACKPLANE_2500: + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_Backplane; + pdata->phy.supported |= SUPPORTED_2500baseX_Full; + phy_data->start_mode = XGBE_MODE_KX_2500; + + phy_data->phydev_mode = XGBE_MDIO_MODE_NONE; + break; + + /* MDIO 1GBase-T support */ + case XGBE_PORT_MODE_1000BASE_T: + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_TP; + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) { + pdata->phy.supported |= SUPPORTED_100baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_100; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) { + pdata->phy.supported |= SUPPORTED_1000baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_1000; + } + + phy_data->phydev_mode = XGBE_MDIO_MODE_CL22; + break; + + /* MDIO Base-X support */ + case XGBE_PORT_MODE_1000BASE_X: + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_FIBRE; + pdata->phy.supported |= SUPPORTED_1000baseT_Full; + phy_data->start_mode = XGBE_MODE_X; + + phy_data->phydev_mode = XGBE_MDIO_MODE_CL22; + break; + + /* MDIO NBase-T support */ + case XGBE_PORT_MODE_NBASE_T: + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_TP; + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) { + pdata->phy.supported |= SUPPORTED_100baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_100; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) { + pdata->phy.supported |= SUPPORTED_1000baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_1000; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_2500) { + pdata->phy.supported |= SUPPORTED_2500baseX_Full; + phy_data->start_mode = XGBE_MODE_KX_2500; + } + + phy_data->phydev_mode = XGBE_MDIO_MODE_CL45; + break; + + /* 10GBase-T support */ + case XGBE_PORT_MODE_10GBASE_T: + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_TP; + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) 
{ + pdata->phy.supported |= SUPPORTED_100baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_100; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) { + pdata->phy.supported |= SUPPORTED_1000baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_1000; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) { + pdata->phy.supported |= SUPPORTED_10000baseT_Full; + phy_data->start_mode = XGBE_MODE_KR; + } + + phy_data->phydev_mode = XGBE_MDIO_MODE_NONE; + break; + + /* 10GBase-R support */ + case XGBE_PORT_MODE_10GBASE_R: + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_TP; + pdata->phy.supported |= SUPPORTED_10000baseT_Full; + if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE) + pdata->phy.supported |= SUPPORTED_10000baseR_FEC; + phy_data->start_mode = XGBE_MODE_SFI; + + phy_data->phydev_mode = XGBE_MDIO_MODE_NONE; + break; + + /* SFP support */ + case XGBE_PORT_MODE_SFP: + pdata->phy.supported |= SUPPORTED_Autoneg; + pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + pdata->phy.supported |= SUPPORTED_TP; + pdata->phy.supported |= SUPPORTED_FIBRE; + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) { + pdata->phy.supported |= SUPPORTED_100baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_100; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) { + pdata->phy.supported |= SUPPORTED_1000baseT_Full; + phy_data->start_mode = XGBE_MODE_SGMII_1000; + } + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) { + pdata->phy.supported |= SUPPORTED_10000baseT_Full; + phy_data->start_mode = XGBE_MODE_SFI; + if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE) + pdata->phy.supported |= + SUPPORTED_10000baseR_FEC; + } + + phy_data->phydev_mode = XGBE_MDIO_MODE_CL22; + + xgbe_phy_sfp_setup(pdata); + break; + default: + return -EINVAL; + } + + if (netif_msg_probe(pdata)) + dev_dbg(pdata->dev, "phy supported=%#x\n", + pdata->phy.supported); + + if ((phy_data->conn_type & XGBE_CONN_TYPE_MDIO) && + (phy_data->phydev_mode != XGBE_MDIO_MODE_NONE)) { + ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->mdio_addr, + phy_data->phydev_mode); + if (ret) { + dev_err(pdata->dev, + "mdio port/clause not compatible (%d/%u)\n", + phy_data->mdio_addr, phy_data->phydev_mode); + return -EINVAL; + } + } + + if (phy_data->redrv && !phy_data->redrv_if) { + ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->redrv_addr, + XGBE_MDIO_MODE_CL22); + if (ret) { + dev_err(pdata->dev, + "redriver mdio port not compatible (%u)\n", + phy_data->redrv_addr); + return -EINVAL; + } + } + + /* Register for driving external PHYs */ + mii = devm_mdiobus_alloc(pdata->dev); + if (!mii) { + dev_err(pdata->dev, "mdiobus_alloc failed\n"); + return -ENOMEM; + } + + mii->priv = pdata; + mii->name = "amd-xgbe-mii"; + mii->read = xgbe_phy_mii_read; + mii->write = xgbe_phy_mii_write; + mii->parent = pdata->dev; + mii->phy_mask = ~0; + snprintf(mii->id, sizeof(mii->id), "%s", dev_name(pdata->dev)); + ret = mdiobus_register(mii); + if (ret) { + dev_err(pdata->dev, "mdiobus_register failed\n"); + return ret; + } + phy_data->mii = mii; + + return 0; +} + +void xgbe_init_function_ptrs_phy_v2(struct xgbe_phy_if *phy_if) +{ + struct xgbe_phy_impl_if *phy_impl = &phy_if->phy_impl; + + phy_impl->init = xgbe_phy_init; + phy_impl->exit = xgbe_phy_exit; + + phy_impl->reset = xgbe_phy_reset; + phy_impl->start = xgbe_phy_start; + phy_impl->stop = xgbe_phy_stop; + + phy_impl->link_status = xgbe_phy_link_status; + + 
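/* All of the callbacks below are required by the common driver code; + * only the optional kr_training_pre/post hooks (see xgbe.h) are left + * unset by this implementation. + */ +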
phy_impl->valid_speed = xgbe_phy_valid_speed; + + phy_impl->use_mode = xgbe_phy_use_mode; + phy_impl->set_mode = xgbe_phy_set_mode; + phy_impl->get_mode = xgbe_phy_get_mode; + phy_impl->switch_mode = xgbe_phy_switch_mode; + phy_impl->cur_mode = xgbe_phy_cur_mode; + + phy_impl->an_mode = xgbe_phy_an_mode; + + phy_impl->an_config = xgbe_phy_an_config; + + phy_impl->an_advertising = xgbe_phy_an_advertising; + + phy_impl->an_outcome = xgbe_phy_an_outcome; +} diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c new file mode 100644 index 000000000000..8c530dccb447 --- /dev/null +++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c @@ -0,0 +1,642 @@ +/* + * AMD 10Gb Ethernet driver + * + * This file is available to you under your choice of the following two + * licenses: + * + * License 1: GPLv2 + * + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. + * + * This file is free software; you may copy, redistribute and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or (at + * your option) any later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * License 2: Modified BSD + * + * Copyright (c) 2014-2016 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Advanced Micro Devices, Inc. nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * The Synopsys DWC ETHER XGMAC Software Driver and documentation + * (hereinafter "Software") is an unsupported proprietary work of Synopsys, + * Inc. unless otherwise expressly agreed to in writing between Synopsys + * and you. + * + * The Software IS NOT an item of Licensed Software or Licensed Product + * under any End User Software License Agreement or Agreement for Licensed + * Product with Synopsys or any supplement thereto. Permission is hereby + * granted, free of charge, to any person obtaining a copy of this software + * annotated with this license and the Software, to deal in the Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS" + * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_net.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> +#include <linux/of_device.h> +#include <linux/clk.h> +#include <linux/property.h> +#include <linux/acpi.h> +#include <linux/mdio.h> + +#include "xgbe.h" +#include "xgbe-common.h" + +#ifdef CONFIG_ACPI +static const struct acpi_device_id xgbe_acpi_match[]; + +static struct xgbe_version_data *xgbe_acpi_vdata(struct xgbe_prv_data *pdata) +{ + const struct acpi_device_id *id; + + id = acpi_match_device(xgbe_acpi_match, pdata->dev); + + return id ? (struct xgbe_version_data *)id->driver_data : NULL; +} + +static int xgbe_acpi_support(struct xgbe_prv_data *pdata) +{ + struct device *dev = pdata->dev; + u32 property; + int ret; + + /* Obtain the system clock setting */ + ret = device_property_read_u32(dev, XGBE_ACPI_DMA_FREQ, &property); + if (ret) { + dev_err(dev, "unable to obtain %s property\n", + XGBE_ACPI_DMA_FREQ); + return ret; + } + pdata->sysclk_rate = property; + + /* Obtain the PTP clock setting */ + ret = device_property_read_u32(dev, XGBE_ACPI_PTP_FREQ, &property); + if (ret) { + dev_err(dev, "unable to obtain %s property\n", + XGBE_ACPI_PTP_FREQ); + return ret; + } + pdata->ptpclk_rate = property; + + return 0; +} +#else /* CONFIG_ACPI */ +static struct xgbe_version_data *xgbe_acpi_vdata(struct xgbe_prv_data *pdata) +{ + return NULL; +} + +static int xgbe_acpi_support(struct xgbe_prv_data *pdata) +{ + return -EINVAL; +} +#endif /* CONFIG_ACPI */ + +#ifdef CONFIG_OF +static const struct of_device_id xgbe_of_match[]; + +static struct xgbe_version_data *xgbe_of_vdata(struct xgbe_prv_data *pdata) +{ + const struct of_device_id *id; + + id = of_match_device(xgbe_of_match, pdata->dev); + + return id ? 
(struct xgbe_version_data *)id->data : NULL; +} + +static int xgbe_of_support(struct xgbe_prv_data *pdata) +{ + struct device *dev = pdata->dev; + + /* Obtain the system clock setting */ + pdata->sysclk = devm_clk_get(dev, XGBE_DMA_CLOCK); + if (IS_ERR(pdata->sysclk)) { + dev_err(dev, "dma devm_clk_get failed\n"); + return PTR_ERR(pdata->sysclk); + } + pdata->sysclk_rate = clk_get_rate(pdata->sysclk); + + /* Obtain the PTP clock setting */ + pdata->ptpclk = devm_clk_get(dev, XGBE_PTP_CLOCK); + if (IS_ERR(pdata->ptpclk)) { + dev_err(dev, "ptp devm_clk_get failed\n"); + return PTR_ERR(pdata->ptpclk); + } + pdata->ptpclk_rate = clk_get_rate(pdata->ptpclk); + + return 0; +} + +static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata) +{ + struct device *dev = pdata->dev; + struct device_node *phy_node; + struct platform_device *phy_pdev; + + phy_node = of_parse_phandle(dev->of_node, "phy-handle", 0); + if (phy_node) { + /* Old style device tree: + * The XGBE and PHY resources are separate + */ + phy_pdev = of_find_device_by_node(phy_node); + of_node_put(phy_node); + } else { + /* New style device tree: + * The XGBE and PHY resources are grouped together with + * the PHY resources listed last + */ + get_device(dev); + phy_pdev = pdata->platdev; + } + + return phy_pdev; +} +#else /* CONFIG_OF */ +static struct xgbe_version_data *xgbe_of_vdata(struct xgbe_prv_data *pdata) +{ + return NULL; +} + +static int xgbe_of_support(struct xgbe_prv_data *pdata) +{ + return -EINVAL; +} + +static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata) +{ + return NULL; +} +#endif /* CONFIG_OF */ + +static unsigned int xgbe_resource_count(struct platform_device *pdev, + unsigned int type) +{ + unsigned int count; + int i; + + for (i = 0, count = 0; i < pdev->num_resources; i++) { + struct resource *res = &pdev->resource[i]; + + if (type == resource_type(res)) + count++; + } + + return count; +} + +static struct platform_device *xgbe_get_phy_pdev(struct xgbe_prv_data *pdata) +{ + struct platform_device *phy_pdev; + + if (pdata->use_acpi) { + get_device(pdata->dev); + phy_pdev = pdata->platdev; + } else { + phy_pdev = xgbe_of_get_phy_pdev(pdata); + } + + return phy_pdev; +} + +static struct xgbe_version_data *xgbe_get_vdata(struct xgbe_prv_data *pdata) +{ + return pdata->use_acpi ? xgbe_acpi_vdata(pdata) + : xgbe_of_vdata(pdata); +} + +static int xgbe_platform_probe(struct platform_device *pdev) +{ + struct xgbe_prv_data *pdata; + struct device *dev = &pdev->dev; + struct platform_device *phy_pdev; + struct resource *res; + const char *phy_mode; + unsigned int phy_memnum, phy_irqnum; + unsigned int dma_irqnum, dma_irqend; + enum dev_dma_attr attr; + int ret; + + pdata = xgbe_alloc_pdata(dev); + if (IS_ERR(pdata)) { + ret = PTR_ERR(pdata); + goto err_alloc; + } + + pdata->platdev = pdev; + pdata->adev = ACPI_COMPANION(dev); + platform_set_drvdata(pdev, pdata); + + /* Check if we should use ACPI or DT */ + pdata->use_acpi = dev->of_node ? 
0 : 1; + + /* Get the version data */ + pdata->vdata = xgbe_get_vdata(pdata); + + phy_pdev = xgbe_get_phy_pdev(pdata); + if (!phy_pdev) { + dev_err(dev, "unable to obtain phy device\n"); + ret = -EINVAL; + goto err_phydev; + } + pdata->phy_platdev = phy_pdev; + pdata->phy_dev = &phy_pdev->dev; + + if (pdev == phy_pdev) { + /* New style device tree or ACPI: + * The XGBE and PHY resources are grouped together with + * the PHY resources listed last + */ + phy_memnum = xgbe_resource_count(pdev, IORESOURCE_MEM) - 3; + phy_irqnum = xgbe_resource_count(pdev, IORESOURCE_IRQ) - 1; + dma_irqnum = 1; + dma_irqend = phy_irqnum; + } else { + /* Old style device tree: + * The XGBE and PHY resources are separate + */ + phy_memnum = 0; + phy_irqnum = 0; + dma_irqnum = 1; + dma_irqend = xgbe_resource_count(pdev, IORESOURCE_IRQ); + } + + /* Obtain the mmio areas for the device */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + pdata->xgmac_regs = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->xgmac_regs)) { + dev_err(dev, "xgmac ioremap failed\n"); + ret = PTR_ERR(pdata->xgmac_regs); + goto err_io; + } + if (netif_msg_probe(pdata)) + dev_dbg(dev, "xgmac_regs = %p\n", pdata->xgmac_regs); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + pdata->xpcs_regs = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->xpcs_regs)) { + dev_err(dev, "xpcs ioremap failed\n"); + ret = PTR_ERR(pdata->xpcs_regs); + goto err_io; + } + if (netif_msg_probe(pdata)) + dev_dbg(dev, "xpcs_regs = %p\n", pdata->xpcs_regs); + + res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++); + pdata->rxtx_regs = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->rxtx_regs)) { + dev_err(dev, "rxtx ioremap failed\n"); + ret = PTR_ERR(pdata->rxtx_regs); + goto err_io; + } + if (netif_msg_probe(pdata)) + dev_dbg(dev, "rxtx_regs = %p\n", pdata->rxtx_regs); + + res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++); + pdata->sir0_regs = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->sir0_regs)) { + dev_err(dev, "sir0 ioremap failed\n"); + ret = PTR_ERR(pdata->sir0_regs); + goto err_io; + } + if (netif_msg_probe(pdata)) + dev_dbg(dev, "sir0_regs = %p\n", pdata->sir0_regs); + + res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++); + pdata->sir1_regs = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->sir1_regs)) { + dev_err(dev, "sir1 ioremap failed\n"); + ret = PTR_ERR(pdata->sir1_regs); + goto err_io; + } + if (netif_msg_probe(pdata)) + dev_dbg(dev, "sir1_regs = %p\n", pdata->sir1_regs); + + /* Retrieve the MAC address */ + ret = device_property_read_u8_array(dev, XGBE_MAC_ADDR_PROPERTY, + pdata->mac_addr, + sizeof(pdata->mac_addr)); + if (ret || !is_valid_ether_addr(pdata->mac_addr)) { + dev_err(dev, "invalid %s property\n", XGBE_MAC_ADDR_PROPERTY); + if (!ret) + ret = -EINVAL; + goto err_io; + } + + /* Retrieve the PHY mode - it must be "xgmii" */ + ret = device_property_read_string(dev, XGBE_PHY_MODE_PROPERTY, + &phy_mode); + if (ret || strcmp(phy_mode, phy_modes(PHY_INTERFACE_MODE_XGMII))) { + dev_err(dev, "invalid %s property\n", XGBE_PHY_MODE_PROPERTY); + if (!ret) + ret = -EINVAL; + goto err_io; + } + pdata->phy_mode = PHY_INTERFACE_MODE_XGMII; + + /* Check for per channel interrupt support */ + if (device_property_present(dev, XGBE_DMA_IRQS_PROPERTY)) { + pdata->per_channel_irq = 1; + pdata->channel_irq_mode = XGBE_IRQ_MODE_EDGE; + } + + /* Obtain device settings unique to ACPI/OF */ + if (pdata->use_acpi) + ret = xgbe_acpi_support(pdata); + else + ret = 
xgbe_of_support(pdata); + if (ret) + goto err_io; + + /* Set the DMA coherency values */ + attr = device_get_dma_attr(dev); + if (attr == DEV_DMA_NOT_SUPPORTED) { + dev_err(dev, "DMA is not supported"); + ret = -ENODEV; + goto err_io; + } + pdata->coherent = (attr == DEV_DMA_COHERENT); + if (pdata->coherent) { + pdata->axdomain = XGBE_DMA_OS_AXDOMAIN; + pdata->arcache = XGBE_DMA_OS_ARCACHE; + pdata->awcache = XGBE_DMA_OS_AWCACHE; + } else { + pdata->axdomain = XGBE_DMA_SYS_AXDOMAIN; + pdata->arcache = XGBE_DMA_SYS_ARCACHE; + pdata->awcache = XGBE_DMA_SYS_AWCACHE; + } + + /* Set the maximum fifo amounts */ + pdata->tx_max_fifo_size = pdata->vdata->tx_max_fifo_size; + pdata->rx_max_fifo_size = pdata->vdata->rx_max_fifo_size; + + /* Set the hardware channel and queue counts */ + xgbe_set_counts(pdata); + + /* Always have XGMAC and XPCS (auto-negotiation) interrupts */ + pdata->irq_count = 2; + + /* Get the device interrupt */ + ret = platform_get_irq(pdev, 0); + if (ret < 0) { + dev_err(dev, "platform_get_irq 0 failed\n"); + goto err_io; + } + pdata->dev_irq = ret; + + /* Get the per channel DMA interrupts */ + if (pdata->per_channel_irq) { + unsigned int i, max = ARRAY_SIZE(pdata->channel_irq); + + for (i = 0; (i < max) && (dma_irqnum < dma_irqend); i++) { + ret = platform_get_irq(pdata->platdev, dma_irqnum++); + if (ret < 0) { + netdev_err(pdata->netdev, + "platform_get_irq %u failed\n", + dma_irqnum - 1); + goto err_io; + } + + pdata->channel_irq[i] = ret; + } + + pdata->channel_irq_count = max; + + pdata->irq_count += max; + } + + /* Get the auto-negotiation interrupt */ + ret = platform_get_irq(phy_pdev, phy_irqnum++); + if (ret < 0) { + dev_err(dev, "platform_get_irq phy 0 failed\n"); + goto err_io; + } + pdata->an_irq = ret; + + /* Configure the netdev resource */ + ret = xgbe_config_netdev(pdata); + if (ret) + goto err_io; + + netdev_notice(pdata->netdev, "net device enabled\n"); + + return 0; + +err_io: + platform_device_put(phy_pdev); + +err_phydev: + xgbe_free_pdata(pdata); + +err_alloc: + dev_notice(dev, "net device not enabled\n"); + + return ret; +} + +static int xgbe_platform_remove(struct platform_device *pdev) +{ + struct xgbe_prv_data *pdata = platform_get_drvdata(pdev); + + xgbe_deconfig_netdev(pdata); + + platform_device_put(pdata->phy_platdev); + + xgbe_free_pdata(pdata); + + return 0; +} + +#ifdef CONFIG_PM +static int xgbe_platform_suspend(struct device *dev) +{ + struct xgbe_prv_data *pdata = dev_get_drvdata(dev); + struct net_device *netdev = pdata->netdev; + int ret = 0; + + DBGPR("-->xgbe_suspend\n"); + + if (netif_running(netdev)) + ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT); + + pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1); + pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); + + DBGPR("<--xgbe_suspend\n"); + + return ret; +} + +static int xgbe_platform_resume(struct device *dev) +{ + struct xgbe_prv_data *pdata = dev_get_drvdata(dev); + struct net_device *netdev = pdata->netdev; + int ret = 0; + + DBGPR("-->xgbe_resume\n"); + + pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER; + XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); + + if (netif_running(netdev)) { + ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT); + + /* Schedule a restart in case the link or phy state changed + * while we were powered down. 
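+ * The restart worker re-runs the normal device stop/start path.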
+ */ + schedule_work(&pdata->restart_work); + } + + DBGPR("<--xgbe_resume\n"); + + return ret; +} +#endif /* CONFIG_PM */ + +static const struct xgbe_version_data xgbe_v1 = { + .init_function_ptrs_phy_impl = xgbe_init_function_ptrs_phy_v1, + .xpcs_access = XGBE_XPCS_ACCESS_V1, + .tx_max_fifo_size = 81920, + .rx_max_fifo_size = 81920, + .tx_tstamp_workaround = 1, +}; + +#ifdef CONFIG_ACPI +static const struct acpi_device_id xgbe_acpi_match[] = { + { .id = "AMDI8001", + .driver_data = (kernel_ulong_t)&xgbe_v1 }, + {}, +}; + +MODULE_DEVICE_TABLE(acpi, xgbe_acpi_match); +#endif + +#ifdef CONFIG_OF +static const struct of_device_id xgbe_of_match[] = { + { .compatible = "amd,xgbe-seattle-v1a", + .data = &xgbe_v1 }, + {}, +}; + +MODULE_DEVICE_TABLE(of, xgbe_of_match); +#endif + +static SIMPLE_DEV_PM_OPS(xgbe_platform_pm_ops, + xgbe_platform_suspend, xgbe_platform_resume); + +static struct platform_driver xgbe_driver = { + .driver = { + .name = XGBE_DRV_NAME, +#ifdef CONFIG_ACPI + .acpi_match_table = xgbe_acpi_match, +#endif +#ifdef CONFIG_OF + .of_match_table = xgbe_of_match, +#endif + .pm = &xgbe_platform_pm_ops, + }, + .probe = xgbe_platform_probe, + .remove = xgbe_platform_remove, +}; + +int xgbe_platform_init(void) +{ + return platform_driver_register(&xgbe_driver); +} + +void xgbe_platform_exit(void) +{ + platform_driver_unregister(&xgbe_driver); +} diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 5dd17dcea2f8..f52a9bd05bac 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -127,9 +127,10 @@ #include <linux/timecounter.h> #include <linux/net_tstamp.h> #include <net/dcbnl.h> +#include <linux/completion.h> #define XGBE_DRV_NAME "amd-xgbe" -#define XGBE_DRV_VERSION "1.0.2" +#define XGBE_DRV_VERSION "1.0.3" #define XGBE_DRV_DESC "AMD 10 Gigabit Ethernet Driver" /* Descriptor related defines */ @@ -158,7 +159,8 @@ #define XGBE_MAX_DMA_CHANNELS 16 #define XGBE_MAX_QUEUES 16 -#define XGBE_DMA_STOP_TIMEOUT 5 +#define XGBE_PRIORITY_QUEUES 8 +#define XGBE_DMA_STOP_TIMEOUT 1 /* DMA cache settings - Outer sharable, write-back, write-allocate */ #define XGBE_DMA_OS_AXDOMAIN 0x2 @@ -170,6 +172,10 @@ #define XGBE_DMA_SYS_ARCACHE 0x0 #define XGBE_DMA_SYS_AWCACHE 0x0 +/* DMA channel interrupt modes */ +#define XGBE_IRQ_MODE_EDGE 0 +#define XGBE_IRQ_MODE_LEVEL 1 + #define XGBE_DMA_INTERRUPT_MASK 0x31c7 #define XGMAC_MIN_PACKET 60 @@ -177,18 +183,19 @@ #define XGMAC_MAX_STD_PACKET 1518 #define XGMAC_JUMBO_PACKET_MTU 9000 #define XGMAC_MAX_JUMBO_PACKET 9018 +#define XGMAC_ETH_PREAMBLE (12 + 8) /* Inter-frame gap + preamble */ + +#define XGMAC_PFC_DATA_LEN 46 +#define XGMAC_PFC_DELAYS 14000 + +#define XGMAC_PRIO_QUEUES(_cnt) \ + min_t(unsigned int, IEEE_8021QAZ_MAX_TCS, (_cnt)) /* Common property names */ #define XGBE_MAC_ADDR_PROPERTY "mac-address" #define XGBE_PHY_MODE_PROPERTY "phy-mode" #define XGBE_DMA_IRQS_PROPERTY "amd,per-channel-interrupt" #define XGBE_SPEEDSET_PROPERTY "amd,speed-set" -#define XGBE_BLWC_PROPERTY "amd,serdes-blwc" -#define XGBE_CDR_RATE_PROPERTY "amd,serdes-cdr-rate" -#define XGBE_PQ_SKEW_PROPERTY "amd,serdes-pq-skew" -#define XGBE_TX_AMP_PROPERTY "amd,serdes-tx-amp" -#define XGBE_DFE_CFG_PROPERTY "amd,serdes-dfe-tap-config" -#define XGBE_DFE_ENA_PROPERTY "amd,serdes-dfe-tap-enable" /* Device-tree clock names */ #define XGBE_DMA_CLOCK "dma_clk" @@ -198,6 +205,20 @@ #define XGBE_ACPI_DMA_FREQ "amd,dma-freq" #define XGBE_ACPI_PTP_FREQ "amd,ptp-freq" +/* PCI BAR mapping */ +#define XGBE_XGMAC_BAR 0 
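+/* BAR 0 maps the XGMAC register space and BAR 1 the XPCS space; the + * MAC property and I2C control blocks sit at the fixed offsets below, + * relative to a base set up by the PCI probe code (not in this hunk). + */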
+#define XGBE_XPCS_BAR 1 +#define XGBE_MAC_PROP_OFFSET 0x1d000 +#define XGBE_I2C_CTRL_OFFSET 0x1e000 + +/* PCI MSIx support */ +#define XGBE_MSIX_BASE_COUNT 4 +#define XGBE_MSIX_MIN_COUNT (XGBE_MSIX_BASE_COUNT + 1) + +/* PCI clock frequencies */ +#define XGBE_V2_DMA_CLOCK_FREQ 500000000 /* 500 MHz */ +#define XGBE_V2_PTP_CLOCK_FREQ 125000000 /* 125 MHz */ + /* Timestamp support - values based on 50MHz PTP clock * 50MHz => 20 nsec */ @@ -208,7 +229,12 @@ #define XGMAC_DRIVER_CONTEXT 1 #define XGMAC_IOCTL_CONTEXT 2 -#define XGBE_FIFO_MAX 81920 +#define XGMAC_FIFO_MIN_ALLOC 2048 +#define XGMAC_FIFO_UNIT 256 +#define XGMAC_FIFO_ALIGN(_x) \ + (((_x) + XGMAC_FIFO_UNIT - 1) & ~(XGMAC_FIFO_UNIT - 1)) +#define XGMAC_FIFO_FC_OFF 2048 +#define XGMAC_FIFO_FC_MIN 4096 #define XGBE_TC_MIN_QUANTUM 10 @@ -233,6 +259,14 @@ /* Flow control queue count */ #define XGMAC_MAX_FLOW_CONTROL_QUEUES 8 +/* Flow control threshold units */ +#define XGMAC_FLOW_CONTROL_UNIT 512 +#define XGMAC_FLOW_CONTROL_ALIGN(_x) \ + (((_x) + XGMAC_FLOW_CONTROL_UNIT - 1) & ~(XGMAC_FLOW_CONTROL_UNIT - 1)) +#define XGMAC_FLOW_CONTROL_VALUE(_x) \ + (((_x) < 1024) ? 0 : ((_x) / XGMAC_FLOW_CONTROL_UNIT) - 2) +#define XGMAC_FLOW_CONTROL_MAX 33280 + /* Maximum MAC address hash table size (256 bits = 8 bytes) */ #define XGBE_MAC_HASH_TABLE_SIZE 8 @@ -244,46 +278,19 @@ /* Auto-negotiation */ #define XGBE_AN_MS_TIMEOUT 500 -#define XGBE_LINK_TIMEOUT 10 - -#define XGBE_AN_INT_CMPLT 0x01 -#define XGBE_AN_INC_LINK 0x02 -#define XGBE_AN_PG_RCV 0x04 -#define XGBE_AN_INT_MASK 0x07 - -/* Rate-change complete wait/retry count */ -#define XGBE_RATECHANGE_COUNT 500 - -/* Default SerDes settings */ -#define XGBE_SPEED_10000_BLWC 0 -#define XGBE_SPEED_10000_CDR 0x7 -#define XGBE_SPEED_10000_PLL 0x1 -#define XGBE_SPEED_10000_PQ 0x12 -#define XGBE_SPEED_10000_RATE 0x0 -#define XGBE_SPEED_10000_TXAMP 0xa -#define XGBE_SPEED_10000_WORD 0x7 -#define XGBE_SPEED_10000_DFE_TAP_CONFIG 0x1 -#define XGBE_SPEED_10000_DFE_TAP_ENABLE 0x7f - -#define XGBE_SPEED_2500_BLWC 1 -#define XGBE_SPEED_2500_CDR 0x2 -#define XGBE_SPEED_2500_PLL 0x0 -#define XGBE_SPEED_2500_PQ 0xa -#define XGBE_SPEED_2500_RATE 0x1 -#define XGBE_SPEED_2500_TXAMP 0xf -#define XGBE_SPEED_2500_WORD 0x1 -#define XGBE_SPEED_2500_DFE_TAP_CONFIG 0x3 -#define XGBE_SPEED_2500_DFE_TAP_ENABLE 0x0 - -#define XGBE_SPEED_1000_BLWC 1 -#define XGBE_SPEED_1000_CDR 0x2 -#define XGBE_SPEED_1000_PLL 0x0 -#define XGBE_SPEED_1000_PQ 0xa -#define XGBE_SPEED_1000_RATE 0x3 -#define XGBE_SPEED_1000_TXAMP 0xf -#define XGBE_SPEED_1000_WORD 0x1 -#define XGBE_SPEED_1000_DFE_TAP_CONFIG 0x3 -#define XGBE_SPEED_1000_DFE_TAP_ENABLE 0x0 +#define XGBE_LINK_TIMEOUT 5 + +#define XGBE_SGMII_AN_LINK_STATUS BIT(1) +#define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3)) +#define XGBE_SGMII_AN_LINK_SPEED_100 0x04 +#define XGBE_SGMII_AN_LINK_SPEED_1000 0x08 +#define XGBE_SGMII_AN_LINK_DUPLEX BIT(4) + +/* ECC correctable error notification window (seconds) */ +#define XGBE_ECC_LIMIT 60 + +/* MDIO port types */ +#define XGMAC_MAX_C22_PORT 3 struct xgbe_prv_data; @@ -461,6 +468,7 @@ enum xgbe_state { XGBE_DOWN, XGBE_LINK_INIT, XGBE_LINK_ERR, + XGBE_STOPPED, }; enum xgbe_int { @@ -480,6 +488,12 @@ enum xgbe_int_state { XGMAC_INT_STATE_RESTORE, }; +enum xgbe_ecc_sec { + XGBE_ECC_SEC_TX, + XGBE_ECC_SEC_RX, + XGBE_ECC_SEC_DESC, +}; + enum xgbe_speed { XGBE_SPEED_1000 = 0, XGBE_SPEED_2500, @@ -487,6 +501,19 @@ enum xgbe_speed { XGBE_SPEEDS, }; +enum xgbe_xpcs_access { + XGBE_XPCS_ACCESS_V1 = 0, + XGBE_XPCS_ACCESS_V2, +}; + +enum xgbe_an_mode { + 
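/* Clause 73 is the IEEE 802.3 backplane auto-negotiation protocol + * (KR/KX); Clause 37 is 1000BASE-X auto-negotiation, with an SGMII + * variant. CL73_REDRV presumably denotes CL73 run through an external + * re-driver. + */ +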
XGBE_AN_MODE_CL73 = 0, + XGBE_AN_MODE_CL73_REDRV, + XGBE_AN_MODE_CL37, + XGBE_AN_MODE_CL37_SGMII, + XGBE_AN_MODE_NONE, +}; + enum xgbe_an { XGBE_AN_READY = 0, XGBE_AN_PAGE_RECEIVED, @@ -504,8 +531,14 @@ enum xgbe_rx { }; enum xgbe_mode { - XGBE_MODE_KR = 0, - XGBE_MODE_KX, + XGBE_MODE_KX_1000 = 0, + XGBE_MODE_KX_2500, + XGBE_MODE_KR, + XGBE_MODE_X, + XGBE_MODE_SGMII_100, + XGBE_MODE_SGMII_1000, + XGBE_MODE_SFI, + XGBE_MODE_UNKNOWN, }; enum xgbe_speedset { @@ -513,6 +546,12 @@ enum xgbe_speedset { XGBE_SPEEDSET_2500_10000, }; +enum xgbe_mdio_mode { + XGBE_MDIO_MODE_NONE = 0, + XGBE_MDIO_MODE_CL22, + XGBE_MDIO_MODE_CL45, +}; + struct xgbe_phy { u32 supported; u32 advertising; @@ -531,6 +570,43 @@ struct xgbe_phy { int rx_pause; }; +enum xgbe_i2c_cmd { + XGBE_I2C_CMD_READ = 0, + XGBE_I2C_CMD_WRITE, +}; + +struct xgbe_i2c_op { + enum xgbe_i2c_cmd cmd; + + unsigned int target; + + void *buf; + unsigned int len; +}; + +struct xgbe_i2c_op_state { + struct xgbe_i2c_op *op; + + unsigned int tx_len; + unsigned char *tx_buf; + + unsigned int rx_len; + unsigned char *rx_buf; + + unsigned int tx_abort_source; + + int ret; +}; + +struct xgbe_i2c { + unsigned int started; + unsigned int max_speed_mode; + unsigned int rx_fifo_size; + unsigned int tx_fifo_size; + + struct xgbe_i2c_op_state op_state; +}; + struct xgbe_mmc_stats { /* Tx Stats */ u64 txoctetcount_gb; @@ -601,9 +677,15 @@ struct xgbe_hw_if { int (*read_mmd_regs)(struct xgbe_prv_data *, int, int); void (*write_mmd_regs)(struct xgbe_prv_data *, int, int, int); - int (*set_gmii_speed)(struct xgbe_prv_data *); - int (*set_gmii_2500_speed)(struct xgbe_prv_data *); - int (*set_xgmii_speed)(struct xgbe_prv_data *); + int (*set_speed)(struct xgbe_prv_data *, int); + + int (*set_ext_mii_mode)(struct xgbe_prv_data *, unsigned int, + enum xgbe_mdio_mode); + int (*read_ext_mii_regs)(struct xgbe_prv_data *, int, int); + int (*write_ext_mii_regs)(struct xgbe_prv_data *, int, int, u16); + + int (*set_gpio)(struct xgbe_prv_data *, unsigned int); + int (*clr_gpio)(struct xgbe_prv_data *, unsigned int); void (*enable_tx)(struct xgbe_prv_data *); void (*disable_tx)(struct xgbe_prv_data *); @@ -682,11 +764,65 @@ struct xgbe_hw_if { int (*disable_rss)(struct xgbe_prv_data *); int (*set_rss_hash_key)(struct xgbe_prv_data *, const u8 *); int (*set_rss_lookup_table)(struct xgbe_prv_data *, const u32 *); + + /* For ECC */ + void (*disable_ecc_ded)(struct xgbe_prv_data *); + void (*disable_ecc_sec)(struct xgbe_prv_data *, enum xgbe_ecc_sec); +}; + +/* This structure represents implementation specific routines for an + * implementation of a PHY. All routines are required unless noted below. 
+ * Optional routines: + * kr_training_pre, kr_training_post + */ +struct xgbe_phy_impl_if { + /* Perform Setup/teardown actions */ + int (*init)(struct xgbe_prv_data *); + void (*exit)(struct xgbe_prv_data *); + + /* Perform start/stop specific actions */ + int (*reset)(struct xgbe_prv_data *); + int (*start)(struct xgbe_prv_data *); + void (*stop)(struct xgbe_prv_data *); + + /* Return the link status */ + int (*link_status)(struct xgbe_prv_data *, int *); + + /* Indicate if a particular speed is valid */ + bool (*valid_speed)(struct xgbe_prv_data *, int); + + /* Check if the specified mode can/should be used */ + bool (*use_mode)(struct xgbe_prv_data *, enum xgbe_mode); + /* Switch the PHY into various modes */ + void (*set_mode)(struct xgbe_prv_data *, enum xgbe_mode); + /* Retrieve mode needed for a specific speed */ + enum xgbe_mode (*get_mode)(struct xgbe_prv_data *, int); + /* Retrieve new/next mode when trying to auto-negotiate */ + enum xgbe_mode (*switch_mode)(struct xgbe_prv_data *); + /* Retrieve current mode */ + enum xgbe_mode (*cur_mode)(struct xgbe_prv_data *); + + /* Retrieve current auto-negotiation mode */ + enum xgbe_an_mode (*an_mode)(struct xgbe_prv_data *); + + /* Configure auto-negotiation settings */ + int (*an_config)(struct xgbe_prv_data *); + + /* Set/override auto-negotiation advertisement settings */ + unsigned int (*an_advertising)(struct xgbe_prv_data *); + + /* Process results of auto-negotiation */ + enum xgbe_mode (*an_outcome)(struct xgbe_prv_data *); + + /* Pre/Post KR training enablement support */ + void (*kr_training_pre)(struct xgbe_prv_data *); + void (*kr_training_post)(struct xgbe_prv_data *); }; struct xgbe_phy_if { - /* For initial PHY setup */ - void (*phy_init)(struct xgbe_prv_data *); + /* For PHY setup/teardown */ + int (*phy_init)(struct xgbe_prv_data *); + void (*phy_exit)(struct xgbe_prv_data *); /* For PHY support when setting device up/down */ int (*phy_reset)(struct xgbe_prv_data *); @@ -696,6 +832,30 @@ struct xgbe_phy_if { /* For PHY support while device is up */ void (*phy_status)(struct xgbe_prv_data *); int (*phy_config_aneg)(struct xgbe_prv_data *); + + /* For PHY settings validation */ + bool (*phy_valid_speed)(struct xgbe_prv_data *, int); + + /* For single interrupt support */ + irqreturn_t (*an_isr)(int, struct xgbe_prv_data *); + + /* PHY implementation specific services */ + struct xgbe_phy_impl_if phy_impl; +}; + +struct xgbe_i2c_if { + /* For initial I2C setup */ + int (*i2c_init)(struct xgbe_prv_data *); + + /* For I2C support when setting device up/down */ + int (*i2c_start)(struct xgbe_prv_data *); + void (*i2c_stop)(struct xgbe_prv_data *); + + /* For performing I2C operations */ + int (*i2c_xfer)(struct xgbe_prv_data *, struct xgbe_i2c_op *); + + /* For single interrupt support */ + irqreturn_t (*i2c_isr)(int, struct xgbe_prv_data *); }; struct xgbe_desc_if { @@ -755,11 +915,28 @@ struct xgbe_hw_features { unsigned int aux_snap_num; /* Number of Aux snapshot inputs */ }; +struct xgbe_version_data { + void (*init_function_ptrs_phy_impl)(struct xgbe_phy_if *); + enum xgbe_xpcs_access xpcs_access; + unsigned int mmc_64bit; + unsigned int tx_max_fifo_size; + unsigned int rx_max_fifo_size; + unsigned int tx_tstamp_workaround; + unsigned int ecc_support; + unsigned int i2c_support; +}; + struct xgbe_prv_data { struct net_device *netdev; - struct platform_device *pdev; + struct pci_dev *pcidev; + struct platform_device *platdev; struct acpi_device *adev; struct device *dev; + struct platform_device *phy_platdev; + struct 
device *phy_dev; + + /* Version related data */ + struct xgbe_version_data *vdata; /* ACPI or DT flag */ unsigned int use_acpi; @@ -770,12 +947,17 @@ struct xgbe_prv_data { void __iomem *rxtx_regs; /* SerDes Rx/Tx CSRs */ void __iomem *sir0_regs; /* SerDes integration registers (1/2) */ void __iomem *sir1_regs; /* SerDes integration registers (2/2) */ + void __iomem *xprop_regs; /* XGBE property registers */ + void __iomem *xi2c_regs; /* XGBE I2C CSRs */ /* Overall device lock */ spinlock_t lock; /* XPCS indirect addressing lock */ spinlock_t xpcs_lock; + unsigned int xpcs_window; + unsigned int xpcs_window_size; + unsigned int xpcs_window_mask; /* RSS addressing mutex */ struct mutex rss_mutex; @@ -783,12 +965,39 @@ struct xgbe_prv_data { /* Flags representing xgbe_state */ unsigned long dev_state; + /* ECC support */ + unsigned long tx_sec_period; + unsigned long tx_ded_period; + unsigned long rx_sec_period; + unsigned long rx_ded_period; + unsigned long desc_sec_period; + unsigned long desc_ded_period; + + unsigned int tx_sec_count; + unsigned int tx_ded_count; + unsigned int rx_sec_count; + unsigned int rx_ded_count; + unsigned int desc_ded_count; + unsigned int desc_sec_count; + + struct msix_entry *msix_entries; int dev_irq; + int ecc_irq; + int i2c_irq; + int channel_irq[XGBE_MAX_DMA_CHANNELS]; + unsigned int per_channel_irq; + unsigned int irq_shared; + unsigned int irq_count; + unsigned int channel_irq_count; + unsigned int channel_irq_mode; + + char ecc_name[IFNAMSIZ + 32]; struct xgbe_hw_if hw_if; struct xgbe_phy_if phy_if; struct xgbe_desc_if desc_if; + struct xgbe_i2c_if i2c_if; /* AXI DMA settings */ unsigned int coherent; @@ -803,12 +1012,16 @@ struct xgbe_prv_data { /* Rings for Tx/Rx on a DMA channel */ struct xgbe_channel *channel; + unsigned int tx_max_channel_count; + unsigned int rx_max_channel_count; unsigned int channel_count; unsigned int tx_ring_count; unsigned int tx_desc_count; unsigned int rx_ring_count; unsigned int rx_desc_count; + unsigned int tx_max_q_count; + unsigned int rx_max_q_count; unsigned int tx_q_count; unsigned int rx_q_count; @@ -820,11 +1033,13 @@ struct xgbe_prv_data { unsigned int tx_threshold; unsigned int tx_pbl; unsigned int tx_osp_mode; + unsigned int tx_max_fifo_size; /* Rx settings */ unsigned int rx_sf_mode; unsigned int rx_threshold; unsigned int rx_pbl; + unsigned int rx_max_fifo_size; /* Tx coalescing settings */ unsigned int tx_usecs; @@ -842,6 +1057,8 @@ struct xgbe_prv_data { unsigned int pause_autoneg; unsigned int tx_pause; unsigned int rx_pause; + unsigned int rx_rfa[XGBE_MAX_QUEUES]; + unsigned int rx_rfd[XGBE_MAX_QUEUES]; /* Receive Side Scaling settings */ u8 rss_key[XGBE_RSS_HASH_KEY_SIZE]; @@ -881,13 +1098,16 @@ struct xgbe_prv_data { struct ieee_pfc *pfc; unsigned int q2tc_map[XGBE_MAX_QUEUES]; unsigned int prio2q_map[IEEE_8021QAZ_MAX_TCS]; + unsigned int pfcq[XGBE_MAX_QUEUES]; + unsigned int pfc_rfa; u8 num_tcs; /* Hardware features of the device */ struct xgbe_hw_features hw_feat; - /* Device restart work structure */ + /* Device work structures */ struct work_struct restart_work; + struct work_struct stopdev_work; /* Keeps track of power mode */ unsigned int power_down; @@ -901,9 +1121,14 @@ struct xgbe_prv_data { int phy_speed; /* MDIO/PHY related settings */ + unsigned int phy_started; + void *phy_data; struct xgbe_phy phy; int mdio_mmd; unsigned long link_check; + struct completion mdio_complete; + + unsigned int kr_redrv; char an_name[IFNAMSIZ + 32]; struct workqueue_struct *an_workqueue; @@ -911,23 +1136,9 @@ 
struct xgbe_prv_data { int an_irq; struct work_struct an_irq_work; - unsigned int speed_set; - - /* SerDes UEFI configurable settings. - * Switching between modes/speeds requires new values for some - * SerDes settings. The values can be supplied as device - * properties in array format. The first array entry is for - * 1GbE, second for 2.5GbE and third for 10GbE - */ - u32 serdes_blwc[XGBE_SPEEDS]; - u32 serdes_cdr_rate[XGBE_SPEEDS]; - u32 serdes_pq_skew[XGBE_SPEEDS]; - u32 serdes_tx_amp[XGBE_SPEEDS]; - u32 serdes_dfe_tap_cfg[XGBE_SPEEDS]; - u32 serdes_dfe_tap_ena[XGBE_SPEEDS]; - /* Auto-negotiation state machine support */ unsigned int an_int; + unsigned int an_status; struct mutex an_mutex; enum xgbe_an an_result; enum xgbe_an an_state; @@ -938,6 +1149,13 @@ struct xgbe_prv_data { unsigned int parallel_detect; unsigned int fec_ability; unsigned long an_start; + enum xgbe_an_mode an_mode; + + /* I2C support */ + struct xgbe_i2c i2c; + struct mutex i2c_mutex; + struct completion i2c_complete; + char i2c_name[IFNAMSIZ + 32]; unsigned int lpm_ctrl; /* CTRL1 for resume */ @@ -948,14 +1166,36 @@ struct xgbe_prv_data { unsigned int debugfs_xpcs_mmd; unsigned int debugfs_xpcs_reg; + + unsigned int debugfs_xprop_reg; + + unsigned int debugfs_xi2c_reg; #endif }; /* Function prototypes*/ +struct xgbe_prv_data *xgbe_alloc_pdata(struct device *); +void xgbe_free_pdata(struct xgbe_prv_data *); +void xgbe_set_counts(struct xgbe_prv_data *); +int xgbe_config_netdev(struct xgbe_prv_data *); +void xgbe_deconfig_netdev(struct xgbe_prv_data *); + +int xgbe_platform_init(void); +void xgbe_platform_exit(void); +#ifdef CONFIG_PCI +int xgbe_pci_init(void); +void xgbe_pci_exit(void); +#else +static inline int xgbe_pci_init(void) { return 0; } +static inline void xgbe_pci_exit(void) { } +#endif void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *); void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *); +void xgbe_init_function_ptrs_phy_v1(struct xgbe_phy_if *); +void xgbe_init_function_ptrs_phy_v2(struct xgbe_phy_if *); void xgbe_init_function_ptrs_desc(struct xgbe_desc_if *); +void xgbe_init_function_ptrs_i2c(struct xgbe_i2c_if *); const struct net_device_ops *xgbe_get_netdev_ops(void); const struct ethtool_ops *xgbe_get_ethtool_ops(void); diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c index 8e3dbd4d9f79..cb489e7e8374 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c @@ -26,73 +26,83 @@ #include "atl1e.h" -static int atl1e_get_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd) +static int atl1e_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) { struct atl1e_adapter *adapter = netdev_priv(netdev); struct atl1e_hw *hw = &adapter->hw; + u32 supported, advertising; - ecmd->supported = (SUPPORTED_10baseT_Half | + supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg | SUPPORTED_TP); if (hw->nic_type == athr_l1e) - ecmd->supported |= SUPPORTED_1000baseT_Full; + supported |= SUPPORTED_1000baseT_Full; - ecmd->advertising = ADVERTISED_TP; + advertising = ADVERTISED_TP; - ecmd->advertising |= ADVERTISED_Autoneg; - ecmd->advertising |= hw->autoneg_advertised; + advertising |= ADVERTISED_Autoneg; + advertising |= hw->autoneg_advertised; - ecmd->port = PORT_TP; - ecmd->phy_address = 0; - ecmd->transceiver = XCVR_INTERNAL; + cmd->base.port = PORT_TP; + 
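/* The deprecated 'transceiver' field has no counterpart in the base + * link_ksettings and is intentionally dropped here. + */ +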
cmd->base.phy_address = 0; if (adapter->link_speed != SPEED_0) { - ethtool_cmd_speed_set(ecmd, adapter->link_speed); + cmd->base.speed = adapter->link_speed; if (adapter->link_duplex == FULL_DUPLEX) - ecmd->duplex = DUPLEX_FULL; + cmd->base.duplex = DUPLEX_FULL; else - ecmd->duplex = DUPLEX_HALF; + cmd->base.duplex = DUPLEX_HALF; } else { - ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN); - ecmd->duplex = DUPLEX_UNKNOWN; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; } - ecmd->autoneg = AUTONEG_ENABLE; + cmd->base.autoneg = AUTONEG_ENABLE; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + return 0; } -static int atl1e_set_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd) +static int atl1e_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) { struct atl1e_adapter *adapter = netdev_priv(netdev); struct atl1e_hw *hw = &adapter->hw; + u32 advertising; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); while (test_and_set_bit(__AT_RESETTING, &adapter->flags)) msleep(1); - if (ecmd->autoneg == AUTONEG_ENABLE) { + if (cmd->base.autoneg == AUTONEG_ENABLE) { u16 adv4, adv9; - if ((ecmd->advertising&ADVERTISE_1000_FULL)) { + if (advertising & ADVERTISE_1000_FULL) { if (hw->nic_type == athr_l1e) { hw->autoneg_advertised = - ecmd->advertising & AT_ADV_MASK; + advertising & AT_ADV_MASK; } else { clear_bit(__AT_RESETTING, &adapter->flags); return -EINVAL; } - } else if (ecmd->advertising&ADVERTISE_1000_HALF) { + } else if (advertising & ADVERTISE_1000_HALF) { clear_bit(__AT_RESETTING, &adapter->flags); return -EINVAL; } else { hw->autoneg_advertised = - ecmd->advertising & AT_ADV_MASK; + advertising & AT_ADV_MASK; } - ecmd->advertising = hw->autoneg_advertised | + advertising = hw->autoneg_advertised | ADVERTISED_TP | ADVERTISED_Autoneg; adv4 = hw->mii_autoneg_adv_reg & ~ADVERTISE_ALL; @@ -367,8 +377,6 @@ static int atl1e_nway_reset(struct net_device *netdev) } static const struct ethtool_ops atl1e_ethtool_ops = { - .get_settings = atl1e_get_settings, - .set_settings = atl1e_set_settings, .get_drvinfo = atl1e_get_drvinfo, .get_regs_len = atl1e_get_regs_len, .get_regs = atl1e_get_regs, @@ -380,6 +388,8 @@ static const struct ethtool_ops atl1e_ethtool_ops = { .get_eeprom_len = atl1e_get_eeprom_len, .get_eeprom = atl1e_get_eeprom, .set_eeprom = atl1e_set_eeprom, + .get_link_ksettings = atl1e_get_link_ksettings, + .set_link_ksettings = atl1e_set_link_ksettings, }; void atl1e_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index c16ec3a51876..4a4ffc0c4c65 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -80,6 +80,24 @@ static void bcma_bgmac_cmn_maskset32(struct bgmac *bgmac, u16 offset, u32 mask, bcma_maskset32(bgmac->bcma.cmn, offset, mask, set); } +static int bcma_phy_connect(struct bgmac *bgmac) +{ + struct phy_device *phy_dev; + char bus_id[MII_BUS_ID_SIZE + 3]; + + /* Connect to the PHY */ + snprintf(bus_id, sizeof(bus_id), PHY_ID_FMT, bgmac->mii_bus->id, + bgmac->phyaddr); + phy_dev = phy_connect(bgmac->net_dev, bus_id, bgmac_adjust_link, + PHY_INTERFACE_MODE_MII); + if (IS_ERR(phy_dev)) { + dev_err(bgmac->dev, "PHY connection failed\n"); + return PTR_ERR(phy_dev); + } + + return 0; +} + static const struct 
bcma_device_id bgmac_bcma_tbl[] = { BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_4706_MAC_GBIT, BCMA_ANY_REV, BCMA_ANY_CLASS), @@ -275,6 +293,10 @@ static int bgmac_probe(struct bcma_device *core) bgmac->cco_ctl_maskset = bcma_bgmac_cco_ctl_maskset; bgmac->get_bus_clock = bcma_bgmac_get_bus_clock; bgmac->cmn_maskset32 = bcma_bgmac_cmn_maskset32; + if (bgmac->mii_bus) + bgmac->phy_connect = bcma_phy_connect; + else + bgmac->phy_connect = bgmac_phy_connect_direct; err = bgmac_enet_probe(bgmac); if (err) diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c index be52f270c2c1..6f736c19872f 100644 --- a/drivers/net/ethernet/broadcom/bgmac-platform.c +++ b/drivers/net/ethernet/broadcom/bgmac-platform.c @@ -14,11 +14,21 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/bcma/bcma.h> +#include <linux/brcmphy.h> #include <linux/etherdevice.h> #include <linux/of_address.h> +#include <linux/of_mdio.h> #include <linux/of_net.h> #include "bgmac.h" +#define NICPM_IOMUX_CTRL 0x00000008 + +#define NICPM_IOMUX_CTRL_INIT_VAL 0x3196e000 +#define NICPM_IOMUX_CTRL_SPD_SHIFT 10 +#define NICPM_IOMUX_CTRL_SPD_10M 0 +#define NICPM_IOMUX_CTRL_SPD_100M 1 +#define NICPM_IOMUX_CTRL_SPD_1000M 2 + static u32 platform_bgmac_read(struct bgmac *bgmac, u16 offset) { return readl(bgmac->plat.base + offset); @@ -86,6 +96,54 @@ static void platform_bgmac_cmn_maskset32(struct bgmac *bgmac, u16 offset, WARN_ON(1); } +static void bgmac_nicpm_speed_set(struct net_device *net_dev) +{ + struct bgmac *bgmac = netdev_priv(net_dev); + u32 val; + + if (!bgmac->plat.nicpm_base) + return; + + val = NICPM_IOMUX_CTRL_INIT_VAL; + switch (bgmac->net_dev->phydev->speed) { + default: + netdev_err(net_dev, "Unsupported speed. Defaulting to 1000Mb\n"); + case SPEED_1000: + val |= NICPM_IOMUX_CTRL_SPD_1000M << NICPM_IOMUX_CTRL_SPD_SHIFT; + break; + case SPEED_100: + val |= NICPM_IOMUX_CTRL_SPD_100M << NICPM_IOMUX_CTRL_SPD_SHIFT; + break; + case SPEED_10: + val |= NICPM_IOMUX_CTRL_SPD_10M << NICPM_IOMUX_CTRL_SPD_SHIFT; + break; + } + + writel(val, bgmac->plat.nicpm_base + NICPM_IOMUX_CTRL); + + bgmac_adjust_link(bgmac->net_dev); +} + +static int platform_phy_connect(struct bgmac *bgmac) +{ + struct phy_device *phy_dev; + + if (bgmac->plat.nicpm_base) + phy_dev = of_phy_get_and_connect(bgmac->net_dev, + bgmac->dev->of_node, + bgmac_nicpm_speed_set); + else + phy_dev = of_phy_get_and_connect(bgmac->net_dev, + bgmac->dev->of_node, + bgmac_adjust_link); + if (!phy_dev) { + dev_err(bgmac->dev, "PHY connection failed\n"); + return -ENODEV; + } + + return 0; +} + static int bgmac_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -102,7 +160,6 @@ static int bgmac_probe(struct platform_device *pdev) /* Set the features of the 4707 family */ bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST; bgmac->feature_flags |= BGMAC_FEAT_NO_RESET; - bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500; bgmac->feature_flags |= BGMAC_FEAT_CMDCFG_SR_REV4; bgmac->feature_flags |= BGMAC_FEAT_TX_MASK_SETUP; bgmac->feature_flags |= BGMAC_FEAT_RX_MASK_SETUP; @@ -142,6 +199,14 @@ static int bgmac_probe(struct platform_device *pdev) if (IS_ERR(bgmac->plat.idm_base)) return PTR_ERR(bgmac->plat.idm_base); + regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nicpm_base"); + if (regs) { + bgmac->plat.nicpm_base = devm_ioremap_resource(&pdev->dev, + regs); + if (IS_ERR(bgmac->plat.nicpm_base)) + return PTR_ERR(bgmac->plat.nicpm_base); + } + bgmac->read = platform_bgmac_read; bgmac->write = 
platform_bgmac_write; bgmac->idm_read = platform_bgmac_idm_read; @@ -151,6 +216,12 @@ static int bgmac_probe(struct platform_device *pdev) bgmac->cco_ctl_maskset = platform_bgmac_cco_ctl_maskset; bgmac->get_bus_clock = platform_bgmac_get_bus_clock; bgmac->cmn_maskset32 = platform_bgmac_cmn_maskset32; + if (of_parse_phandle(np, "phy-handle", 0)) { + bgmac->phy_connect = platform_phy_connect; + } else { + bgmac->phy_connect = bgmac_phy_connect_direct; + bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500; + } return bgmac_enet_probe(bgmac); } @@ -167,6 +238,7 @@ static int bgmac_remove(struct platform_device *pdev) static const struct of_device_id bgmac_of_enet_match[] = { {.compatible = "brcm,amac",}, {.compatible = "brcm,nsp-amac",}, + {.compatible = "brcm,ns2-amac",}, {}, }; diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 31ca204b38d2..a29787fbb572 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1082,6 +1082,9 @@ static void bgmac_enable(struct bgmac *bgmac) /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipinit */ static void bgmac_chip_init(struct bgmac *bgmac) { + /* Clear any erroneously pending interrupts */ + bgmac_write(bgmac, BGMAC_INT_STATUS, ~0); + /* 1 interrupt per received frame */ bgmac_write(bgmac, BGMAC_INT_RECV_LAZY, 1 << BGMAC_IRL_FC_SHIFT); @@ -1388,7 +1391,7 @@ static const struct ethtool_ops bgmac_ethtool_ops = { * MII **************************************************/ -static void bgmac_adjust_link(struct net_device *net_dev) +void bgmac_adjust_link(struct net_device *net_dev) { struct bgmac *bgmac = netdev_priv(net_dev); struct phy_device *phy_dev = net_dev->phydev; @@ -1411,8 +1414,9 @@ static void bgmac_adjust_link(struct net_device *net_dev) phy_print_status(phy_dev); } } +EXPORT_SYMBOL_GPL(bgmac_adjust_link); -static int bgmac_phy_connect_direct(struct bgmac *bgmac) +int bgmac_phy_connect_direct(struct bgmac *bgmac) { struct fixed_phy_status fphy_status = { .link = 1, @@ -1437,24 +1441,7 @@ static int bgmac_phy_connect_direct(struct bgmac *bgmac) return err; } - -static int bgmac_phy_connect(struct bgmac *bgmac) -{ - struct phy_device *phy_dev; - char bus_id[MII_BUS_ID_SIZE + 3]; - - /* Connect to the PHY */ - snprintf(bus_id, sizeof(bus_id), PHY_ID_FMT, bgmac->mii_bus->id, - bgmac->phyaddr); - phy_dev = phy_connect(bgmac->net_dev, bus_id, &bgmac_adjust_link, - PHY_INTERFACE_MODE_MII); - if (IS_ERR(phy_dev)) { - dev_err(bgmac->dev, "PHY connection failed\n"); - return PTR_ERR(phy_dev); - } - - return 0; -} +EXPORT_SYMBOL_GPL(bgmac_phy_connect_direct); int bgmac_enet_probe(struct bgmac *info) { @@ -1507,10 +1494,7 @@ int bgmac_enet_probe(struct bgmac *info) netif_napi_add(net_dev, &bgmac->napi, bgmac_poll, BGMAC_WEIGHT); - if (!bgmac->mii_bus) - err = bgmac_phy_connect_direct(bgmac); - else - err = bgmac_phy_connect(bgmac); + err = bgmac_phy_connect(bgmac); if (err) { dev_err(bgmac->dev, "Cannot connect to phy\n"); goto err_dma_free; diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h index 80836b4c9f38..71f493f2451f 100644 --- a/drivers/net/ethernet/broadcom/bgmac.h +++ b/drivers/net/ethernet/broadcom/bgmac.h @@ -463,6 +463,7 @@ struct bgmac { struct { void *base; void *idm_base; + void *nicpm_base; } plat; struct { struct bcma_device *core; @@ -513,10 +514,13 @@ struct bgmac { u32 (*get_bus_clock)(struct bgmac *bgmac); void (*cmn_maskset32)(struct bgmac *bgmac, u16 offset, u32 mask, u32 set); + int (*phy_connect)(struct 
bgmac *bgmac); }; int bgmac_enet_probe(struct bgmac *info); void bgmac_enet_remove(struct bgmac *bgmac); +void bgmac_adjust_link(struct net_device *net_dev); +int bgmac_phy_connect_direct(struct bgmac *bgmac); struct mii_bus *bcma_mdio_mii_register(struct bcma_device *core, u8 phyaddr); void bcma_mdio_mii_unregister(struct mii_bus *mii_bus); @@ -583,4 +587,9 @@ static inline void bgmac_set(struct bgmac *bgmac, u16 offset, u32 set) { bgmac_maskset(bgmac, offset, ~0, set); } + +static inline int bgmac_phy_connect(struct bgmac *bgmac) +{ + return bgmac->phy_connect(bgmac); +} #endif /* _BGMAC_H */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a042da1ff4b9..d313b02485a1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4954,7 +4954,6 @@ static void bnxt_init_napi(struct bnxt *bp) bnapi = bp->bnapi[cp_nr_rings]; netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll_nitroa0, 64); - napi_hash_add(&bnapi->napi); } } else { bnapi = bp->bnapi[0]; diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index e093cbf26c8c..09602f1187f5 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -426,8 +426,14 @@ enum hnae_media_type { * get mac address * set_mac_addr() * set mac address + * clr_mc_addr() + * clear mcast tcam table * set_mc_addr() * set multicast mode + * add_uc_addr() + * add ucast address + * rm_uc_addr() + * remove ucast address * set_mtu() * set mtu * update_stats() @@ -488,6 +494,11 @@ struct hnae_ae_ops { void (*set_promisc_mode)(struct hnae_handle *handle, u32 en); int (*get_mac_addr)(struct hnae_handle *handle, void **p); int (*set_mac_addr)(struct hnae_handle *handle, void *p); + int (*add_uc_addr)(struct hnae_handle *handle, + const unsigned char *addr); + int (*rm_uc_addr)(struct hnae_handle *handle, + const unsigned char *addr); + int (*clr_mc_addr)(struct hnae_handle *handle); int (*set_mc_addr)(struct hnae_handle *handle, void *addr); int (*set_mtu)(struct hnae_handle *handle, int new_mtu); void (*set_tso_stats)(struct hnae_handle *handle, int enable); @@ -590,7 +601,7 @@ static inline int hnae_alloc_buffer_attach(struct hnae_ring *ring, int i) if (ret) return ret; - ring->desc[i].addr = (__le64)ring->desc_cb[i].dma; + ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma); return 0; } @@ -621,14 +632,14 @@ static inline void hnae_replace_buffer(struct hnae_ring *ring, int i, bops->unmap_buffer(ring, &ring->desc_cb[i]); ring->desc_cb[i] = *res_cb; - ring->desc[i].addr = (__le64)ring->desc_cb[i].dma; + ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma); ring->desc[i].rx.ipoff_bnum_pid_flag = 0; } static inline void hnae_reuse_buffer(struct hnae_ring *ring, int i) { ring->desc_cb[i].reuse_flag = 0; - ring->desc[i].addr = (__le64)(ring->desc_cb[i].dma + ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma + ring->desc_cb[i].page_offset); ring->desc[i].rx.ipoff_bnum_pid_flag = 0; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index 2d0cb609adc3..0a9cdf00b31a 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -18,9 +18,6 @@ #include "hns_dsaf_rcb.h" #define AE_NAME_PORT_ID_IDX 6 -#define ETH_STATIC_REG 1 -#define ETH_DUMP_REG 5 -#define ETH_GSTRING_LEN 32 static struct hns_mac_cb *hns_get_mac_cb(struct hnae_handle *handle) { @@ -202,6 
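
A detail worth calling out in the hnae hunks above: storing a DMA address into a descriptor with a bare (__le64) cast only changes the declared type, whereas cpu_to_le64() actually byte-swaps on big-endian hosts, which is what a little-endian hardware field requires. A hedged userspace illustration with a toy descriptor (not the driver's struct):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Toy descriptor with a little-endian 64-bit address field. */
    struct desc {
            uint64_t addr_le;
    };

    /* Portable equivalent of cpu_to_le64(): emit the value least significant
     * byte first, whatever the host byte order is.
     */
    static uint64_t to_le64(uint64_t v)
    {
            uint8_t b[8];
            uint64_t out;
            int i;

            for (i = 0; i < 8; i++)
                    b[i] = (uint8_t)(v >> (8 * i));
            memcpy(&out, b, 8);     /* bytes now sit in LE order in memory */
            return out;
    }

    int main(void)
    {
            struct desc d;

            d.addr_le = to_le64(0x1122334455667788ULL);
            /* prints 0x88 on both little- and big-endian hosts */
            printf("first byte in memory: 0x%02x\n", ((uint8_t *)&d)[0]);
            return 0;
    }

A bare cast produces the right bytes only on little-endian machines, which is why this class of bug tends to survive x86-only testing.
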
+199,28 @@ static int hns_ae_set_mac_address(struct hnae_handle *handle, void *p) return 0; } +static int hns_ae_add_uc_address(struct hnae_handle *handle, + const unsigned char *addr) +{ + struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); + + if (mac_cb->mac_type != HNAE_PORT_SERVICE) + return -ENOSPC; + + return hns_mac_add_uc_addr(mac_cb, handle->vf_id, addr); +} + +static int hns_ae_rm_uc_address(struct hnae_handle *handle, + const unsigned char *addr) +{ + struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); + + if (mac_cb->mac_type != HNAE_PORT_SERVICE) + return -ENOSPC; + + return hns_mac_rm_uc_addr(mac_cb, handle->vf_id, addr); +} + static int hns_ae_set_multicast_one(struct hnae_handle *handle, void *addr) { int ret; @@ -235,6 +254,16 @@ static int hns_ae_set_multicast_one(struct hnae_handle *handle, void *addr) return ret; } +static int hns_ae_clr_multicast(struct hnae_handle *handle) +{ + struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); + + if (mac_cb->mac_type != HNAE_PORT_SERVICE) + return 0; + + return hns_mac_clr_multicast(mac_cb, handle->vf_id); +} + static int hns_ae_set_mtu(struct hnae_handle *handle, int new_mtu) { struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); @@ -823,7 +852,10 @@ static struct hnae_ae_ops hns_dsaf_ops = { .get_coalesce_range = hns_ae_get_coalesce_range, .set_promisc_mode = hns_ae_set_promisc_mode, .set_mac_addr = hns_ae_set_mac_address, + .add_uc_addr = hns_ae_add_uc_address, + .rm_uc_addr = hns_ae_rm_uc_address, .set_mc_addr = hns_ae_set_multicast_one, + .clr_mc_addr = hns_ae_clr_multicast, .set_mtu = hns_ae_set_mtu, .update_stats = hns_ae_update_stats, .set_tso_stats = hns_ae_set_tso_stats, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index 1e1eb92998fb..3382441fe7b5 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -37,8 +37,8 @@ static const struct mac_stats_string g_gmac_stats_string[] = { {"gmac_rx_very_long_err", MAC_STATS_FIELD_OFF(rx_long_err)}, {"gmac_rx_runt_err", MAC_STATS_FIELD_OFF(rx_minto64)}, {"gmac_rx_short_err", MAC_STATS_FIELD_OFF(rx_under_min)}, - {"gmac_rx_filt_pkt", MAC_STATS_FIELD_OFF(rx_filter_bytes)}, - {"gmac_rx_octets_total_filt", MAC_STATS_FIELD_OFF(rx_filter_pkts)}, + {"gmac_rx_filt_pkt", MAC_STATS_FIELD_OFF(rx_filter_pkts)}, + {"gmac_rx_octets_total_filt", MAC_STATS_FIELD_OFF(rx_filter_bytes)}, {"gmac_rx_overrun_cnt", MAC_STATS_FIELD_OFF(rx_fifo_overrun_err)}, {"gmac_rx_length_err", MAC_STATS_FIELD_OFF(rx_len_err)}, {"gmac_rx_fail_comma", MAC_STATS_FIELD_OFF(rx_comma_err)}, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 55cbb6ce733c..3239d27143b9 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -263,6 +263,46 @@ int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, return 0; } +int hns_mac_add_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id, + const unsigned char *addr) +{ + struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; + struct dsaf_drv_mac_single_dest_entry mac_entry; + int ret; + + if (HNS_DSAF_IS_DEBUG(dsaf_dev)) + return -ENOSPC; + + memset(&mac_entry, 0, sizeof(mac_entry)); + memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr)); + mac_entry.in_port_num = mac_cb->mac_id; + ret = hns_mac_get_inner_port_num(mac_cb, vf_id, &mac_entry.port_num); + if (ret) + return ret; + + return hns_dsaf_set_mac_uc_entry(dsaf_dev, 
&mac_entry); +} + +int hns_mac_rm_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id, + const unsigned char *addr) +{ + struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; + struct dsaf_drv_mac_single_dest_entry mac_entry; + int ret; + + if (HNS_DSAF_IS_DEBUG(dsaf_dev)) + return -ENOSPC; + + memset(&mac_entry, 0, sizeof(mac_entry)); + memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr)); + mac_entry.in_port_num = mac_cb->mac_id; + ret = hns_mac_get_inner_port_num(mac_cb, vf_id, &mac_entry.port_num); + if (ret) + return ret; + + return hns_dsaf_rm_mac_addr(dsaf_dev, &mac_entry); +} + int hns_mac_set_multi(struct hns_mac_cb *mac_cb, u32 port_num, char *addr, bool enable) { @@ -330,13 +370,24 @@ int hns_mac_del_mac(struct hns_mac_cb *mac_cb, u32 vfn, char *mac) return 0; } +int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn) +{ + struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; + u8 port_num; + int ret = hns_mac_get_inner_port_num(mac_cb, vfn, &port_num); + + if (ret) + return ret; + + return hns_dsaf_clr_mac_mc_port(dsaf_dev, mac_cb->mac_id, port_num); +} + static void hns_mac_param_get(struct mac_params *param, struct hns_mac_cb *mac_cb) { param->vaddr = (void *)mac_cb->vaddr; param->mac_mode = hns_get_enet_interface(mac_cb); - memcpy(param->addr, mac_cb->addr_entry_idx[0].addr, - MAC_NUM_OCTETS_PER_ADDR); + ether_addr_copy(param->addr, mac_cb->addr_entry_idx[0].addr); param->mac_id = mac_cb->mac_id; param->dev = mac_cb->dev; } @@ -353,8 +404,7 @@ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb, { int ret; struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; - u8 addr[MAC_NUM_OCTETS_PER_ADDR] - = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + u8 addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct dsaf_drv_mac_single_dest_entry mac_entry; /* directy return ok in debug network mode */ @@ -389,8 +439,7 @@ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, bool enable) int ret; struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev; u8 port_num; - u8 addr[MAC_NUM_OCTETS_PER_ADDR] - = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + u8 addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct mac_entry_idx *uc_mac_entry; struct dsaf_drv_mac_single_dest_entry mac_entry; @@ -868,6 +917,13 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb) } } + if (fwnode_property_read_u8_array(mac_cb->fw_port, "mc-mac-mask", + mac_cb->mc_mask, ETH_ALEN)) { + dev_warn(mac_cb->dev, + "no mc-mac-mask property, set to default value.\n"); + eth_broadcast_addr(mac_cb->mc_mask); + } + return 0; } @@ -1081,6 +1137,8 @@ void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en) { struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb); + hns_dsaf_set_promisc_tcam(mac_cb->dsaf_dev, mac_cb->mac_id, !!en); + if (mac_ctrl_drv->set_promiscuous) mac_ctrl_drv->set_promiscuous(mac_ctrl_drv, en); } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index d3a1f72ece0e..2bb3d1e93c64 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -31,7 +31,7 @@ struct dsaf_device; #define MAC_MIN_MTU 68 #define MAC_MAX_MTU_DBG MAC_DEFAULT_MTU -#define MAC_DEFAULT_PAUSE_TIME 0xff +#define MAC_DEFAULT_PAUSE_TIME 0xffff #define MAC_GMAC_IDX 0 #define MAC_XGMAC_IDX 1 @@ -56,9 +56,6 @@ struct dsaf_device; /*check mac addr multicast*/ #define MAC_IS_MULTICAST(p) ((*((u8 *)((p) + 0)) & 0x01) ? 
(1) : (0)) -/**< Number of octets (8-bit bytes) in an ethernet address */ -#define MAC_NUM_OCTETS_PER_ADDR 6 - struct mac_priv { void *mac; }; @@ -189,7 +186,7 @@ struct mac_statistics { /*mac para struct ,mac get param from nic or dsaf when initialize*/ struct mac_params { - char addr[MAC_NUM_OCTETS_PER_ADDR]; + char addr[ETH_ALEN]; void *vaddr; /*virtual address*/ struct device *dev; u8 mac_id; @@ -214,7 +211,7 @@ struct mac_info { }; struct mac_entry_idx { - u8 addr[MAC_NUM_OCTETS_PER_ADDR]; + u8 addr[ETH_ALEN]; u16 vlan_id:12; u16 valid:1; u16 qos:3; @@ -317,6 +314,7 @@ struct hns_mac_cb { u8 __iomem *serdes_vaddr; struct regmap *serdes_ctrl; struct regmap *cpld_ctrl; + char mc_mask[ETH_ALEN]; u32 cpld_ctrl_reg; u32 port_rst_off; u32 port_mode_off; @@ -409,7 +407,7 @@ struct mac_driver { }; struct mac_stats_string { - char desc[64]; + char desc[ETH_GSTRING_LEN]; unsigned long offset; }; @@ -463,5 +461,10 @@ int hns_cpld_led_set_id(struct hns_mac_cb *mac_cb, void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en); int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb, u8 vmid, u8 *port_num); +int hns_mac_add_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id, + const unsigned char *addr); +int hns_mac_rm_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id, + const unsigned char *addr); +int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn); #endif /* _HNS_DSAF_MAC_H */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 8ea3d95fa483..90dbda792614 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -591,6 +591,16 @@ static void hns_dsaf_voq_bp_all_thrd_cfg(struct dsaf_device *dsaf_dev) } } +static void hns_dsaf_tbl_tcam_match_cfg( + struct dsaf_device *dsaf_dev, + struct dsaf_tbl_tcam_data *ptbl_tcam_data) +{ + dsaf_write_dev(dsaf_dev, DSAF_TBL_TCAM_MATCH_CFG_L_REG, + ptbl_tcam_data->tbl_tcam_data_low); + dsaf_write_dev(dsaf_dev, DSAF_TBL_TCAM_MATCH_CFG_H_REG, + ptbl_tcam_data->tbl_tcam_data_high); +} + /** * hns_dsaf_tbl_tcam_data_cfg - tbl * @dsaf_id: dsa fabric id @@ -755,7 +765,7 @@ static void hns_dsaf_tbl_tcam_data_ucast_pul( void hns_dsaf_set_promisc_mode(struct dsaf_device *dsaf_dev, u32 en) { - if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) + if (AE_IS_VER1(dsaf_dev->dsaf_ver) && !HNS_DSAF_IS_DEBUG(dsaf_dev)) dsaf_set_dev_bit(dsaf_dev, DSAF_CFG_0_REG, DSAF_CFG_MIX_MODE_S, !!en); } @@ -894,15 +904,17 @@ static void hns_dsaf_tcam_uc_cfg( } /** - * hns_dsaf_tcam_mc_cfg - INT - * @dsaf_id: dsa fabric id - * @address, - * @ptbl_tcam_data, - * @ptbl_tcam_mcast, + * hns_dsaf_tcam_mc_cfg - cfg the tcam for mc + * @dsaf_dev: dsa fabric device struct pointer + * @address: tcam index + * @ptbl_tcam_data: tcam data struct pointer + * @ptbl_tcam_mask: tcam mask struct pointer, must be NULL for HNSv1 + * @ptbl_tcam_mcast: tcam mcast cfg struct pointer */ static void hns_dsaf_tcam_mc_cfg( struct dsaf_device *dsaf_dev, u32 address, struct dsaf_tbl_tcam_data *ptbl_tcam_data, + struct dsaf_tbl_tcam_data *ptbl_tcam_mask, struct dsaf_tbl_tcam_mcast_cfg *ptbl_tcam_mcast) { spin_lock_bh(&dsaf_dev->tcam_lock); @@ -913,7 +924,11 @@ static void hns_dsaf_tcam_mc_cfg( hns_dsaf_tbl_tcam_data_cfg(dsaf_dev, ptbl_tcam_data); /*Write Tcam Mcast*/ hns_dsaf_tbl_tcam_mcast_cfg(dsaf_dev, ptbl_tcam_mcast); - /*Write Plus*/ + /* Write Match Data */ + if (ptbl_tcam_mask) + hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, ptbl_tcam_mask); + + /* Write Pulse */ hns_dsaf_tbl_tcam_data_mcast_pul(dsaf_dev); spin_unlock_bh(&dsaf_dev->tcam_lock); @@ -944,6
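
The new DSAF_TBL_TCAM_MATCH_CFG_H/L registers written by hns_dsaf_tbl_tcam_match_cfg() let HNSv2 store a per-bit match mask next to each TCAM key, which is what the mc-mac-mask binding relies on: a mask bit of 1 means the corresponding key bit must match exactly, 0 makes it a don't-care. A small software model of that comparison rule (the helper name is mine, not driver API):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* TCAM-style compare: bits set in 'mask' must match exactly between the
     * stored key and the lookup key; cleared bits are don't-care.
     */
    static bool tcam_match(uint64_t key, uint64_t lookup, uint64_t mask)
    {
            return ((key ^ lookup) & mask) == 0;
    }

    int main(void)
    {
            /* mc-mac-mask ff:f0:00:00:00:00 -> only the first 12 bits count */
            uint64_t mask = 0xfff0000000000000ULL;
            uint64_t key  = 0x0180000000000000ULL;  /* stored mc prefix */

            /* prints 1: 01:81:c2:... agrees with the key in its top 12 bits */
            printf("%d\n", tcam_match(key, 0x0181c2aabbccddeeULL, mask));
            return 0;
    }
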
+959,16 @@ static void hns_dsaf_tcam_mc_invld(struct dsaf_device *dsaf_dev, u32 address) spin_unlock_bh(&dsaf_dev->tcam_lock); } +void hns_dsaf_tcam_addr_get(struct dsaf_drv_tbl_tcam_key *mac_key, u8 *addr) +{ + addr[0] = mac_key->high.bits.mac_0; + addr[1] = mac_key->high.bits.mac_1; + addr[2] = mac_key->high.bits.mac_2; + addr[3] = mac_key->high.bits.mac_3; + addr[4] = mac_key->low.bits.mac_4; + addr[5] = mac_key->low.bits.mac_5; +} + /** * hns_dsaf_tcam_uc_get - INT * @dsaf_id: dsa fabric id @@ -1369,6 +1394,12 @@ static int hns_dsaf_init(struct dsaf_device *dsaf_dev) if (HNS_DSAF_IS_DEBUG(dsaf_dev)) return 0; + if (AE_IS_VER1(dsaf_dev->dsaf_ver)) + dsaf_dev->tcam_max_num = DSAF_TCAM_SUM; + else + dsaf_dev->tcam_max_num = + DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM; + spin_lock_init(&dsaf_dev->tcam_lock); ret = hns_dsaf_init_hw(dsaf_dev); if (ret) @@ -1424,7 +1455,7 @@ static u16 hns_dsaf_find_soft_mac_entry( u32 i; soft_mac_entry = priv->soft_mac_tbl; - for (i = 0; i < DSAF_TCAM_SUM; i++) { + for (i = 0; i < dsaf_dev->tcam_max_num; i++) { /* invall tab entry */ if ((soft_mac_entry->index != DSAF_INVALID_ENTRY_IDX) && (soft_mac_entry->tcam_key.high.val == mac_key->high.val) && @@ -1449,7 +1480,7 @@ static u16 hns_dsaf_find_empty_mac_entry(struct dsaf_device *dsaf_dev) u32 i; soft_mac_entry = priv->soft_mac_tbl; - for (i = 0; i < DSAF_TCAM_SUM; i++) { + for (i = 0; i < dsaf_dev->tcam_max_num; i++) { /* inv all entry */ if (soft_mac_entry->index == DSAF_INVALID_ENTRY_IDX) /* return find result --soft index */ @@ -1488,8 +1519,12 @@ static void hns_dsaf_set_mac_key( mac_key->high.bits.mac_3 = addr[3]; mac_key->low.bits.mac_4 = addr[4]; mac_key->low.bits.mac_5 = addr[5]; - mac_key->low.bits.vlan = vlan_id; - mac_key->low.bits.port = port; + dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_VLAN_M, + DSAF_TBL_TCAM_KEY_VLAN_S, vlan_id); + dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M, + DSAF_TBL_TCAM_KEY_PORT_S, port); + + mac_key->low.bits.port_vlan = le16_to_cpu(mac_key->low.bits.port_vlan); } /** @@ -1507,6 +1542,7 @@ int hns_dsaf_set_mac_uc_entry( struct dsaf_drv_priv *priv = (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev); struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl; + struct dsaf_tbl_tcam_data tcam_data; /* mac addr check */ if (MAC_IS_ALL_ZEROS(mac_entry->addr) || @@ -1548,9 +1584,10 @@ int hns_dsaf_set_mac_uc_entry( /* default config dvc to 0 */ mac_data.tbl_ucast_dvc = 0; mac_data.tbl_ucast_out_port = mac_entry->port_num; - hns_dsaf_tcam_uc_cfg( - dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data); + tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); + tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + + hns_dsaf_tcam_uc_cfg(dsaf_dev, entry_index, &tcam_data, &mac_data); /* config software entry */ soft_mac_entry += entry_index; @@ -1561,6 +1598,55 @@ int hns_dsaf_set_mac_uc_entry( return 0; } +int hns_dsaf_rm_mac_addr( + struct dsaf_device *dsaf_dev, + struct dsaf_drv_mac_single_dest_entry *mac_entry) +{ + u16 entry_index = DSAF_INVALID_ENTRY_IDX; + struct dsaf_tbl_tcam_ucast_cfg mac_data; + struct dsaf_drv_tbl_tcam_key mac_key; + + /* mac addr check */ + if (!is_valid_ether_addr(mac_entry->addr)) { + dev_err(dsaf_dev->dev, "rm_uc_addr %s Mac %pM err!\n", + dsaf_dev->ae_dev.name, mac_entry->addr); + return -EINVAL; + } + + /* config key */ + hns_dsaf_set_mac_key(dsaf_dev, &mac_key, mac_entry->in_vlan_id, + mac_entry->in_port_num, mac_entry->addr); + + entry_index = 
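
hns_dsaf_mc_mask_bit_clear() above ANDs the six address octets with the port's mc-mac-mask as three 16-bit words, which relies on both buffers being at least 2-byte aligned; only the exact-match bits survive into the TCAM key. The same operation byte by byte, as a sketch:

    #include <stdio.h>

    #define ETH_ALEN 6

    /* dst &= mask over the six octets: clear the don't-care bits so only
     * the precisely matched part of the address lands in the TCAM key.
     */
    static void mc_mask_bit_clear(unsigned char *dst, const unsigned char *mask)
    {
            int i;

            for (i = 0; i < ETH_ALEN; i++)
                    dst[i] &= mask[i];
    }

    int main(void)
    {
            unsigned char addr[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0xaa, 0xbb, 0xcc };
            unsigned char mask[ETH_ALEN] = { 0xff, 0xf0, 0x00, 0x00, 0x00, 0x00 };

            mc_mask_bit_clear(addr, mask);
            printf("%02x:%02x:%02x:%02x:%02x:%02x\n",   /* 01:80:00:00:00:00 */
                   addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
            return 0;
    }
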
hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key); + if (entry_index == DSAF_INVALID_ENTRY_IDX) { + /* cannot find the tcam entry, return 0 */ + dev_info(dsaf_dev->dev, + "rm_uc_addr no tcam, %s Mac key(%#x:%#x)\n", + dsaf_dev->ae_dev.name, + mac_key.high.val, mac_key.low.val); + return 0; + } + + dev_dbg(dsaf_dev->dev, + "rm_uc_addr, %s Mac key(%#x:%#x) entry_index%d\n", + dsaf_dev->ae_dev.name, mac_key.high.val, + mac_key.low.val, entry_index); + + hns_dsaf_tcam_uc_get( + dsaf_dev, entry_index, + (struct dsaf_tbl_tcam_data *)&mac_key, + &mac_data); + + /* do not clear a unicast entry that is not used locally */ + if (mac_entry->port_num != mac_data.tbl_ucast_out_port) + return -EFAULT; + + return hns_dsaf_del_mac_entry(dsaf_dev, + mac_entry->in_vlan_id, + mac_entry->in_port_num, + mac_entry->addr); +} + /** * hns_dsaf_set_mac_mc_entry - set mac mc-entry * @dsaf_dev: dsa fabric device struct pointer @@ -1577,6 +1663,7 @@ int hns_dsaf_set_mac_mc_entry( (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev); struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl; struct dsaf_drv_tbl_tcam_key tmp_mac_key; + struct dsaf_tbl_tcam_data tcam_data; /* mac addr check */ if (MAC_IS_ALL_ZEROS(mac_entry->addr)) { @@ -1609,9 +1696,12 @@ 0, sizeof(mac_data.tbl_mcast_port_msk)); } else { /* config hardware entry */ - hns_dsaf_tcam_mc_get( - dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)(&tmp_mac_key), &mac_data); + hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, + &mac_data); + + tmp_mac_key.high.val = + le32_to_cpu(tcam_data.tbl_tcam_data_high); + tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low); } mac_data.tbl_mcast_old_en = 0; mac_data.tbl_mcast_item_vld = 1; @@ -1623,9 +1713,11 @@ dsaf_dev->ae_dev.name, mac_key.high.val, mac_key.low.val, entry_index); - hns_dsaf_tcam_mc_cfg( - dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data); + tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); + tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + + hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data, NULL, + &mac_data); /* config software entry */ soft_mac_entry += entry_index; @@ -1636,6 +1728,16 @@ return 0; } +static void hns_dsaf_mc_mask_bit_clear(char *dst, const char *src) +{ + u16 *a = (u16 *)dst; + const u16 *b = (const u16 *)src; + + a[0] &= b[0]; + a[1] &= b[1]; + a[2] &= b[2]; +} + /** * hns_dsaf_add_mac_mc_port - add mac mc-port * @dsaf_dev: dsa fabric device struct pointer @@ -1646,11 +1748,15 @@ { u16 entry_index = DSAF_INVALID_ENTRY_IDX; struct dsaf_drv_tbl_tcam_key mac_key; + struct dsaf_drv_tbl_tcam_key mask_key; + struct dsaf_tbl_tcam_data *pmask_key = NULL; struct dsaf_tbl_tcam_mcast_cfg mac_data; - struct dsaf_drv_priv *priv = - (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev); + struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl; struct dsaf_drv_tbl_tcam_key tmp_mac_key; + struct dsaf_tbl_tcam_data tcam_data; + u8 mc_addr[ETH_ALEN]; + u8 *mc_mask; int mskid; /*chechk mac addr */ if (MAC_IS_ALL_ZEROS(mac_entry->addr) || @@ -1660,14 +1766,32 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev, return -EINVAL; } + ether_addr_copy(mc_addr, mac_entry->addr); + mc_mask = dsaf_dev->mac_cb[mac_entry->in_port_num]->mc_mask; + if (!AE_IS_VER1(dsaf_dev->dsaf_ver)) { + /* prepare for key data setting */ + 
hns_dsaf_mc_mask_bit_clear(mc_addr, mc_mask); + + /* config key mask */ + hns_dsaf_set_mac_key(dsaf_dev, &mask_key, + 0x0, + 0xff, + mc_mask); + + mask_key.high.val = le32_to_cpu(mask_key.high.val); + mask_key.low.val = le32_to_cpu(mask_key.low.val); + + pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key); + } + /*config key */ hns_dsaf_set_mac_key( dsaf_dev, &mac_key, mac_entry->in_vlan_id, - mac_entry->in_port_num, mac_entry->addr); + mac_entry->in_port_num, mc_addr); memset(&mac_data, 0, sizeof(struct dsaf_tbl_tcam_mcast_cfg)); - /*check exist? */ + /* check if the tcam entry already exists */ entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key); if (entry_index == DSAF_INVALID_ENTRY_IDX) { /*if hasnot , find a empty*/ @@ -1681,11 +1805,15 @@ return -EINVAL; } } else { - /*if exist, add in */ + /* if it exists, add to it */ hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, + &mac_data); + + tmp_mac_key.high.val = + le32_to_cpu(tcam_data.tbl_tcam_data_high); + tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low); } + /* config hardware entry */ if (mac_entry->port_num < DSAF_SERVICE_NW_NUM) { mskid = mac_entry->port_num; @@ -1708,9 +1836,12 @@ dsaf_dev->ae_dev.name, mac_key.high.val, mac_key.low.val, entry_index); - hns_dsaf_tcam_mc_cfg( - dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data); + tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); + tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + + /* config mc entry with mask */ + hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data, + pmask_key, &mac_data); /*config software entry */ soft_mac_entry += entry_index; @@ -1782,15 +1913,18 @@ { u16 entry_index = DSAF_INVALID_ENTRY_IDX; struct dsaf_drv_tbl_tcam_key mac_key; - struct dsaf_drv_priv *priv = - (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev); + struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl; u16 vlan_id; u8 in_port_num; struct dsaf_tbl_tcam_mcast_cfg mac_data; - struct dsaf_drv_tbl_tcam_key tmp_mac_key; + struct dsaf_tbl_tcam_data tcam_data; int mskid; const u8 empty_msk[sizeof(mac_data.tbl_mcast_port_msk)] = {0}; + struct dsaf_drv_tbl_tcam_key mask_key, tmp_mac_key; + struct dsaf_tbl_tcam_data *pmask_key = NULL; + u8 mc_addr[ETH_ALEN]; + u8 *mc_mask; if (!(void *)mac_entry) { dev_err(dsaf_dev->dev, @@ -1798,10 +1932,6 @@ return -EINVAL; } - /*get key info*/ - vlan_id = mac_entry->in_vlan_id; - in_port_num = mac_entry->in_port_num; - /*check mac addr */ if (MAC_IS_ALL_ZEROS(mac_entry->addr)) { dev_err(dsaf_dev->dev, "del_port failed, addr %pM!\n", @@ -1809,11 +1939,31 @@ return -EINVAL; } - /*config key */ - hns_dsaf_set_mac_key(dsaf_dev, &mac_key, vlan_id, in_port_num, - mac_entry->addr); + /* always mask vlan_id field */ + ether_addr_copy(mc_addr, mac_entry->addr); + mc_mask = dsaf_dev->mac_cb[mac_entry->in_port_num]->mc_mask; + + if (!AE_IS_VER1(dsaf_dev->dsaf_ver)) { + /* prepare for key data setting */ + hns_dsaf_mc_mask_bit_clear(mc_addr, mc_mask); + + /* config key mask */ + hns_dsaf_set_mac_key(dsaf_dev, &mask_key, 0x00, 0xff, mc_addr); + + 
mask_key.high.val = le32_to_cpu(mask_key.high.val); + mask_key.low.val = le32_to_cpu(mask_key.low.val); - /*check is exist? */ + pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key); + } + + /* get key info */ + vlan_id = mac_entry->in_vlan_id; + in_port_num = mac_entry->in_port_num; + + /* config key */ + hns_dsaf_set_mac_key(dsaf_dev, &mac_key, vlan_id, in_port_num, mc_addr); + + /* check if the tcam entry exists */ entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key); if (entry_index == DSAF_INVALID_ENTRY_IDX) { /*find none */ @@ -1829,10 +1979,11 @@ dsaf_dev->ae_dev.name, mac_key.high.val, mac_key.low.val, entry_index); - /*read entry*/ - hns_dsaf_tcam_mc_get( - dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)(&tmp_mac_key), &mac_data); + /* read entry */ + hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data); + + tmp_mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high); + tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low); /*del the port*/ if (mac_entry->port_num < DSAF_SERVICE_NW_NUM) { @@ -1857,15 +2008,87 @@ /* del soft entry */ soft_mac_entry += entry_index; soft_mac_entry->index = DSAF_INVALID_ENTRY_IDX; - } else { /* not zer, just del port, updata*/ - hns_dsaf_tcam_mc_cfg( - dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data); + } else { /* not zero, just del port, update */ + tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val); + tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val); + + hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, + &tcam_data, + pmask_key, &mac_data); } return 0; } +int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev, u8 mac_id, + u8 port_num) +{ + struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); + struct dsaf_drv_soft_mac_tbl *soft_mac_entry; + struct dsaf_tbl_tcam_mcast_cfg mac_data; + int ret = 0, i; + + if (HNS_DSAF_IS_DEBUG(dsaf_dev)) + return 0; + + for (i = 0; i < DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM; i++) { + u8 addr[ETH_ALEN]; + u8 port; + + soft_mac_entry = priv->soft_mac_tbl + i; + + hns_dsaf_tcam_addr_get(&soft_mac_entry->tcam_key, addr); + port = dsaf_get_field( + soft_mac_entry->tcam_key.low.bits.port_vlan, + DSAF_TBL_TCAM_KEY_PORT_M, + DSAF_TBL_TCAM_KEY_PORT_S); + /* check valid tcam mc entry */ + if (soft_mac_entry->index != DSAF_INVALID_ENTRY_IDX && + port == mac_id && + is_multicast_ether_addr(addr) && + !is_broadcast_ether_addr(addr)) { + const u32 empty_msk[DSAF_PORT_MSK_NUM] = {0}; + struct dsaf_drv_mac_single_dest_entry mac_entry; + + /* disable receiving of this multicast address for + * the VF. 
+ */ + ether_addr_copy(mac_entry.addr, addr); + mac_entry.in_vlan_id = dsaf_get_field( + soft_mac_entry->tcam_key.low.bits.port_vlan, + DSAF_TBL_TCAM_KEY_VLAN_M, + DSAF_TBL_TCAM_KEY_VLAN_S); + mac_entry.in_port_num = mac_id; + mac_entry.port_num = port_num; + if (hns_dsaf_del_mac_mc_port(dsaf_dev, &mac_entry)) { + ret = -EINVAL; + continue; + } + + /* disable receiving of this multicast address for + * the mac port if all VFs are disabled + */ + hns_dsaf_tcam_mc_get(dsaf_dev, i, + (struct dsaf_tbl_tcam_data *) + (&soft_mac_entry->tcam_key), + &mac_data); + dsaf_set_bit(mac_data.tbl_mcast_port_msk[mac_id / 32], + mac_id % 32, 0); + if (!memcmp(mac_data.tbl_mcast_port_msk, empty_msk, + sizeof(u32) * DSAF_PORT_MSK_NUM)) { + mac_entry.port_num = mac_id; + if (hns_dsaf_del_mac_mc_port(dsaf_dev, + &mac_entry)) { + ret = -EINVAL; + continue; + } + } + } + } + + return ret; +} + /** * hns_dsaf_get_mac_uc_entry - get mac uc entry * @dsaf_dev: dsa fabric device struct pointer @@ -1878,6 +2101,7 @@ int hns_dsaf_get_mac_uc_entry(struct dsaf_device *dsaf_dev, struct dsaf_drv_tbl_tcam_key mac_key; struct dsaf_tbl_tcam_ucast_cfg mac_data; + struct dsaf_tbl_tcam_data tcam_data; /* check macaddr */ if (MAC_IS_ALL_ZEROS(mac_entry->addr) || @@ -1906,9 +2130,12 @@ dsaf_dev->ae_dev.name, mac_key.high.val, mac_key.low.val, entry_index); - /*read entry*/ - hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)&mac_key, &mac_data); + /* read entry */ + hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, &tcam_data, &mac_data); + + mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high); + mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low); + mac_entry->port_num = mac_data.tbl_ucast_out_port; return 0; @@ -1926,6 +2153,7 @@ int hns_dsaf_get_mac_mc_entry(struct dsaf_device *dsaf_dev, struct dsaf_drv_tbl_tcam_key mac_key; struct dsaf_tbl_tcam_mcast_cfg mac_data; + struct dsaf_tbl_tcam_data tcam_data; /*check mac addr */ if (MAC_IS_ALL_ZEROS(mac_entry->addr) || @@ -1955,8 +2183,10 @@ mac_key.low.val, entry_index); /*read entry */ - hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)&mac_key, &mac_data); + hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data); + + mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high); + mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low); mac_entry->port_mask[0] = mac_data.tbl_mcast_port_msk[0] & 0x3F; return 0; @@ -1976,9 +2206,10 @@ struct dsaf_tbl_tcam_mcast_cfg mac_data; struct dsaf_tbl_tcam_ucast_cfg mac_uc_data; - char mac_addr[MAC_NUM_OCTETS_PER_ADDR] = {0}; + struct dsaf_tbl_tcam_data tcam_data; + char mac_addr[ETH_ALEN] = {0}; - if (entry_index >= DSAF_TCAM_SUM) { + if (entry_index >= dsaf_dev->tcam_max_num) { /* find none, del error */ dev_err(dsaf_dev->dev, "get_uc_entry failed, %s\n", dsaf_dev->ae_dev.name); @@ -1986,8 +2217,10 @@ } /* mc entry, do read opt */ - hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)&mac_key, &mac_data); + hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data); + + mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high); + mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low); mac_entry->port_mask[0] = mac_data.tbl_mcast_port_msk[0] & 0x3F; @@ -2004,9 +2237,12 @@ /**mc donot do*/ } else { /*is not 
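
Bookkeeping that ties these hunks together: HNSv2 reserves the tail of the 512-entry TCAM for per-port promisc entries (DSAFV2_MAC_FUZZY_TCAM_NUM of them), so tcam_max_num caps ordinary entries and the sweep in hns_dsaf_clr_mac_mc_port stops short of the reserved region. The index arithmetic, assuming an 8-port DSAF_MAX_PORT_NUM (an assumption here; that define is not shown in this diff):

    #include <stdio.h>

    #define DSAF_TCAM_SUM             512
    #define DSAF_MAX_PORT_NUM         8     /* assumed; not shown in this diff */
    #define DSAFV2_MAC_FUZZY_TCAM_NUM DSAF_MAX_PORT_NUM

    /* One reserved promisc entry per port, packed at the end of the TCAM. */
    static int promisc_tcam_entry(int port)
    {
            return DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM + port;
    }

    int main(void)
    {
            int tcam_max_num = DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM;

            printf("user entries: 0..%d\n", tcam_max_num - 1);      /* 0..503 */
            printf("port 0 promisc entry: %d\n", promisc_tcam_entry(0)); /* 504 */
            return 0;
    }
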
mc, just uc... */ - hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, - (struct dsaf_tbl_tcam_data *)&mac_key, + hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, &tcam_data, &mac_uc_data); + + mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high); + mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low); + mac_entry->port_mask[0] = (1 << mac_uc_data.tbl_ucast_out_port); } @@ -2670,6 +2906,59 @@ int hns_dsaf_get_regs_count(void) return DSAF_DUMP_REGS_NUM; } +/* Reserve the last TCAM entry for promisc support */ +#define dsaf_promisc_tcam_entry(port) \ + (DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM + (port)) +void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev, + u32 port, bool enable) +{ + struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); + struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl; + u16 entry_index; + struct dsaf_drv_tbl_tcam_key tbl_tcam_data, tbl_tcam_mask; + struct dsaf_tbl_tcam_mcast_cfg mac_data = {0}; + + if ((AE_IS_VER1(dsaf_dev->dsaf_ver)) || HNS_DSAF_IS_DEBUG(dsaf_dev)) + return; + + /* find the tcam entry index for promisc */ + entry_index = dsaf_promisc_tcam_entry(port); + + /* config key mask */ + if (enable) { + memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data)); + memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask)); + dsaf_set_field(tbl_tcam_data.low.bits.port_vlan, + DSAF_TBL_TCAM_KEY_PORT_M, + DSAF_TBL_TCAM_KEY_PORT_S, port); + dsaf_set_field(tbl_tcam_mask.low.bits.port_vlan, + DSAF_TBL_TCAM_KEY_PORT_M, + DSAF_TBL_TCAM_KEY_PORT_S, 0xf); + + /* SUB_QID */ + dsaf_set_bit(mac_data.tbl_mcast_port_msk[0], + DSAF_SERVICE_NW_NUM, true); + mac_data.tbl_mcast_item_vld = true; /* item_vld bit */ + } else { + mac_data.tbl_mcast_item_vld = false; /* item_vld bit */ + } + + dev_dbg(dsaf_dev->dev, + "set_promisc_entry, %s Mac key(%#x:%#x) entry_index%d\n", + dsaf_dev->ae_dev.name, tbl_tcam_data.high.val, + tbl_tcam_data.low.val, entry_index); + + /* config promisc entry with mask */ + hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, + (struct dsaf_tbl_tcam_data *)&tbl_tcam_data, + (struct dsaf_tbl_tcam_data *)&tbl_tcam_mask, + &mac_data); + + /* config software entry */ + soft_mac_entry += entry_index; + soft_mac_entry->index = enable ? 
entry_index : DSAF_INVALID_ENTRY_IDX; +} + /** * dsaf_probe - probo dsaf dev * @pdev: dasf platform device diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index c494fc52be74..cef6bf46ae93 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -35,8 +35,6 @@ struct hns_mac_cb; #define DSAF_CFG_READ_CNT 30 -#define MAC_NUM_OCTETS_PER_ADDR 6 - #define DSAF_DUMP_REGS_NUM 504 #define DSAF_STATIC_NUM 28 #define DSAF_V2_STATIC_NUM 44 @@ -165,7 +163,7 @@ enum dsaf_mode { /*mac entry, mc or uc entry*/ struct dsaf_drv_mac_single_dest_entry { /* mac addr, match the entry*/ - u8 addr[MAC_NUM_OCTETS_PER_ADDR]; + u8 addr[ETH_ALEN]; u16 in_vlan_id; /* value of VlanId */ /* the vld input port num, dsaf-mode fix 0, */ @@ -179,7 +177,7 @@ struct dsaf_drv_mac_single_dest_entry { /*only mc entry*/ struct dsaf_drv_mac_multi_dest_entry { /* mac addr, match the entry*/ - u8 addr[MAC_NUM_OCTETS_PER_ADDR]; + u8 addr[ETH_ALEN]; u16 in_vlan_id; /* this mac addr output port,*/ /* bit0-bit5 means Port0-Port5(1bit is vld)**/ @@ -308,8 +306,6 @@ struct dsaf_misc_op { /* reset series function, it will be reset if the dereset is 0 */ void (*dsaf_reset)(struct dsaf_device *dsaf_dev, bool dereset); void (*xge_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset); - void (*xge_core_srst)(struct dsaf_device *dsaf_dev, u32 port, - bool dereset); void (*ge_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset); void (*ppe_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset); void (*ppe_comm_srst)(struct dsaf_device *dsaf_dev, bool dereset); @@ -343,6 +339,7 @@ struct dsaf_device { enum hal_dsaf_mode dsaf_en; enum hal_dsaf_tc_mode dsaf_tc_mode; u32 dsaf_ver; + u16 tcam_max_num; /* max TCAM entry for user except promisc */ struct ppe_common_cb *ppe_common[DSAF_COMM_DEV_NUM]; struct rcb_common_cb *rcb_common[DSAF_COMM_DEV_NUM]; @@ -360,6 +357,11 @@ static inline void *hns_dsaf_dev_priv(const struct dsaf_device *dsaf_dev) return (void *)((u8 *)dsaf_dev + sizeof(*dsaf_dev)); } +#define DSAF_TBL_TCAM_KEY_PORT_S 0 +#define DSAF_TBL_TCAM_KEY_PORT_M (((1ULL << 4) - 1) << 0) +#define DSAF_TBL_TCAM_KEY_VLAN_S 4 +#define DSAF_TBL_TCAM_KEY_VLAN_M (((1ULL << 12) - 1) << 4) + struct dsaf_drv_tbl_tcam_key { union { struct { @@ -373,11 +375,9 @@ struct dsaf_drv_tbl_tcam_key { } high; union { struct { - u32 port:4; /* port id, */ - /* dsaf-mode fixed 0, non-dsaf-mode port id*/ - u32 vlan:12; /* vlan id */ - u32 mac_5:8; - u32 mac_4:8; + u16 port_vlan; + u8 mac_5; + u8 mac_4; } bits; u32 val; @@ -461,10 +461,19 @@ void hns_dsaf_get_strings(int stringset, u8 *data, int port, void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data); int hns_dsaf_get_regs_count(void); void hns_dsaf_set_promisc_mode(struct dsaf_device *dsaf_dev, u32 en); +void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev, + u32 port, bool enable); void hns_dsaf_get_rx_mac_pause_en(struct dsaf_device *dsaf_dev, int mac_id, u32 *en); int hns_dsaf_set_rx_mac_pause_en(struct dsaf_device *dsaf_dev, int mac_id, u32 en); +int hns_dsaf_rm_mac_addr( + struct dsaf_device *dsaf_dev, + struct dsaf_drv_mac_single_dest_entry *mac_entry); + +int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev, + u8 mac_id, u8 port_num); + #endif /* __HNS_DSAF_MAIN_H__ */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 67accce1d33d..a2c22d084ce9 100644 --- 
a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -23,7 +23,6 @@ enum _dsm_op_index { enum _dsm_rst_type { HNS_DSAF_RESET_FUNC = 0x1, HNS_PPE_RESET_FUNC = 0x2, - HNS_XGE_CORE_RESET_FUNC = 0x3, HNS_XGE_RESET_FUNC = 0x4, HNS_GE_RESET_FUNC = 0x5, HNS_DSAF_CHN_RESET_FUNC = 0x6, @@ -213,26 +212,6 @@ static void hns_dsaf_xge_srst_by_port_acpi(struct dsaf_device *dsaf_dev, HNS_XGE_RESET_FUNC, port, dereset); } -static void hns_dsaf_xge_core_srst_by_port(struct dsaf_device *dsaf_dev, - u32 port, bool dereset) -{ - u32 reg_val = 0; - u32 reg_addr; - - if (port >= DSAF_XGE_NUM) - return; - - reg_val |= XGMAC_TRX_CORE_SRST_M - << dsaf_dev->mac_cb[port]->port_rst_off; - - if (!dereset) - reg_addr = DSAF_SUB_SC_XGE_RESET_REQ_REG; - else - reg_addr = DSAF_SUB_SC_XGE_RESET_DREQ_REG; - - dsaf_write_sub(dsaf_dev, reg_addr, reg_val); -} - /** * hns_dsaf_srst_chns - reset dsaf channels * @dsaf_dev: dsaf device struct pointer @@ -293,14 +272,6 @@ void hns_dsaf_roce_srst_acpi(struct dsaf_device *dsaf_dev, bool dereset) HNS_ROCE_RESET_FUNC, 0, dereset); } -static void -hns_dsaf_xge_core_srst_by_port_acpi(struct dsaf_device *dsaf_dev, - u32 port, bool dereset) -{ - hns_dsaf_acpi_srst_by_port(dsaf_dev, HNS_OP_RESET_FUNC, - HNS_XGE_CORE_RESET_FUNC, port, dereset); -} - static void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, bool dereset) { @@ -597,7 +568,6 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev) misc_op->dsaf_reset = hns_dsaf_rst; misc_op->xge_srst = hns_dsaf_xge_srst_by_port; - misc_op->xge_core_srst = hns_dsaf_xge_core_srst_by_port; misc_op->ge_srst = hns_dsaf_ge_srst_by_port; misc_op->ppe_srst = hns_ppe_srst_by_port; misc_op->ppe_comm_srst = hns_ppe_com_srst; @@ -615,7 +585,6 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev) misc_op->dsaf_reset = hns_dsaf_rst_acpi; misc_op->xge_srst = hns_dsaf_xge_srst_by_port_acpi; - misc_op->xge_core_srst = hns_dsaf_xge_core_srst_by_port_acpi; misc_op->ge_srst = hns_dsaf_ge_srst_by_port_acpi; misc_op->ppe_srst = hns_ppe_srst_by_port_acpi; misc_op->ppe_comm_srst = hns_ppe_com_srst; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index 878950a42e6c..87226685f742 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -41,6 +41,9 @@ #define DSAF_SW_PORT_NUM 8 #define DSAF_TOTAL_QUEUE_NUM 129 +/* reserved a tcam entry for each port to support promisc by fuzzy match */ +#define DSAFV2_MAC_FUZZY_TCAM_NUM DSAF_MAX_PORT_NUM + #define DSAF_TCAM_SUM 512 #define DSAF_LINE_SUM (2048 * 14) @@ -297,6 +300,8 @@ #define DSAF_TBL_LKUP_NUM_I_0_REG 0x50C0 #define DSAF_TBL_LKUP_NUM_O_0_REG 0x50E0 #define DSAF_TBL_UCAST_BCAST_MIS_INFO_0_0_REG 0x510C +#define DSAF_TBL_TCAM_MATCH_CFG_H_REG 0x5130 +#define DSAF_TBL_TCAM_MATCH_CFG_L_REG 0x5134 #define DSAF_INODE_FIFO_WL_0_REG 0x6000 #define DSAF_ONODE_FIFO_WL_0_REG 0x6020 @@ -309,7 +314,6 @@ #define PPE_COM_INTEN_REG 0x110 #define PPE_COM_RINT_REG 0x114 #define PPE_COM_INTSTS_REG 0x118 -#define PPE_COM_COMMON_CNT_CLR_CE_REG 0x1120 #define PPE_COM_HIS_RX_PKT_QID_DROP_CNT_REG 0x300 #define PPE_COM_HIS_RX_PKT_QID_OK_CNT_REG 0x600 #define PPE_COM_HIS_TX_PKT_QID_ERR_CNT_REG 0x900 @@ -698,8 +702,6 @@ #define XGMAC_RX_SYMBOLERRPKTS 0x0210 #define XGMAC_RX_FCSERRPKTS 0x0218 -#define XGMAC_TRX_CORE_SRST_M 0x2080 - #define DSAF_SRAM_INIT_OVER_M 0xff #define DSAFV2_SRAM_INIT_OVER_M 0x3ff #define 
DSAF_SRAM_INIT_OVER_S 0 @@ -978,6 +980,11 @@ #define XGMAC_ENABLE_TX_B 0 #define XGMAC_ENABLE_RX_B 1 +#define XGMAC_UNIDIR_EN_B 0 +#define XGMAC_RF_TX_EN_B 1 +#define XGMAC_LF_RF_INSERT_S 2 +#define XGMAC_LF_RF_INSERT_M (0x3 << XGMAC_LF_RF_INSERT_S) + #define XGMAC_CTL_TX_FCS_B 0 #define XGMAC_CTL_TX_PAD_B 1 #define XGMAC_CTL_TX_PREAMBLE_TRANS_B 3 diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c index 8f4f0e8da984..aae830a93050 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c @@ -108,6 +108,31 @@ static void hns_xgmac_rx_enable(struct mac_driver *drv, u32 value) } /** + * hns_xgmac_lf_rf_insert - insert the lf/rf control for the xgmac + * @mac_drv: mac driver + * @mode: insert rf or lf + */ +static void hns_xgmac_lf_rf_insert(struct mac_driver *mac_drv, u32 mode) +{ + dsaf_set_dev_field(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, + XGMAC_LF_RF_INSERT_M, XGMAC_LF_RF_INSERT_S, mode); +} + +/** + * hns_xgmac_lf_rf_control_init - initialize the lf rf control register + * @mac_drv: mac driver + */ +static void hns_xgmac_lf_rf_control_init(struct mac_driver *mac_drv) +{ + u32 val = 0; + + dsaf_set_bit(val, XGMAC_UNIDIR_EN_B, 0); + dsaf_set_bit(val, XGMAC_RF_TX_EN_B, 1); + dsaf_set_field(val, XGMAC_LF_RF_INSERT_M, XGMAC_LF_RF_INSERT_S, 0); + dsaf_write_reg(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val); +} + +/** *hns_xgmac_enable - enable xgmac port *@drv: mac driver *@mode: mode of mac port */ @@ -115,12 +140,8 @@ static void hns_xgmac_rx_enable(struct mac_driver *drv, u32 value) static void hns_xgmac_enable(void *mac_drv, enum mac_commom_mode mode) { struct mac_driver *drv = (struct mac_driver *)mac_drv; - struct dsaf_device *dsaf_dev - = (struct dsaf_device *)dev_get_drvdata(drv->dev); - u32 port = drv->mac_id; - dsaf_dev->misc_op->xge_core_srst(dsaf_dev, port, 1); - mdelay(10); + hns_xgmac_lf_rf_insert(drv, HNS_XGMAC_NO_LF_RF_INSERT); /*enable XGE rX/tX */ if (mode == MAC_COMM_MODE_TX) { @@ -143,9 +164,6 @@ static void hns_xgmac_enable(void *mac_drv, enum mac_commom_mode mode) static void hns_xgmac_disable(void *mac_drv, enum mac_commom_mode mode) { struct mac_driver *drv = (struct mac_driver *)mac_drv; - struct dsaf_device *dsaf_dev - = (struct dsaf_device *)dev_get_drvdata(drv->dev); - u32 port = drv->mac_id; if (mode == MAC_COMM_MODE_TX) { hns_xgmac_tx_enable(drv, 0); @@ -155,9 +173,7 @@ static void hns_xgmac_disable(void *mac_drv, enum mac_commom_mode mode) hns_xgmac_tx_enable(drv, 0); hns_xgmac_rx_enable(drv, 0); } - - mdelay(10); - dsaf_dev->misc_op->xge_core_srst(dsaf_dev, port, 0); + hns_xgmac_lf_rf_insert(drv, HNS_XGMAC_LF_INSERT); } /** @@ -203,6 +219,7 @@ static void hns_xgmac_init(void *mac_drv) dsaf_dev->misc_op->xge_srst(dsaf_dev, port, 1); mdelay(100); + hns_xgmac_lf_rf_control_init(drv); hns_xgmac_exc_irq_en(drv, 0); hns_xgmac_pma_fec_enable(drv, 0x0, 0x0); @@ -788,7 +805,7 @@ static int hns_xgmac_get_sset_count(int stringset) */ static int hns_xgmac_get_regs_count(void) { - return ETH_XGMAC_DUMP_NUM; + return HNS_XGMAC_DUMP_NUM; } void *hns_xgmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h index 139f7297c7b4..da6c5343d3e1 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h @@ -10,6 +10,7 @@ #ifndef _HNS_XGMAC_H #define _HNS_XGMAC_H 
-#define ETH_XGMAC_DUMP_NUM (214) - +#define HNS_XGMAC_DUMP_NUM 214 +#define HNS_XGMAC_NO_LF_RF_INSERT 0x0 +#define HNS_XGMAC_LF_INSERT 0x2 #endif diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 60831a2ac86b..776d81e785d8 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1493,6 +1493,29 @@ static netdev_features_t hns_nic_fix_features( return features; } +static int hns_nic_uc_sync(struct net_device *netdev, const unsigned char *addr) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_handle *h = priv->ae_handle; + + if (h->dev->ops->add_uc_addr) + return h->dev->ops->add_uc_addr(h, addr); + + return 0; +} + +static int hns_nic_uc_unsync(struct net_device *netdev, + const unsigned char *addr) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + struct hnae_handle *h = priv->ae_handle; + + if (h->dev->ops->rm_uc_addr) + return h->dev->ops->rm_uc_addr(h, addr); + + return 0; +} + /** * nic_set_multicast_list - set mutl mac address * @netdev: net device @@ -1511,6 +1534,10 @@ void hns_set_multicast_list(struct net_device *ndev) return; } + if (h->dev->ops->clr_mc_addr) + if (h->dev->ops->clr_mc_addr(h)) + netdev_err(ndev, "clear multicast address fail\n"); + if (h->dev->ops->set_mc_addr) { netdev_for_each_mc_addr(ha, ndev) if (h->dev->ops->set_mc_addr(h, ha->addr)) @@ -1531,6 +1558,9 @@ void hns_nic_set_rx_mode(struct net_device *ndev) } hns_set_multicast_list(ndev); + + if (__dev_uc_sync(ndev, hns_nic_uc_sync, hns_nic_uc_unsync)) + netdev_err(ndev, "sync uc address fail\n"); } struct rtnl_link_stats64 *hns_nic_get_stats64(struct net_device *ndev, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 87d5c94b2810..3ac2183dbd21 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -1178,7 +1178,8 @@ static int hns_nic_nway_reset(struct net_device *netdev) struct phy_device *phy = netdev->phydev; if (netif_running(netdev)) { - if (phy) + /* if autoneg is disabled, don't restart auto-negotiation */ + if (phy && phy->autoneg == AUTONEG_ENABLE) ret = genphy_restart_aneg(phy); } diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index a7895c4cbcc3..c30eea8399a7 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -226,7 +226,7 @@ static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb) return 0; } -static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb) +static int igb_ptp_adjfine_82580(struct ptp_clock_info *ptp, long scaled_ppm) { struct igb_adapter *igb = container_of(ptp, struct igb_adapter, ptp_caps); @@ -235,13 +235,13 @@ static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb) u64 rate; u32 inca; - if (ppb < 0) { + if (scaled_ppm < 0) { neg_adj = 1; - ppb = -ppb; + scaled_ppm = -scaled_ppm; } - rate = ppb; - rate <<= 26; - rate = div_u64(rate, 1953125); + rate = scaled_ppm; + rate <<= 13; + rate = div_u64(rate, 15625); inca = rate & INCVALUE_MASK; if (neg_adj) @@ -1103,7 +1103,7 @@ void igb_ptp_init(struct igb_adapter *adapter) adapter->ptp_caps.max_adj = 62499999; adapter->ptp_caps.n_ext_ts = 0; adapter->ptp_caps.pps = 0; - adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; + adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580; adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; 
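
The igb hunk above converts the 82580 clock adjustment from adjfreq (parts per billion) to adjfine (scaled_ppm, parts per million carrying a 16-bit binary fraction). The two scalings are exactly equivalent: ppb << 26 / 1953125 equals scaled_ppm << 13 / 15625, because ppb = scaled_ppm * 1000 / 65536 and 10^9 = 2^9 * 1953125. A standalone check of the identity:

    #include <stdint.h>
    #include <stdio.h>

    /* Old path: increment delta from parts per billion.
     * ppb << 26 / 1953125 == ppb * 2^35 / 10^9, since 10^9 = 2^9 * 1953125.
     */
    static uint64_t inca_from_ppb(uint64_t ppb)
    {
            return (ppb << 26) / 1953125;
    }

    /* New path: increment delta from scaled_ppm (ppm * 2^16).
     * scaled_ppm << 13 / 15625 == ppm * 2^35 / 10^6 -- the same factor,
     * since ppb = scaled_ppm * 1000 / 65536.
     */
    static uint64_t inca_from_scaled_ppm(uint64_t scaled_ppm)
    {
            return (scaled_ppm << 13) / 15625;
    }

    int main(void)
    {
            uint64_t ppb = 125000;                          /* 125 ppm */
            uint64_t scaled_ppm = (ppb << 16) / 1000;       /* same request */

            /* both print 4294967 */
            printf("%llu\n", (unsigned long long)inca_from_ppb(ppb));
            printf("%llu\n", (unsigned long long)inca_from_scaled_ppm(scaled_ppm));
            return 0;
    }

Using scaled_ppm avoids the precision loss of rounding the user's request to whole ppb before scaling, which is the point of the adjfine interface.
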
adapter->ptp_caps.gettime64 = igb_ptp_gettime_82576; adapter->ptp_caps.settime64 = igb_ptp_settime_82576; @@ -1131,7 +1131,7 @@ void igb_ptp_init(struct igb_adapter *adapter) adapter->ptp_caps.n_pins = IGB_N_SDP; adapter->ptp_caps.pps = 1; adapter->ptp_caps.pin_config = adapter->sdp_config; - adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; + adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580; adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210; adapter->ptp_caps.gettime64 = igb_ptp_gettime_i210; adapter->ptp_caps.settime64 = igb_ptp_settime_i210; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index b06e32d0d22a..ef81c3d8c295 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -1027,4 +1027,6 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, struct ixgbe_ring *tx_ring); u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter); void ixgbe_store_reta(struct ixgbe_adapter *adapter); +s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, + u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm); #endif /* _IXGBE_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index fb51be74dd4c..805ab319e578 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -367,7 +367,7 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) } /* Negotiate the fc mode to use */ - ixgbe_fc_autoneg(hw); + hw->mac.ops.fc_autoneg(hw); /* Disable any previous flow control settings */ fctrl_reg = IXGBE_READ_REG(hw, IXGBE_FCTRL); @@ -1179,6 +1179,7 @@ static const struct ixgbe_mac_operations mac_ops_82598 = { .get_link_capabilities = &ixgbe_get_link_capabilities_82598, .led_on = &ixgbe_led_on_generic, .led_off = &ixgbe_led_off_generic, + .init_led_link_act = ixgbe_init_led_link_act_generic, .blink_led_start = &ixgbe_blink_led_start_generic, .blink_led_stop = &ixgbe_blink_led_stop_generic, .set_rar = &ixgbe_set_rar_generic, @@ -1193,6 +1194,7 @@ static const struct ixgbe_mac_operations mac_ops_82598 = { .set_vfta = &ixgbe_set_vfta_82598, .fc_enable = &ixgbe_fc_enable_82598, .setup_fc = ixgbe_setup_fc_generic, + .fc_autoneg = ixgbe_fc_autoneg, .set_fw_drv_ver = NULL, .acquire_swfw_sync = &ixgbe_acquire_swfw_sync, .release_swfw_sync = &ixgbe_release_swfw_sync, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index 63b25006ac90..e00aaeb91827 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -2204,6 +2204,7 @@ static const struct ixgbe_mac_operations mac_ops_82599 = { .get_link_capabilities = &ixgbe_get_link_capabilities_82599, .led_on = &ixgbe_led_on_generic, .led_off = &ixgbe_led_off_generic, + .init_led_link_act = ixgbe_init_led_link_act_generic, .blink_led_start = &ixgbe_blink_led_start_generic, .blink_led_stop = &ixgbe_blink_led_stop_generic, .set_rar = &ixgbe_set_rar_generic, @@ -2219,6 +2220,7 @@ static const struct ixgbe_mac_operations mac_ops_82599 = { .set_vfta = &ixgbe_set_vfta_generic, .fc_enable = &ixgbe_fc_enable_generic, .setup_fc = ixgbe_setup_fc_generic, + .fc_autoneg = ixgbe_fc_autoneg, .set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic, .init_uta_tables = &ixgbe_init_uta_tables_generic, .setup_sfp = &ixgbe_setup_sfp_modules_82599, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 
77d3039283f6..8832df3eba25 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -298,10 +298,12 @@ s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw) IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); IXGBE_WRITE_FLUSH(hw); - /* Setup flow control */ - ret_val = hw->mac.ops.setup_fc(hw); - if (ret_val) - return ret_val; + /* Set up flow control if the MAC provides a method for it */ + if (hw->mac.ops.setup_fc) { + ret_val = hw->mac.ops.setup_fc(hw); + if (ret_val) + return ret_val; + } /* Cashe bit indicating need for crosstalk fix */ switch (hw->mac.type) { @@ -390,6 +392,9 @@ s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw) status = hw->mac.ops.start_hw(hw); } + /* Initialize the link active LED index for LED blink support */ + hw->mac.ops.init_led_link_act(hw); + return status; } @@ -773,6 +778,49 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw) } /** + * ixgbe_init_led_link_act_generic - Store the index of the link active LED. + * @hw: pointer to hardware structure + * + * Store the index for the link active LED. This will be used to support + * blinking the LED. + **/ +s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + u32 led_reg, led_mode; + u16 i; + + led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + + /* Get LED link active from the LEDCTL register */ + for (i = 0; i < 4; i++) { + led_mode = led_reg >> IXGBE_LED_MODE_SHIFT(i); + + if ((led_mode & IXGBE_LED_MODE_MASK_BASE) == + IXGBE_LED_LINK_ACTIVE) { + mac->led_link_act = i; + return 0; + } + } + + /* If LEDCTL register does not have the LED link active set, then use + * known MAC defaults. + */ + switch (hw->mac.type) { + case ixgbe_mac_x550em_a: + mac->led_link_act = 0; + break; + case ixgbe_mac_X550EM_x: + mac->led_link_act = 1; + break; + default: + mac->led_link_act = 2; + } + + return 0; +} + +/** * ixgbe_led_on_generic - Turns on the software controllable LEDs. 
* @hw: pointer to hardware structure * @index: led number to turn on @@ -2127,7 +2175,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) } /* Negotiate the fc mode to use */ - ixgbe_fc_autoneg(hw); + hw->mac.ops.fc_autoneg(hw); /* Disable any previous flow control settings */ mflcn_reg = IXGBE_READ_REG(hw, IXGBE_MFLCN); @@ -2231,8 +2279,8 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) * Find the intersection between advertised settings and link partner's * advertised settings **/ -static s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, - u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm) +s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, + u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm) { if ((!(adv_reg)) || (!(lp_reg))) return IXGBE_ERR_FC_NOT_NEGOTIATED; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h index 6d4c260d0cbd..5b3e3c65927e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h @@ -49,6 +49,7 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw); s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index); s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw); s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw); s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index f49f80380aa5..fd192bf29b26 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2225,11 +2225,11 @@ static int ixgbe_set_phys_id(struct net_device *netdev, return 2; case ETHTOOL_ID_ON: - hw->mac.ops.led_on(hw, hw->bus.func); + hw->mac.ops.led_on(hw, hw->mac.led_link_act); break; case ETHTOOL_ID_OFF: - hw->mac.ops.led_off(hw, hw->bus.func); + hw->mac.ops.led_off(hw, hw->mac.led_link_act); break; case ETHTOOL_ID_INACTIVE: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5e1f57c7ee1b..2436984481cc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -54,6 +54,7 @@ #include <net/pkt_cls.h> #include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_mirred.h> +#include <net/vxlan.h> #include "ixgbe.h" #include "ixgbe_common.h" @@ -3070,6 +3071,9 @@ static void ixgbe_free_irq(struct ixgbe_adapter *adapter) return; } + if (!adapter->msix_entries) + return; + for (vector = 0; vector < adapter->num_q_vectors; vector++) { struct ixgbe_q_vector *q_vector = adapter->q_vector[vector]; struct msix_entry *entry = &adapter->msix_entries[vector]; @@ -5621,7 +5625,8 @@ static void ixgbe_init_dcb(struct ixgbe_adapter *adapter) * Fields are initialized based on PCI device information and * OS network device settings (MTU size). 
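
ixgbe_init_led_link_act_generic() above scans LEDCTL for a LED already configured as link-active and falls back to known per-MAC defaults, after which the ethtool identify path blinks that LED rather than assuming the LED index equals the PCI function number. A sketch of the scan, assuming the usual ixgbe layout of an 8-bit field per LED with the mode in the low nibble (the macro values are not shown in this hunk, so treat them as assumptions):

    #include <stdint.h>
    #include <stdio.h>

    #define LED_MODE_SHIFT(i)  (8 * (i))    /* assumed per-LED stride */
    #define LED_MODE_MASK      0xf          /* mode lives in the low nibble */
    #define LED_LINK_ACTIVE    0x4          /* assumed "link active" code */

    /* Return the index of the first LED programmed as link-active, or -1 so
     * the caller can fall back to a per-MAC default, as the driver does.
     */
    static int find_link_active_led(uint32_t ledctl)
    {
            int i;

            for (i = 0; i < 4; i++) {
                    uint32_t mode = (ledctl >> LED_MODE_SHIFT(i)) & LED_MODE_MASK;

                    if (mode == LED_LINK_ACTIVE)
                            return i;
            }
            return -1;
    }

    int main(void)
    {
            /* LED1's field holds the link-active code in this example value */
            printf("%d\n", find_link_active_led(0x00000400));       /* 1 */
            return 0;
    }
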
**/ -static int ixgbe_sw_init(struct ixgbe_adapter *adapter) +static int ixgbe_sw_init(struct ixgbe_adapter *adapter, + const struct ixgbe_info *ii) { struct ixgbe_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; @@ -5637,6 +5642,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter) hw->subsystem_vendor_id = pdev->subsystem_vendor; hw->subsystem_device_id = pdev->subsystem_device; + /* get_invariants needs the device IDs */ + ii->get_invariants(hw); + /* Set common capability flags and settings */ rss = min_t(int, ixgbe_max_rss_indices(adapter), num_online_cpus()); adapter->ring_feature[RING_F_RSS].limit = rss; @@ -7653,11 +7661,17 @@ static void ixgbe_atr(struct ixgbe_ring *ring, /* snag network header to get L4 type and address */ skb = first->skb; hdr.network = skb_network_header(skb); + if (unlikely(hdr.network <= skb->data)) + return; if (skb->encapsulation && first->protocol == htons(ETH_P_IP) && - hdr.ipv4->protocol != IPPROTO_UDP) { + hdr.ipv4->protocol == IPPROTO_UDP) { struct ixgbe_adapter *adapter = q_vector->adapter; + if (unlikely(skb_tail_pointer(skb) < hdr.network + + VXLAN_HEADROOM)) + return; + /* verify the port is recognized as VXLAN */ if (adapter->vxlan_port && udp_hdr(skb)->dest == adapter->vxlan_port) @@ -7668,6 +7682,12 @@ static void ixgbe_atr(struct ixgbe_ring *ring, hdr.network = skb_inner_network_header(skb); } + /* Make sure we have at least [minimum IPv4 header + TCP] + * or [IPv6 header] bytes + */ + if (unlikely(skb_tail_pointer(skb) < hdr.network + 40)) + return; + /* Currently only IPv4/IPv6 with TCP is supported */ switch (hdr.ipv4->version) { case IPVERSION: @@ -7687,6 +7707,10 @@ static void ixgbe_atr(struct ixgbe_ring *ring, if (l4_proto != IPPROTO_TCP) return; + if (unlikely(skb_tail_pointer(skb) < hdr.network + + hlen + sizeof(struct tcphdr))) + return; + th = (struct tcphdr *)(hdr.network + hlen); /* skip this packet since the socket is closing */ @@ -9504,6 +9528,8 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->mac.ops = *ii->mac_ops; hw->mac.type = ii->mac; hw->mvals = ii->mvals; + if (ii->link_ops) + hw->link.ops = *ii->link_ops; /* EEPROM */ hw->eeprom.ops = *ii->eeprom_ops; @@ -9527,10 +9553,8 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->phy.mdio.mdio_read = ixgbe_mdio_read; hw->phy.mdio.mdio_write = ixgbe_mdio_write; - ii->get_invariants(hw); - /* setup the private structure */ - err = ixgbe_sw_init(adapter); + err = ixgbe_sw_init(adapter, ii); if (err) goto err_sw_init; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 021ab9b89c71..3b8362085f57 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -109,8 +109,8 @@ static u8 ixgbe_ones_comp_byte_add(u8 add1, u8 add2) * * Returns an error code on error. */ -static s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 *val, bool lock) +s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, + u16 reg, u16 *val, bool lock) { u32 swfw_mask = hw->phy.phy_semaphore_mask; int max_retry = 10; @@ -178,36 +178,6 @@ fail: } /** - * ixgbe_read_i2c_combined_generic - Perform I2C read combined operation - * @hw: pointer to the hardware structure - * @addr: I2C bus address to read from - * @reg: I2C device register to read from - * @val: pointer to location to receive read value - * - * Returns an error code on error. 
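
The ixgbe_atr() fixes above all enforce one discipline: never dereference a header until the bytes are known to lie inside the linear skb data, whether that is VXLAN_HEADROOM before peeking at the UDP destination port, 40 bytes before reading an IP header, or hlen plus sizeof(struct tcphdr) before touching TCP fields. The same pattern over a plain byte buffer (hypothetical helper, not driver code):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    struct tcp_ports {
            uint16_t sport;
            uint16_t dport;
    };

    /* Parse the TCP ports at offset 'off' in a packet of 'len' bytes,
     * refusing to read past the end of the buffer -- the same discipline
     * the ixgbe_atr() checks enforce with skb_tail_pointer().
     */
    static bool parse_tcp_ports(const uint8_t *pkt, size_t len, size_t off,
                                struct tcp_ports *out)
    {
            if (off + 4 > len)      /* need the 4 bytes holding both ports */
                    return false;

            out->sport = (uint16_t)((pkt[off] << 8) | pkt[off + 1]);
            out->dport = (uint16_t)((pkt[off + 2] << 8) | pkt[off + 3]);
            return true;
    }

    int main(void)
    {
            uint8_t pkt[54] = { 0 };        /* Ethernet + IPv4 + TCP sized */
            struct tcp_ports tp;

            return parse_tcp_ports(pkt, sizeof(pkt), 34, &tp) ? 0 : 1;
    }
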
- */ -s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 *val) -{ - return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, true); -} - -/** - * ixgbe_read_i2c_combined_generic_unlocked - Unlocked I2C read combined - * @hw: pointer to the hardware structure - * @addr: I2C bus address to read from - * @reg: I2C device register to read from - * @val: pointer to location to receive read value - * - * Returns an error code on error. - */ -s32 ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 *val) -{ - return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, false); -} - -/** * ixgbe_write_i2c_combined_generic_int - Perform I2C write combined operation * @hw: pointer to the hardware structure * @addr: I2C bus address to write to @@ -217,8 +187,8 @@ s32 ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr, * * Returns an error code on error. */ -static s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 val, bool lock) +s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, + u16 reg, u16 val, bool lock) { u32 swfw_mask = hw->phy.phy_semaphore_mask; int max_retry = 1; @@ -273,33 +243,39 @@ fail: } /** - * ixgbe_write_i2c_combined_generic - Perform I2C write combined operation - * @hw: pointer to the hardware structure - * @addr: I2C bus address to write to - * @reg: I2C device register to write to - * @val: value to write + * ixgbe_probe_phy - Probe a single address for a PHY + * @hw: pointer to hardware structure + * @phy_addr: PHY address to probe * - * Returns an error code on error. - */ -s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw, - u8 addr, u16 reg, u16 val) + * Returns true if PHY found + **/ +static bool ixgbe_probe_phy(struct ixgbe_hw *hw, u16 phy_addr) { - return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, true); -} + u16 ext_ability = 0; -/** - * ixgbe_write_i2c_combined_generic_unlocked - Unlocked I2C write combined - * @hw: pointer to the hardware structure - * @addr: I2C bus address to write to - * @reg: I2C device register to write to - * @val: value to write - * - * Returns an error code on error. 
- */ -s32 ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, - u8 addr, u16 reg, u16 val) -{ - return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, false); + hw->phy.mdio.prtad = phy_addr; + if (mdio45_probe(&hw->phy.mdio, phy_addr) != 0) + return false; + + if (ixgbe_get_phy_id(hw)) + return false; + + hw->phy.type = ixgbe_get_phy_type_from_id(hw->phy.id); + + if (hw->phy.type == ixgbe_phy_unknown) { + hw->phy.ops.read_reg(hw, + MDIO_PMA_EXTABLE, + MDIO_MMD_PMAPMD, + &ext_ability); + if (ext_ability & + (MDIO_PMA_EXTABLE_10GBT | + MDIO_PMA_EXTABLE_1000BT)) + hw->phy.type = ixgbe_phy_cu_unknown; + else + hw->phy.type = ixgbe_phy_generic; + } + + return true; } /** @@ -311,7 +287,7 @@ s32 ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) { u32 phy_addr; - u16 ext_ability = 0; + u32 status = IXGBE_ERR_PHY_ADDR_INVALID; if (!hw->phy.phy_semaphore_mask) { if (hw->bus.lan_id) @@ -320,37 +296,34 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM; } - if (hw->phy.type == ixgbe_phy_unknown) { - for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) { - hw->phy.mdio.prtad = phy_addr; - if (mdio45_probe(&hw->phy.mdio, phy_addr) == 0) { - ixgbe_get_phy_id(hw); - hw->phy.type = - ixgbe_get_phy_type_from_id(hw->phy.id); - - if (hw->phy.type == ixgbe_phy_unknown) { - hw->phy.ops.read_reg(hw, - MDIO_PMA_EXTABLE, - MDIO_MMD_PMAPMD, - &ext_ability); - if (ext_ability & - (MDIO_PMA_EXTABLE_10GBT | - MDIO_PMA_EXTABLE_1000BT)) - hw->phy.type = - ixgbe_phy_cu_unknown; - else - hw->phy.type = - ixgbe_phy_generic; - } + if (hw->phy.type != ixgbe_phy_unknown) + return 0; - return 0; - } + if (hw->phy.nw_mng_if_sel) { + phy_addr = (hw->phy.nw_mng_if_sel & + IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> + IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; + if (ixgbe_probe_phy(hw, phy_addr)) + return 0; + else + return IXGBE_ERR_PHY_ADDR_INVALID; + } + + for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) { + if (ixgbe_probe_phy(hw, phy_addr)) { + status = 0; + break; } - /* indicate no PHY found */ - hw->phy.mdio.prtad = MDIO_PRTAD_NONE; - return IXGBE_ERR_PHY_ADDR_INVALID; } - return 0; + + /* Certain media types do not have a phy so an address will not + * be found and the code will take this path. Caller has to + * decide if it is an error or not. 
+ */ + if (status) + hw->phy.mdio.prtad = MDIO_PRTAD_NONE; + + return status; } /** @@ -416,7 +389,8 @@ static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id) case TN1010_PHY_ID: phy_type = ixgbe_phy_tn; break; - case X550_PHY_ID: + case X550_PHY_ID2: + case X550_PHY_ID3: case X540_PHY_ID: phy_type = ixgbe_phy_aq; break; @@ -427,6 +401,7 @@ static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id) phy_type = ixgbe_phy_nl; break; case X557_PHY_ID: + case X557_PHY_ID2: phy_type = ixgbe_phy_x550em_ext_t; break; default: @@ -477,8 +452,7 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw) */ for (i = 0; i < 30; i++) { msleep(100); - hw->phy.ops.read_reg(hw, MDIO_CTRL1, - MDIO_MMD_PHYXS, &ctrl); + hw->phy.ops.read_reg(hw, MDIO_CTRL1, MDIO_MMD_PHYXS, &ctrl); if (!(ctrl & MDIO_CTRL1_RESET)) { udelay(2); break; @@ -705,53 +679,52 @@ s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw) ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg); - if (speed & IXGBE_LINK_SPEED_10GB_FULL) { - /* Set or unset auto-negotiation 10G advertisement */ - hw->phy.ops.read_reg(hw, MDIO_AN_10GBT_CTRL, - MDIO_MMD_AN, - &autoneg_reg); + /* Set or unset auto-negotiation 10G advertisement */ + hw->phy.ops.read_reg(hw, MDIO_AN_10GBT_CTRL, MDIO_MMD_AN, &autoneg_reg); - autoneg_reg &= ~MDIO_AN_10GBT_CTRL_ADV10G; - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) - autoneg_reg |= MDIO_AN_10GBT_CTRL_ADV10G; - - hw->phy.ops.write_reg(hw, MDIO_AN_10GBT_CTRL, - MDIO_MMD_AN, - autoneg_reg); - } + autoneg_reg &= ~MDIO_AN_10GBT_CTRL_ADV10G; + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) && + (speed & IXGBE_LINK_SPEED_10GB_FULL)) + autoneg_reg |= MDIO_AN_10GBT_CTRL_ADV10G; - if (speed & IXGBE_LINK_SPEED_1GB_FULL) { - /* Set or unset auto-negotiation 1G advertisement */ - hw->phy.ops.read_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - MDIO_MMD_AN, - &autoneg_reg); + hw->phy.ops.write_reg(hw, MDIO_AN_10GBT_CTRL, MDIO_MMD_AN, autoneg_reg); - autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE; - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) - autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE; + hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, + MDIO_MMD_AN, &autoneg_reg); - hw->phy.ops.write_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - MDIO_MMD_AN, - autoneg_reg); + if (hw->mac.type == ixgbe_mac_X550) { + /* Set or unset auto-negotiation 5G advertisement */ + autoneg_reg &= ~IXGBE_MII_5GBASE_T_ADVERTISE; + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_5GB_FULL) && + (speed & IXGBE_LINK_SPEED_5GB_FULL)) + autoneg_reg |= IXGBE_MII_5GBASE_T_ADVERTISE; + + /* Set or unset auto-negotiation 2.5G advertisement */ + autoneg_reg &= ~IXGBE_MII_2_5GBASE_T_ADVERTISE; + if ((hw->phy.autoneg_advertised & + IXGBE_LINK_SPEED_2_5GB_FULL) && + (speed & IXGBE_LINK_SPEED_2_5GB_FULL)) + autoneg_reg |= IXGBE_MII_2_5GBASE_T_ADVERTISE; } - if (speed & IXGBE_LINK_SPEED_100_FULL) { - /* Set or unset auto-negotiation 100M advertisement */ - hw->phy.ops.read_reg(hw, MDIO_AN_ADVERTISE, - MDIO_MMD_AN, - &autoneg_reg); + /* Set or unset auto-negotiation 1G advertisement */ + autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE; + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) && + (speed & IXGBE_LINK_SPEED_1GB_FULL)) + autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE; - autoneg_reg &= ~(ADVERTISE_100FULL | - ADVERTISE_100HALF); - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) - autoneg_reg |= ADVERTISE_100FULL; + hw->phy.ops.write_reg(hw, 
IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, + MDIO_MMD_AN, autoneg_reg); - hw->phy.ops.write_reg(hw, MDIO_AN_ADVERTISE, - MDIO_MMD_AN, - autoneg_reg); - } + /* Set or unset auto-negotiation 100M advertisement */ + hw->phy.ops.read_reg(hw, MDIO_AN_ADVERTISE, MDIO_MMD_AN, &autoneg_reg); + + autoneg_reg &= ~(ADVERTISE_100FULL | ADVERTISE_100HALF); + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) && + (speed & IXGBE_LINK_SPEED_100_FULL)) + autoneg_reg |= ADVERTISE_100FULL; + + hw->phy.ops.write_reg(hw, MDIO_AN_ADVERTISE, MDIO_MMD_AN, autoneg_reg); /* Blocked by MNG FW so don't reset PHY */ if (ixgbe_check_reset_blocked(hw)) @@ -830,6 +803,7 @@ static s32 ixgbe_get_copper_speeds_supported(struct ixgbe_hw *hw) hw->phy.speeds_supported |= IXGBE_LINK_SPEED_5GB_FULL; break; case ixgbe_mac_X550EM_x: + case ixgbe_mac_x550em_a: hw->phy.speeds_supported &= ~IXGBE_LINK_SPEED_100_FULL; break; default: @@ -2396,9 +2370,7 @@ s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on) if (!on && ixgbe_mng_present(hw)) return 0; - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, - ®); + status = hw->phy.ops.read_reg(hw, MDIO_CTRL1, MDIO_MMD_VEND1, ®); if (status) return status; @@ -2410,8 +2382,6 @@ s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on) reg |= IXGBE_MDIO_PHY_SET_LOW_POWER_MODE; } - status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, - reg); + status = hw->phy.ops.write_reg(hw, MDIO_CTRL1, MDIO_MMD_VEND1, reg); return status; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index cc735ec3e045..ecf05f838fc5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -195,12 +195,8 @@ s32 ixgbe_read_i2c_sff8472_generic(struct ixgbe_hw *hw, u8 byte_offset, u8 *sff8472_data); s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset, u8 eeprom_data); -s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 *val); -s32 ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 *val); -s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 val); -s32 ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr, - u16 reg, u16 val); +s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *, u8 addr, u16 reg, + u16 *val, bool lock); +s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *, u8 addr, u16 reg, + u16 val, bool lock); #endif /* _IXGBE_PHY_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 31d82e3abac8..cf21273db201 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -874,19 +874,13 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_1GB 0x4 /* 1Gb/s */ #define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10GB 0x6 /* 10Gb/s */ -#define IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG 0x20 /* 10G Control Reg */ #define IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG 0xC400 /* 1G Provisioning 1 */ #define IXGBE_MII_AUTONEG_XNP_TX_REG 0x17 /* 1G XNP Transmit */ -#define IXGBE_MII_AUTONEG_ADVERTISE_REG 0x10 /* 100M Advertisement */ -#define IXGBE_MII_10GBASE_T_ADVERTISE 0x1000 /* full duplex, bit:12*/ #define IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX 0x4000 /* full duplex, bit:14*/ #define IXGBE_MII_1GBASE_T_ADVERTISE 
0x8000 /* full duplex, bit:15*/ #define IXGBE_MII_2_5GBASE_T_ADVERTISE 0x0400 #define IXGBE_MII_5GBASE_T_ADVERTISE 0x0800 -#define IXGBE_MII_100BASE_T_ADVERTISE 0x0100 /* full duplex, bit:8 */ -#define IXGBE_MII_100BASE_T_ADVERTISE_HALF 0x0080 /* half duplex, bit:7 */ #define IXGBE_MII_RESTART 0x200 -#define IXGBE_MII_AUTONEG_COMPLETE 0x20 #define IXGBE_MII_AUTONEG_LINK_UP 0x04 #define IXGBE_MII_AUTONEG_REG 0x0 @@ -1320,30 +1314,20 @@ struct ixgbe_thermal_sensor_data { /* MDIO definitions */ #define IXGBE_MDIO_ZERO_DEV_TYPE 0x0 -#define IXGBE_MDIO_PMA_PMD_DEV_TYPE 0x1 #define IXGBE_MDIO_PCS_DEV_TYPE 0x3 -#define IXGBE_MDIO_PHY_XS_DEV_TYPE 0x4 -#define IXGBE_MDIO_AUTO_NEG_DEV_TYPE 0x7 -#define IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE 0x1E /* Device 30 */ #define IXGBE_TWINAX_DEV 1 #define IXGBE_MDIO_COMMAND_TIMEOUT 100 /* PHY Timeout for 1 GB mode */ -#define IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL 0x0 /* VS1 Control Reg */ -#define IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS 0x1 /* VS1 Status Reg */ #define IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS 0x0008 /* 1 = Link Up */ #define IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS 0x0010 /* 0 - 10G, 1 - 1G */ #define IXGBE_MDIO_VENDOR_SPECIFIC_1_10G_SPEED 0x0018 #define IXGBE_MDIO_VENDOR_SPECIFIC_1_1G_SPEED 0x0010 -#define IXGBE_MDIO_AUTO_NEG_CONTROL 0x0 /* AUTO_NEG Control Reg */ -#define IXGBE_MDIO_AUTO_NEG_STATUS 0x1 /* AUTO_NEG Status Reg */ #define IXGBE_MDIO_AUTO_NEG_VENDOR_STAT 0xC800 /* AUTO_NEG Vendor Status Reg */ #define IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM 0xCC00 /* AUTO_NEG Vendor TX Reg */ #define IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM2 0xCC01 /* AUTO_NEG Vendor Tx Reg */ #define IXGBE_MDIO_AUTO_NEG_VEN_LSC 0x1 /* AUTO_NEG Vendor Tx LSC */ -#define IXGBE_MDIO_AUTO_NEG_ADVT 0x10 /* AUTO_NEG Advt Reg */ -#define IXGBE_MDIO_AUTO_NEG_LP 0x13 /* AUTO_NEG LP Status Reg */ #define IXGBE_MDIO_AUTO_NEG_EEE_ADVT 0x3C /* AUTO_NEG EEE Advt Reg */ #define IXGBE_MDIO_PHY_SET_LOW_POWER_MODE 0x0800 /* Set low power mode */ @@ -1393,8 +1377,10 @@ struct ixgbe_thermal_sensor_data { #define TN1010_PHY_ID 0x00A19410 #define TNX_FW_REV 0xB #define X540_PHY_ID 0x01540200 -#define X550_PHY_ID 0x01540220 +#define X550_PHY_ID2 0x01540223 +#define X550_PHY_ID3 0x01540221 #define X557_PHY_ID 0x01540240 +#define X557_PHY_ID2 0x01540250 #define QT2022_PHY_ID 0x0043A400 #define ATH_PHY_ID 0x03429050 #define AQ_FW_REV 0x20 @@ -3352,6 +3338,7 @@ struct ixgbe_mac_operations { s32 (*led_off)(struct ixgbe_hw *, u32); s32 (*blink_led_start)(struct ixgbe_hw *, u32); s32 (*blink_led_stop)(struct ixgbe_hw *, u32); + s32 (*init_led_link_act)(struct ixgbe_hw *); /* RAR, Multicast, VLAN */ s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32); @@ -3372,6 +3359,7 @@ struct ixgbe_mac_operations { /* Flow Control */ s32 (*fc_enable)(struct ixgbe_hw *); s32 (*setup_fc)(struct ixgbe_hw *); + void (*fc_autoneg)(struct ixgbe_hw *); /* Manageability interface */ s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8); @@ -3410,16 +3398,28 @@ struct ixgbe_phy_operations { s32 (*read_i2c_sff8472)(struct ixgbe_hw *, u8 , u8 *); s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *); s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8); - s32 (*read_i2c_combined)(struct ixgbe_hw *, u8 addr, u16 reg, u16 *val); - s32 (*write_i2c_combined)(struct ixgbe_hw *, u8 addr, u16 reg, u16 val); s32 (*check_overtemp)(struct ixgbe_hw *); s32 (*set_phy_power)(struct ixgbe_hw *, bool on); s32 (*enter_lplu)(struct ixgbe_hw *); s32 (*handle_lasi)(struct ixgbe_hw *hw); - s32 (*read_i2c_combined_unlocked)(struct ixgbe_hw *, u8 
addr, u16 reg, - u16 *value); - s32 (*write_i2c_combined_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg, - u16 value); + s32 (*read_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr, + u8 *value); + s32 (*write_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr, + u8 value); +}; + +struct ixgbe_link_operations { + s32 (*read_link)(struct ixgbe_hw *, u8 addr, u16 reg, u16 *val); + s32 (*read_link_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg, + u16 *val); + s32 (*write_link)(struct ixgbe_hw *, u8 addr, u16 reg, u16 val); + s32 (*write_link_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg, + u16 val); +}; + +struct ixgbe_link_info { + struct ixgbe_link_operations ops; + u8 addr; }; struct ixgbe_eeprom_info { @@ -3462,6 +3462,7 @@ struct ixgbe_mac_info { u8 san_mac_rar_index; struct ixgbe_thermal_sensor_data thermal_sensor_data; bool set_lben; + u8 led_link_act; }; struct ixgbe_phy_info { @@ -3523,6 +3524,7 @@ struct ixgbe_hw { struct ixgbe_addr_filter_info addr_ctrl; struct ixgbe_fc_info fc; struct ixgbe_phy_info phy; + struct ixgbe_link_info link; struct ixgbe_eeprom_info eeprom; struct ixgbe_bus_info bus; struct ixgbe_mbx_info mbx; @@ -3546,6 +3548,7 @@ struct ixgbe_info { const struct ixgbe_eeprom_operations *eeprom_ops; const struct ixgbe_phy_operations *phy_ops; const struct ixgbe_mbx_operations *mbx_ops; + const struct ixgbe_link_operations *link_ops; const u32 *mvals; }; @@ -3593,17 +3596,35 @@ struct ixgbe_info { #define IXGBE_FUSES0_REV_MASK (3u << 6) #define IXGBE_KRM_PORT_CAR_GEN_CTRL(P) ((P) ? 0x8010 : 0x4010) +#define IXGBE_KRM_LINK_S1(P) ((P) ? 0x8200 : 0x4200) #define IXGBE_KRM_LINK_CTRL_1(P) ((P) ? 0x820C : 0x420C) #define IXGBE_KRM_AN_CNTL_1(P) ((P) ? 0x822C : 0x422C) #define IXGBE_KRM_AN_CNTL_8(P) ((P) ? 0x8248 : 0x4248) #define IXGBE_KRM_SGMII_CTRL(P) ((P) ? 0x82A0 : 0x42A0) +#define IXGBE_KRM_LP_BASE_PAGE_HIGH(P) ((P) ? 0x836C : 0x436C) #define IXGBE_KRM_DSP_TXFFE_STATE_4(P) ((P) ? 0x8634 : 0x4634) #define IXGBE_KRM_DSP_TXFFE_STATE_5(P) ((P) ? 0x8638 : 0x4638) #define IXGBE_KRM_RX_TRN_LINKUP_CTRL(P) ((P) ? 0x8B00 : 0x4B00) #define IXGBE_KRM_PMD_DFX_BURNIN(P) ((P) ? 0x8E00 : 0x4E00) +#define IXGBE_KRM_PMD_FLX_MASK_ST20(P) ((P) ? 0x9054 : 0x5054) #define IXGBE_KRM_TX_COEFF_CTRL_1(P) ((P) ? 0x9520 : 0x5520) #define IXGBE_KRM_RX_ANA_CTL(P) ((P) ? 
0x9A00 : 0x5A00) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA ~(0x3 << 20) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR BIT(20) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_LR (0x2 << 20) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN BIT(25) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN BIT(26) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN BIT(27) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10M ~(0x7 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_100M BIT(28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G (0x2 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G (0x3 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN (0x4 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_2_5G (0x7 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK (0x7 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART BIT(31) + #define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_32B BIT(9) #define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_KRPCS BIT(11) @@ -3618,6 +3639,7 @@ struct ixgbe_info { #define IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KR BIT(18) #define IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KX BIT(24) #define IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KR BIT(26) +#define IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE BIT(28) #define IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE BIT(29) #define IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART BIT(31) @@ -3627,6 +3649,8 @@ struct ixgbe_info { #define IXGBE_KRM_AN_CNTL_8_LINEAR BIT(0) #define IXGBE_KRM_AN_CNTL_8_LIMITING BIT(1) +#define IXGBE_KRM_LP_BASE_PAGE_HIGH_SYM_PAUSE BIT(10) +#define IXGBE_KRM_LP_BASE_PAGE_HIGH_ASM_PAUSE BIT(11) #define IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_100_D BIT(12) #define IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_10_D BIT(19) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index f2b1d48a16c3..e2ff823ee202 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -851,6 +851,7 @@ static const struct ixgbe_mac_operations mac_ops_X540 = { .get_link_capabilities = &ixgbe_get_copper_link_capabilities_generic, .led_on = &ixgbe_led_on_generic, .led_off = &ixgbe_led_off_generic, + .init_led_link_act = ixgbe_init_led_link_act_generic, .blink_led_start = &ixgbe_blink_led_start_X540, .blink_led_stop = &ixgbe_blink_led_stop_X540, .set_rar = &ixgbe_set_rar_generic, @@ -866,6 +867,7 @@ static const struct ixgbe_mac_operations mac_ops_X540 = { .set_vfta = &ixgbe_set_vfta_generic, .fc_enable = &ixgbe_fc_enable_generic, .setup_fc = ixgbe_setup_fc_generic, + .fc_autoneg = ixgbe_fc_autoneg, .set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic, .init_uta_tables = &ixgbe_init_uta_tables_generic, .setup_sfp = NULL, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 7e6b9267ca9d..11fb433eb924 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -28,11 +28,31 @@ static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *, ixgbe_link_speed); static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *); +static void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *); +static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *); +static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *); static s32 ixgbe_get_invariants_X550_x(struct ixgbe_hw *hw) { struct ixgbe_mac_info *mac = &hw->mac; struct ixgbe_phy_info *phy = &hw->phy; + struct ixgbe_link_info *link = &hw->link; + + /* Start with X540 invariants, since so simular */ + ixgbe_get_invariants_X540(hw); + + if (mac->ops.get_media_type(hw) != 
ixgbe_media_type_copper) + phy->ops.set_phy_power = NULL; + + link->addr = IXGBE_CS4227; + + return 0; +} + +static s32 ixgbe_get_invariants_X550_a(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_phy_info *phy = &hw->phy; /* Start with X540 invariants, since so simular */ ixgbe_get_invariants_X540(hw); @@ -69,8 +89,7 @@ static void ixgbe_setup_mux_ctl(struct ixgbe_hw *hw) */ static s32 ixgbe_read_cs4227(struct ixgbe_hw *hw, u16 reg, u16 *value) { - return hw->phy.ops.read_i2c_combined_unlocked(hw, IXGBE_CS4227, reg, - value); + return hw->link.ops.read_link_unlocked(hw, hw->link.addr, reg, value); } /** @@ -83,8 +102,7 @@ static s32 ixgbe_read_cs4227(struct ixgbe_hw *hw, u16 reg, u16 *value) */ static s32 ixgbe_write_cs4227(struct ixgbe_hw *hw, u16 reg, u16 value) { - return hw->phy.ops.write_i2c_combined_unlocked(hw, IXGBE_CS4227, reg, - value); + return hw->link.ops.write_link_unlocked(hw, hw->link.addr, reg, value); } /** @@ -322,6 +340,68 @@ static s32 ixgbe_write_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr, return IXGBE_NOT_IMPLEMENTED; } +/** + * ixgbe_read_i2c_combined_generic - Perform I2C read combined operation + * @hw: pointer to the hardware structure + * @addr: I2C bus address to read from + * @reg: I2C device register to read from + * @val: pointer to location to receive read value + * + * Returns an error code on error. + **/ +static s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr, + u16 reg, u16 *val) +{ + return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, true); +} + +/** + * ixgbe_read_i2c_combined_generic_unlocked - Do I2C read combined operation + * @hw: pointer to the hardware structure + * @addr: I2C bus address to read from + * @reg: I2C device register to read from + * @val: pointer to location to receive read value + * + * Returns an error code on error. + **/ +static s32 +ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr, + u16 reg, u16 *val) +{ + return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, false); +} + +/** + * ixgbe_write_i2c_combined_generic - Perform I2C write combined operation + * @hw: pointer to the hardware structure + * @addr: I2C bus address to write to + * @reg: I2C device register to write to + * @val: value to write + * + * Returns an error code on error. + **/ +static s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw, + u8 addr, u16 reg, u16 val) +{ + return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, true); +} + +/** + * ixgbe_write_i2c_combined_generic_unlocked - Do I2C write combined operation + * @hw: pointer to the hardware structure + * @addr: I2C bus address to write to + * @reg: I2C device register to write to + * @val: value to write + * + * Returns an error code on error. + **/ +static s32 +ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, + u8 addr, u16 reg, u16 val) +{ + return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, false); +} + /** ixgbe_init_eeprom_params_X550 - Initialize EEPROM params * @hw: pointer to hardware structure * @@ -1128,47 +1208,17 @@ out: return ret; } -/** ixgbe_setup_ixfi_x550em - Configure the KR PHY for iXFI mode. +/** + * ixgbe_setup_ixfi_x550em_x - MAC specific iXFI configuration * @hw: pointer to hardware structure - * @speed: the link speed to force * - * Configures the integrated KR PHY to use iXFI mode. Used to connect an - * internal and external PHY at a specific speed, without autonegotiation. + * iXfI configuration needed for ixgbe_mac_X550EM_x devices. 
**/ -static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) +static s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw) { s32 status; u32 reg_val; - /* Disable AN and force speed to 10G Serial. */ - status = ixgbe_read_iosf_sb_reg_x550(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); - if (status) - return status; - - reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE; - reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK; - - /* Select forced link speed for internal PHY. */ - switch (*speed) { - case IXGBE_LINK_SPEED_10GB_FULL: - reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G; - break; - case IXGBE_LINK_SPEED_1GB_FULL: - reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G; - break; - default: - /* Other link speeds are not supported by internal KR PHY. */ - return IXGBE_ERR_LINK_SETUP; - } - - status = ixgbe_write_iosf_sb_reg_x550(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); - if (status) - return status; - /* Disable training protocol FSM. */ status = ixgbe_read_iosf_sb_reg_x550(hw, IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id), @@ -1228,20 +1278,106 @@ static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) status = ixgbe_write_iosf_sb_reg_x550(hw, IXGBE_KRM_TX_COEFF_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); - if (status) + return status; +} + +/** + * ixgbe_restart_an_internal_phy_x550em - restart autonegotiation for the + * internal PHY + * @hw: pointer to hardware structure + **/ +static s32 ixgbe_restart_an_internal_phy_x550em(struct ixgbe_hw *hw) +{ + s32 status; + u32 link_ctrl; + + /* Restart auto-negotiation. */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &link_ctrl); + + if (status) { + hw_dbg(hw, "Auto-negotiation did not complete\n"); return status; + } - /* Toggle port SW reset by AN reset. */ - status = ixgbe_read_iosf_sb_reg_x550(hw, + link_ctrl |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; + status = hw->mac.ops.write_iosf_sb_reg(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + IXGBE_SB_IOSF_TARGET_KR_PHY, link_ctrl); + + if (hw->mac.type == ixgbe_mac_x550em_a) { + u32 flx_mask_st20; + + /* Indicate to FW that AN restart has been asserted */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_mask_st20); + + if (status) { + hw_dbg(hw, "Auto-negotiation did not complete\n"); + return status; + } + + flx_mask_st20 |= IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART; + status = hw->mac.ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, flx_mask_st20); + } + + return status; +} + +/** ixgbe_setup_ixfi_x550em - Configure the KR PHY for iXFI mode. + * @hw: pointer to hardware structure + * @speed: the link speed to force + * + * Configures the integrated KR PHY to use iXFI mode. Used to connect an + * internal and external PHY at a specific speed, without autonegotiation. + **/ +static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) +{ + s32 status; + u32 reg_val; + + /* Disable AN and force speed to 10G Serial. 
*/ + status = ixgbe_read_iosf_sb_reg_x550(hw, + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status) return status; - reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; + reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE; + reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK; + + /* Select forced link speed for internal PHY. */ + switch (*speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G; + break; + default: + /* Other link speeds are not supported by internal KR PHY. */ + return IXGBE_ERR_LINK_SETUP; + } + status = ixgbe_write_iosf_sb_reg_x550(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + if (status) + return status; + + /* Additional configuration needed for x550em_x */ + if (hw->mac.type == ixgbe_mac_X550EM_x) { + status = ixgbe_setup_ixfi_x550em_x(hw); + if (status) + return status; + } + + /* Toggle port SW reset by AN reset. */ + status = ixgbe_restart_an_internal_phy_x550em(hw); return status; } @@ -1292,7 +1428,7 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, __always_unused bool autoneg_wait_to_complete) { s32 status; - u16 slice, value; + u16 reg_slice, reg_val; bool setup_linear = false; /* Check if SFP module is supported and linear */ @@ -1308,71 +1444,68 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, if (status) return status; - if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { - /* Configure CS4227 LINE side to 10G SR. */ - slice = IXGBE_CS4227_LINE_SPARE22_MSB + (hw->bus.lan_id << 12); - value = IXGBE_CS4227_SPEED_10G; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, - slice, value); - if (status) - goto i2c_err; + /* Configure internal PHY for KR/KX. */ + ixgbe_setup_kr_speed_x550em(hw, speed); - slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12); - value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, - slice, value); - if (status) - goto i2c_err; - - /* Configure CS4227 for HOST connection rate then type. */ - slice = IXGBE_CS4227_HOST_SPARE22_MSB + (hw->bus.lan_id << 12); - value = speed & IXGBE_LINK_SPEED_10GB_FULL ? - IXGBE_CS4227_SPEED_10G : IXGBE_CS4227_SPEED_1G; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, - slice, value); - if (status) - goto i2c_err; + /* Configure CS4227 LINE side to proper mode. */ + reg_slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12); + if (setup_linear) + reg_val = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 0x1; + else + reg_val = (IXGBE_CS4227_EDC_MODE_SR << 1) | 0x1; - slice = IXGBE_CS4227_HOST_SPARE24_LSB + (hw->bus.lan_id << 12); - if (setup_linear) - value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; - else - value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, - slice, value); - if (status) - goto i2c_err; + status = hw->link.ops.write_link(hw, hw->link.addr, reg_slice, + reg_val); - /* Setup XFI internal link. */ - status = ixgbe_setup_ixfi_x550em(hw, &speed); - if (status) { - hw_dbg(hw, "setup_ixfi failed with %d\n", status); - return status; - } - } else { - /* Configure internal PHY for KR/KX. */ - status = ixgbe_setup_kr_speed_x550em(hw, speed); - if (status) { - hw_dbg(hw, "setup_kr_speed failed with %d\n", status); - return status; - } + return status; +} - /* Configure CS4227 LINE side to proper mode. 
*/ - slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12); - if (setup_linear) - value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; - else - value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, - slice, value); - if (status) - goto i2c_err; +/** + * ixgbe_setup_sfi_x550a - Configure the internal PHY for native SFI mode + * @hw: pointer to hardware structure + * @speed: the link speed to force + * + * Configures the integrated PHY for native SFI mode. Used to connect the + * internal PHY directly to an SFP cage, without autonegotiation. + **/ +static s32 ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed) +{ + struct ixgbe_mac_info *mac = &hw->mac; + s32 status; + u32 reg_val; + + /* Disable all AN and force speed to 10G Serial. */ + status = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + if (status) + return status; + + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK; + + /* Select forced link speed for internal PHY. */ + switch (*speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G; + break; + default: + /* Other link speeds are not supported by internal PHY. */ + return IXGBE_ERR_LINK_SETUP; } - return 0; + status = mac->ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + + /* Toggle port SW reset by AN reset. */ + status = ixgbe_restart_an_internal_phy_x550em(hw); -i2c_err: - hw_dbg(hw, "combined i2c access failed with %d\n", status); return status; } @@ -1388,45 +1521,39 @@ ixgbe_setup_mac_link_sfp_n(struct ixgbe_hw *hw, ixgbe_link_speed speed, { bool setup_linear = false; u32 reg_phy_int; - s32 rc; + s32 ret_val; /* Check if SFP module is supported and linear */ - rc = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear); + ret_val = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear); /* If no SFP module present, then return success. Return success since * SFP not present error is not excepted in the setup MAC link flow. 
*/ - if (rc == IXGBE_ERR_SFP_NOT_PRESENT) + if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT) return 0; - if (!rc) - return rc; + if (!ret_val) + return ret_val; - /* Configure internal PHY for native SFI */ - rc = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_8(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, - ®_phy_int); - if (rc) - return rc; + /* Configure internal PHY for native SFI based on module type */ + ret_val = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_phy_int); + if (!ret_val) + return ret_val; - if (setup_linear) { - reg_phy_int &= ~IXGBE_KRM_AN_CNTL_8_LIMITING; - reg_phy_int |= IXGBE_KRM_AN_CNTL_8_LINEAR; - } else { - reg_phy_int |= IXGBE_KRM_AN_CNTL_8_LIMITING; - reg_phy_int &= ~IXGBE_KRM_AN_CNTL_8_LINEAR; - } + reg_phy_int &= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA; + if (!setup_linear) + reg_phy_int |= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR; - rc = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_8(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, - reg_phy_int); - if (rc) - return rc; + ret_val = hw->mac.ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_phy_int); + if (!ret_val) + return ret_val; - /* Setup XFI/SFI internal link */ - return ixgbe_setup_ixfi_x550em(hw, &speed); + /* Setup SFI internal link. */ + return ixgbe_setup_sfi_x550a(hw, &speed); } /** @@ -1442,19 +1569,19 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, u32 reg_slice, slice_offset; bool setup_linear = false; u16 reg_phy_ext; - s32 rc; + s32 ret_val; /* Check if SFP module is supported and linear */ - rc = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear); + ret_val = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear); /* If no SFP module present, then return success. Return success since * SFP not present error is not excepted in the setup MAC link flow. */ - if (rc == IXGBE_ERR_SFP_NOT_PRESENT) + if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT) return 0; - if (!rc) - return rc; + if (!ret_val) + return ret_val; /* Configure internal PHY for KR/KX. */ ixgbe_setup_kr_speed_x550em(hw, speed); @@ -1463,10 +1590,10 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, return IXGBE_ERR_PHY_ADDR_INVALID; /* Get external PHY device id */ - rc = hw->phy.ops.read_reg(hw, IXGBE_CS4227_GLOBAL_ID_MSB, + ret_val = hw->phy.ops.read_reg(hw, IXGBE_CS4227_GLOBAL_ID_MSB, IXGBE_MDIO_ZERO_DEV_TYPE, ®_phy_ext); - if (rc) - return rc; + if (ret_val) + return ret_val; /* When configuring quad port CS4223, the MAC instance is part * of the slice offset. @@ -1538,7 +1665,7 @@ static s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, bool link_up_wait_to_complete) { u32 status; - u16 autoneg_status; + u16 i, autoneg_status; if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return IXGBE_ERR_CONFIG; @@ -1550,14 +1677,18 @@ static s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, if (status || !(*link_up)) return status; - /* MAC link is up, so check external PHY link. - * Read this twice back to back to indicate current status. - */ - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_status); - if (status) - return status; + /* MAC link is up, so check external PHY link. + * Link status is latching low, and can only be used to detect link + * drop, and not the current status of the link without performing + * back-to-back reads. 
+ */ + for (i = 0; i < 2; i++) { + status = hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN, + &autoneg_status); + + if (status) + return status; + } /* If external PHY link is not up, then indicate link not up */ if (!(autoneg_status & IXGBE_MDIO_AUTO_NEG_LINK_STATUS)) @@ -1575,7 +1706,7 @@ ixgbe_setup_sgmii(struct ixgbe_hw *hw, __always_unused ixgbe_link_speed speed, __always_unused bool autoneg_wait_to_complete) { struct ixgbe_mac_info *mac = &hw->mac; - u32 lval, sval; + u32 lval, sval, flx_val; s32 rc; rc = mac->ops.read_iosf_sb_reg(hw, @@ -1609,14 +1740,55 @@ ixgbe_setup_sgmii(struct ixgbe_hw *hw, __always_unused ixgbe_link_speed speed, if (rc) return rc; - lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; + rc = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val); + if (rc) + return rc; + + rc = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val); + if (rc) + return rc; + + flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G; + flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN; + rc = mac->ops.write_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, lval); + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, flx_val); + if (rc) + return rc; + rc = ixgbe_restart_an_internal_phy_x550em(hw); return rc; } +/** ixgbe_init_mac_link_ops_X550em_a - Init mac link function pointers + * @hw: pointer to hardware structure + **/ +static void ixgbe_init_mac_link_ops_X550em_a(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + + switch (mac->ops.get_media_type(hw)) { + case ixgbe_media_type_fiber: + mac->ops.setup_fc = NULL; + mac->ops.fc_autoneg = ixgbe_fc_autoneg_fiber_x550em_a; + break; + case ixgbe_media_type_backplane: + mac->ops.fc_autoneg = ixgbe_fc_autoneg_backplane_x550em_a; + mac->ops.setup_fc = ixgbe_setup_fc_backplane_x550em_a; + break; + default: + break; + } +} + /** ixgbe_init_mac_link_ops_X550em - init mac link function pointers * @hw: pointer to hardware structure **/ @@ -1664,6 +1836,10 @@ static void ixgbe_init_mac_link_ops_X550em(struct ixgbe_hw *hw) default: break; } + + /* Additional modification for X550em_a devices */ + if (hw->mac.type == ixgbe_mac_x550em_a) + ixgbe_init_mac_link_ops_X550em_a(hw); } /** ixgbe_setup_sfp_modules_X550em - Setup SFP module @@ -1740,7 +1916,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) /* Vendor alarm triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status || !(reg & IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN)) @@ -1748,7 +1924,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) /* Vendor Auto-Neg alarm triggered or Global alarm 1 triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_FLAG, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status || !(reg & (IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN | @@ -1757,7 +1933,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) /* Global alarm triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_ALARM_1, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status) @@ -1772,7 +1948,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct 
ixgbe_hw *hw, bool *lsc) if (reg & IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT) { /* device fault alarm triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_FAULT_MSG, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status) return status; @@ -1787,14 +1963,14 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) /* Vendor alarm 2 triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, ®); + MDIO_MMD_AN, ®); if (status || !(reg & IXGBE_MDIO_GLOBAL_STD_ALM2_INT)) return status; /* link connect/disconnect event occurred */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM2, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, ®); + MDIO_MMD_AN, ®); if (status) return status; @@ -1826,20 +2002,20 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) /* Enable link status change alarm */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, ®); + MDIO_MMD_AN, ®); if (status) return status; reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN; status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg); + MDIO_MMD_AN, reg); if (status) return status; /* Enable high temperature failure and global fault alarms */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status) return status; @@ -1848,14 +2024,14 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN); status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, reg); if (status) return status; /* Enable vendor Auto-Neg alarm and Global Interrupt Mask 1 alarm */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status) return status; @@ -1864,14 +2040,14 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) IXGBE_MDIO_GLOBAL_ALARM_1_INT); status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, reg); if (status) return status; /* Enable chip-wide vendor alarm */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_STD_MASK, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status) return status; @@ -1879,7 +2055,7 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) reg |= IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN; status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_STD_MASK, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, reg); return status; @@ -1945,13 +2121,31 @@ static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw, if (speed & IXGBE_LINK_SPEED_1GB_FULL) reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KX; - /* Restart auto-negotiation. 
*/ - reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; status = hw->mac.ops.write_iosf_sb_reg(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); - return status; + if (hw->mac.type == ixgbe_mac_x550em_a) { + /* Set lane mode to KR auto negotiation */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + + if (status) + return status; + + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK; + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN; + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN; + + status = hw->mac.ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + } + + return ixgbe_restart_an_internal_phy_x550em(hw); } /** ixgbe_setup_kx4_x550em - Configure the KX4 PHY. @@ -2020,14 +2214,12 @@ static s32 ixgbe_ext_phy_t_x550em_get_link(struct ixgbe_hw *hw, bool *link_up) *link_up = false; /* read this twice back to back to indicate current status */ - ret = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + ret = hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN, &autoneg_status); if (ret) return ret; - ret = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + ret = hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN, &autoneg_status); if (ret) return ret; @@ -2073,7 +2265,7 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) return 0; status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_STAT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + MDIO_MMD_AN, &speed); if (status) return status; @@ -2134,10 +2326,10 @@ static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) /* To turn on the LED, set mode to ON. */ hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data); + MDIO_MMD_VEND1, &phy_data); phy_data |= IXGBE_X557_LED_MANUAL_SET_MASK; hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data); + MDIO_MMD_VEND1, phy_data); return 0; } @@ -2156,10 +2348,10 @@ static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) /* To turn on the LED, set mode to ON. */ hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data); + MDIO_MMD_VEND1, &phy_data); phy_data &= ~IXGBE_X557_LED_MANUAL_SET_MASK; hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data); + MDIO_MMD_VEND1, phy_data); return 0; } @@ -2180,7 +2372,7 @@ static s32 ixgbe_get_lcd_t_x550em(struct ixgbe_hw *hw, *lcd_speed = IXGBE_LINK_SPEED_UNKNOWN; status = hw->phy.ops.read_reg(hw, IXGBE_AUTO_NEG_LP_STATUS, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + MDIO_MMD_AN, &an_lp_status); if (status) return status; @@ -2281,6 +2473,90 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw) return rc; } +/** + * ixgbe_fc_autoneg_backplane_x550em_a - Enable flow control IEEE clause 37 + * @hw: pointer to hardware structure + **/ +static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw) +{ + u32 link_s1, lp_an_page_low, an_cntl_1; + s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED; + ixgbe_link_speed speed; + bool link_up; + + /* AN should have completed when the cable was plugged in. + * Look for reasons to bail out. 
Bail out if: + * - FC autoneg is disabled, or if + * - link is not up. + */ + if (hw->fc.disable_fc_autoneg) { + hw_err(hw, "Flow control autoneg is disabled"); + goto out; + } + + hw->mac.ops.check_link(hw, &speed, &link_up, false); + if (!link_up) { + hw_err(hw, "The link is down"); + goto out; + } + + /* Check at auto-negotiation has completed */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_LINK_S1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &link_s1); + + if (status || (link_s1 & IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE) == 0) { + hw_dbg(hw, "Auto-Negotiation did not complete\n"); + status = IXGBE_ERR_FC_NOT_NEGOTIATED; + goto out; + } + + /* Read the 10g AN autoc and LP ability registers and resolve + * local flow control settings accordingly + */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl_1); + + if (status) { + hw_dbg(hw, "Auto-Negotiation did not complete\n"); + goto out; + } + + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_LP_BASE_PAGE_HIGH(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &lp_an_page_low); + + if (status) { + hw_dbg(hw, "Auto-Negotiation did not complete\n"); + goto out; + } + + status = ixgbe_negotiate_fc(hw, an_cntl_1, lp_an_page_low, + IXGBE_KRM_AN_CNTL_1_SYM_PAUSE, + IXGBE_KRM_AN_CNTL_1_ASM_PAUSE, + IXGBE_KRM_LP_BASE_PAGE_HIGH_SYM_PAUSE, + IXGBE_KRM_LP_BASE_PAGE_HIGH_ASM_PAUSE); + +out: + if (!status) { + hw->fc.fc_was_autonegged = true; + } else { + hw->fc.fc_was_autonegged = false; + hw->fc.current_mode = hw->fc.requested_mode; + } +} + +/** + * ixgbe_fc_autoneg_fiber_x550em_a - passthrough FC settings + * @hw: pointer to hardware structure + **/ +static void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *hw) +{ + hw->fc.fc_was_autonegged = false; + hw->fc.current_mode = hw->fc.requested_mode; +} + /** ixgbe_enter_lplu_x550em - Transition to low power states * @hw: pointer to hardware structure * @@ -2327,7 +2603,7 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw) return ixgbe_set_copper_phy_power(hw, false); status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_STAT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + MDIO_MMD_AN, &speed); if (status) return status; @@ -2349,20 +2625,20 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw) /* Clear AN completed indication */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + MDIO_MMD_AN, &autoneg_reg); if (status) return status; - status = hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + status = hw->phy.ops.read_reg(hw, MDIO_AN_10GBT_CTRL, + MDIO_MMD_AN, &an_10g_cntl_reg); if (status) return status; status = hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + MDIO_MMD_AN, &autoneg_reg); if (status) return status; @@ -2520,7 +2796,7 @@ static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw) status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_TX_VENDOR_ALARMS_3, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, + MDIO_MMD_PMAPMD, ®); if (status) return status; @@ -2531,7 +2807,7 @@ static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw) if (reg & IXGBE_MDIO_TX_VENDOR_ALARMS_3_RST_MASK) { status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_RES_PR_10, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, ®); if (status) return status; @@ -2540,7 +2816,7 @@ static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw) status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_RES_PR_10, - 
IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + MDIO_MMD_VEND1, reg); if (status) return status; @@ -2729,6 +3005,90 @@ static void ixgbe_set_source_address_pruning_X550(struct ixgbe_hw *hw, } /** + * ixgbe_setup_fc_backplane_x550em_a - Set up flow control + * @hw: pointer to hardware structure + * + * Called at init time to set up flow control. + **/ +static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw) +{ + s32 status = 0; + u32 an_cntl = 0; + + /* Validate the requested mode */ + if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { + hw_err(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); + return IXGBE_ERR_INVALID_LINK_SETTINGS; + } + + if (hw->fc.requested_mode == ixgbe_fc_default) + hw->fc.requested_mode = ixgbe_fc_full; + + /* Set up the 1G and 10G flow control advertisement registers so the + * HW will be able to do FC autoneg once the cable is plugged in. If + * we link at 10G, the 1G advertisement is harmless and vice versa. + */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl); + + if (status) { + hw_dbg(hw, "Auto-Negotiation did not complete\n"); + return status; + } + + /* The possible values of fc.requested_mode are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames, + * but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames but + * we do not support receiving pause frames). + * 3: Both Rx and Tx flow control (symmetric) are enabled. + * other: Invalid. + */ + switch (hw->fc.requested_mode) { + case ixgbe_fc_none: + /* Flow control completely disabled by software override. */ + an_cntl &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE | + IXGBE_KRM_AN_CNTL_1_ASM_PAUSE); + break; + case ixgbe_fc_tx_pause: + /* Tx Flow control is enabled, and Rx Flow control is + * disabled by software override. + */ + an_cntl |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE; + an_cntl &= ~IXGBE_KRM_AN_CNTL_1_SYM_PAUSE; + break; + case ixgbe_fc_rx_pause: + /* Rx Flow control is enabled and Tx Flow control is + * disabled by software override. Since there really + * isn't a way to advertise that we are capable of RX + * Pause ONLY, we will advertise that we support both + * symmetric and asymmetric Rx PAUSE, as such we fall + * through to the fc_full statement. Later, we will + * disable the adapter's ability to send PAUSE frames. + */ + case ixgbe_fc_full: + /* Flow control (both Rx and Tx) is enabled by SW override. */ + an_cntl |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE | + IXGBE_KRM_AN_CNTL_1_ASM_PAUSE; + break; + default: + hw_err(hw, "Flow control param set incorrectly\n"); + return IXGBE_ERR_CONFIG; + } + + status = hw->mac.ops.write_iosf_sb_reg(hw, + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, an_cntl); + + /* Restart auto-negotiation. 
*/ + status = ixgbe_restart_an_internal_phy_x550em(hw); + + return status; +} + +/** * ixgbe_set_mux - Set mux for port 1 access with CS4227 * @hw: pointer to hardware structure * @state: set mux if 1, clear if 0 @@ -2934,6 +3294,7 @@ static const struct ixgbe_mac_operations mac_ops_X550 = { X550_COMMON_MAC .led_on = ixgbe_led_on_generic, .led_off = ixgbe_led_off_generic, + .init_led_link_act = ixgbe_init_led_link_act_generic, .reset_hw = &ixgbe_reset_hw_X540, .get_media_type = &ixgbe_get_media_type_X540, .get_san_mac_addr = &ixgbe_get_san_mac_addr_generic, @@ -2948,12 +3309,14 @@ static const struct ixgbe_mac_operations mac_ops_X550 = { .prot_autoc_read = prot_autoc_read_generic, .prot_autoc_write = prot_autoc_write_generic, .setup_fc = ixgbe_setup_fc_generic, + .fc_autoneg = ixgbe_fc_autoneg, }; static const struct ixgbe_mac_operations mac_ops_X550EM_x = { X550_COMMON_MAC .led_on = ixgbe_led_on_t_x550em, .led_off = ixgbe_led_off_t_x550em, + .init_led_link_act = ixgbe_init_led_link_act_generic, .reset_hw = &ixgbe_reset_hw_X550em, .get_media_type = &ixgbe_get_media_type_X550em, .get_san_mac_addr = NULL, @@ -2966,6 +3329,7 @@ static const struct ixgbe_mac_operations mac_ops_X550EM_x = { .release_swfw_sync = &ixgbe_release_swfw_sync_X550em, .init_swfw_sync = &ixgbe_init_swfw_sync_X540, .setup_fc = NULL, /* defined later */ + .fc_autoneg = ixgbe_fc_autoneg, .read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550, .write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550, }; @@ -2974,6 +3338,7 @@ static struct ixgbe_mac_operations mac_ops_x550em_a = { X550_COMMON_MAC .led_on = ixgbe_led_on_t_x550em, .led_off = ixgbe_led_off_t_x550em, + .init_led_link_act = ixgbe_init_led_link_act_generic, .reset_hw = ixgbe_reset_hw_X550em, .get_media_type = ixgbe_get_media_type_X550em, .get_san_mac_addr = NULL, @@ -2985,6 +3350,7 @@ static struct ixgbe_mac_operations mac_ops_x550em_a = { .acquire_swfw_sync = ixgbe_acquire_swfw_sync_x550em_a, .release_swfw_sync = ixgbe_release_swfw_sync_x550em_a, .setup_fc = ixgbe_setup_fc_x550em, + .fc_autoneg = ixgbe_fc_autoneg, .read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550a, .write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550a, }; @@ -3036,11 +3402,6 @@ static const struct ixgbe_phy_operations phy_ops_X550EM_x = { .identify = &ixgbe_identify_phy_x550em, .read_reg = &ixgbe_read_phy_reg_generic, .write_reg = &ixgbe_write_phy_reg_generic, - .read_i2c_combined = &ixgbe_read_i2c_combined_generic, - .write_i2c_combined = &ixgbe_write_i2c_combined_generic, - .read_i2c_combined_unlocked = &ixgbe_read_i2c_combined_generic_unlocked, - .write_i2c_combined_unlocked = - &ixgbe_write_i2c_combined_generic_unlocked, }; static const struct ixgbe_phy_operations phy_ops_x550em_a = { @@ -3053,6 +3414,13 @@ static const struct ixgbe_phy_operations phy_ops_x550em_a = { .write_reg_mdi = &ixgbe_write_phy_reg_mdi, }; +static const struct ixgbe_link_operations link_ops_x550em_x = { + .read_link = &ixgbe_read_i2c_combined_generic, + .read_link_unlocked = &ixgbe_read_i2c_combined_generic_unlocked, + .write_link = &ixgbe_write_i2c_combined_generic, + .write_link_unlocked = &ixgbe_write_i2c_combined_generic_unlocked, +}; + static const u32 ixgbe_mvals_X550[IXGBE_MVALS_IDX_LIMIT] = { IXGBE_MVALS_INIT(X550) }; @@ -3083,11 +3451,12 @@ const struct ixgbe_info ixgbe_X550EM_x_info = { .phy_ops = &phy_ops_X550EM_x, .mbx_ops = &mbx_ops_generic, .mvals = ixgbe_mvals_X550EM_x, + .link_ops = &link_ops_x550em_x, }; const struct ixgbe_info ixgbe_x550em_a_info = { .mac = ixgbe_mac_x550em_a, - .get_invariants = 
&ixgbe_get_invariants_X550_x, + .get_invariants = &ixgbe_get_invariants_X550_a, .mac_ops = &mac_ops_x550em_a, .eeprom_ops = &eeprom_ops_X550EM_x, .phy_ops = &phy_ops_x550em_a, diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index d2775f032f74..d316f503a727 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1498,6 +1498,9 @@ static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter) { int i, q_vectors; + if (!adapter->msix_entries) + return; + q_vectors = adapter->num_msix_vectors; i = q_vectors - 1; @@ -2552,6 +2555,9 @@ static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter) **/ static void ixgbevf_reset_interrupt_capability(struct ixgbevf_adapter *adapter) { + if (!adapter->msix_entries) + return; + pci_disable_msix(adapter->pdev); kfree(adapter->msix_entries); adapter->msix_entries = NULL; @@ -3794,11 +3800,10 @@ static int ixgbevf_suspend(struct pci_dev *pdev, pm_message_t state) ixgbevf_free_irq(adapter); ixgbevf_free_all_tx_resources(adapter); ixgbevf_free_all_rx_resources(adapter); + ixgbevf_clear_interrupt_scheme(adapter); rtnl_unlock(); } - ixgbevf_clear_interrupt_scheme(adapter); - #ifdef CONFIG_PM retval = pci_save_state(pdev); if (retval) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 0f6225c042be..9bf7320107b0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2747,8 +2747,11 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) } err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); - if (err) + if (err) { + if (prog) + bpf_prog_sub(prog, priv->rx_ring_num - 1); goto unlock_out; + } if (priv->port_up) { port_up = 1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ea8af475e7d2..ac09767b6984 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -893,5 +893,9 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); +void mlx5e_add_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti); +void mlx5e_del_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 8ff22e83e1dd..68419a01db36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -174,6 +174,11 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, enum arfs_type type) { struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, + .encap_id = 0, + }; struct mlx5_flow_destination dest; struct mlx5e_tir *tir = priv->indir_tir; struct mlx5_flow_spec *spec; @@ -206,8 +211,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, } arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, + &flow_act, &dest, 1); if (IS_ERR(arfs_t->default_rule)) { err = 
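
The two early returns added to ixgbevf_free_irq() and ixgbevf_reset_interrupt_capability() pair with the suspend-path change below them: clearing the interrupt scheme now happens under rtnl_lock inside the netif_running() branch, so later teardown calls must tolerate msix_entries already being NULL. The guard idiom, as a sketch:

static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter)
{
	if (!adapter->msix_entries)
		return;		/* already torn down, e.g. by suspend */

	/* safe to walk adapter->msix_entries from here on */
}
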
PTR_ERR(arfs_t->default_rule); @@ -324,7 +328,7 @@ static int arfs_create_table(struct mlx5e_priv *priv, int err; ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL); + MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL, 0); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -465,6 +469,11 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, struct arfs_rule *arfs_rule) { + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, + .encap_id = 0, + }; struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; struct arfs_tuple *tuple = &arfs_rule->tuple; struct mlx5_flow_handle *rule = NULL; @@ -544,9 +553,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, } dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn; - rule = mlx5_add_flow_rules(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, - &dest, 1); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index bed544d47ba1..1fe80de5d68f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -158,6 +158,11 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid, struct mlx5_flow_spec *spec) { + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, + .encap_id = 0, + }; struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; struct mlx5_flow_destination dest; struct mlx5_flow_handle **rule_p; @@ -187,10 +192,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, break; } - *rule_p = mlx5_add_flow_rules(ft, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, - &dest, 1); + *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(*rule_p)) { err = PTR_ERR(*rule_p); @@ -623,6 +625,11 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, u16 etype, u8 proto) { + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, + .encap_id = 0, + }; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; int err = 0; @@ -644,10 +651,7 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); } - rule = mlx5_add_flow_rules(ft, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, - dest, 1); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); netdev_err(priv->netdev, "%s: add rule failed\n", __func__); @@ -777,7 +781,7 @@ static int mlx5e_create_ttc_table(struct mlx5e_priv *priv) int err; ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL); + MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL, 0); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -810,6 +814,11 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, struct mlx5e_l2_rule *ai, int type) { + struct mlx5_flow_act flow_act = { + .action = 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, + .encap_id = 0, + }; struct mlx5_flow_table *ft = priv->fs.l2.ft.t; struct mlx5_flow_destination dest; struct mlx5_flow_spec *spec; @@ -848,9 +857,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, break; } - ai->rule = mlx5_add_flow_rules(ft, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest, 1); + ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(ai->rule)) { netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n", __func__, mv_dmac); @@ -948,7 +955,7 @@ static int mlx5e_create_l2_table(struct mlx5e_priv *priv) ft->num_groups = 0; ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL); + MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL, 0); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); @@ -1038,7 +1045,7 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) ft->num_groups = 0; ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL); + MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL, 0); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index cf52c06377f2..3691451c728c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -99,7 +99,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, MLX5E_ETHTOOL_NUM_ENTRIES); ft = mlx5_create_auto_grouped_flow_table(ns, prio, table_size, - MLX5E_ETHTOOL_NUM_GROUPS, 0); + MLX5E_ETHTOOL_NUM_GROUPS, 0, 0); if (IS_ERR(ft)) return (void *)ft; @@ -290,10 +290,10 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, struct ethtool_rx_flow_spec *fs) { struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; struct mlx5_flow_handle *rule; int err = 0; - u32 action; spec = mlx5_vzalloc(sizeof(*spec)); if (!spec) @@ -304,7 +304,7 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, goto free; if (fs->ring_cookie == RX_CLS_FLOW_DISC) { - action = MLX5_FLOW_CONTEXT_ACTION_DROP; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; } else { dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (!dst) { @@ -314,12 +314,12 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn; - action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); - rule = mlx5_add_flow_rules(ft, spec, action, - MLX5_FS_DEFAULT_FLOW_TAG, dst, 1); + flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ba0c774a4cdd..313b765e2d4f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2995,8 +2995,8 @@ static int mlx5e_get_vf_stats(struct net_device *dev, vf_stats); } -static void mlx5e_add_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +void mlx5e_add_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) { struct mlx5e_priv 
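
From the en_arfs.c hunks above through fs_core.c further down, the recurring change is mechanical: the (action, flow_tag) scalar pair that every caller threaded into mlx5_add_flow_rules() is folded into struct mlx5_flow_act, which also carries the new encap_id. A before/after sketch of one call site, with all field and constant names taken from the diff:

/* before */
rule = mlx5_add_flow_rules(ft, spec,
			   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
			   MLX5_FS_DEFAULT_FLOW_TAG,
			   &dest, 1);

/* after: per-rule behaviour travels in one struct, so adding
 * encap_id (or future fields) no longer touches every signature
 */
struct mlx5_flow_act flow_act = {
	.action   = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
	.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
	.encap_id = 0,
};
rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
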
*priv = netdev_priv(netdev); @@ -3009,8 +3009,8 @@ static void mlx5e_add_vxlan_port(struct net_device *netdev, mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 1); } -static void mlx5e_del_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +void mlx5e_del_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index c1a7b058b4f6..47dfd5b14dfb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -256,6 +256,8 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, .ndo_setup_tc = mlx5e_rep_ndo_setup_tc, .ndo_get_stats64 = mlx5e_get_stats, + .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, + .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, }; static void mlx5e_build_rep_netdev_priv(struct mlx5_core_dev *mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 165682e2d2be..9d133fc6c65e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -40,17 +40,26 @@ #include <net/switchdev.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> +#include <net/tc_act/tc_tunnel_key.h> +#include <net/vxlan.h> #include "en.h" #include "en_tc.h" #include "eswitch.h" +#include "vxlan.h" struct mlx5e_tc_flow { struct rhash_head node; u64 cookie; struct mlx5_flow_handle *rule; + struct list_head encap; /* flows sharing the same encap */ struct mlx5_esw_flow_attr *attr; }; +enum { + MLX5_HEADER_TYPE_VXLAN = 0x0, + MLX5_HEADER_TYPE_NVGRE = 0x1, +}; + #define MLX5E_TC_TABLE_NUM_ENTRIES 1024 #define MLX5E_TC_TABLE_NUM_GROUPS 4 @@ -61,6 +70,11 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, { struct mlx5_core_dev *dev = priv->mdev; struct mlx5_flow_destination dest = { 0 }; + struct mlx5_flow_act flow_act = { + .action = action, + .flow_tag = flow_tag, + .encap_id = 0, + }; struct mlx5_fc *counter = NULL; struct mlx5_flow_handle *rule; bool table_created = false; @@ -83,7 +97,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, MLX5E_TC_PRIO, MLX5E_TC_TABLE_NUM_ENTRIES, MLX5E_TC_TABLE_NUM_GROUPS, - 0); + 0, 0); if (IS_ERR(priv->fs.tc.t)) { netdev_err(priv->netdev, "Failed to create tc offload table\n"); @@ -95,9 +109,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - rule = mlx5_add_flow_rules(priv->fs.tc.t, spec, - action, flow_tag, - &dest, 1); + rule = mlx5_add_flow_rules(priv->fs.tc.t, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) goto err_add_rule; @@ -152,6 +164,121 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, } } +static void parse_vxlan_attr(struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + 
struct flow_dissector_key_keyid *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->key); + struct flow_dissector_key_keyid *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->mask); + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, + be32_to_cpu(mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, + be32_to_cpu(key->keyid)); + } +} + +static int parse_tunnel_attr(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f) +{ + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + struct flow_dissector_key_ports *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + f->key); + struct flow_dissector_key_ports *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + f->mask); + + /* Full udp dst port must be given */ + if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) + return -EOPNOTSUPP; + + /* udp src port isn't supported */ + if (memchr_inv(&mask->src, 0, sizeof(mask->src))) + return -EOPNOTSUPP; + + if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) && + MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) + parse_vxlan_attr(spec, f); + else + return -EOPNOTSUPP; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_dport, ntohs(mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_dport, ntohs(key->dst)); + + } else { /* udp dst port must be given */ + return -EOPNOTSUPP; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_dissector_key_ipv4_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + f->key); + struct flow_dissector_key_ipv4_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + f->mask); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4, + ntohl(key->src)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(key->dst)); + } + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); + + /* Enforce DMAC when offloading incoming tunneled flows. + * Flow counters require a match on the DMAC. 
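
parse_tunnel_attr() relies on the flower key/mask convention: f->key holds the values and f->mask marks which bits are significant, with memchr_inv() doing the all-0xff and all-zero tests. A short sketch of that idiom with hypothetical helper names; memchr_inv() itself is the kernel helper from <linux/string.h>:

#include <linux/string.h>

/* memchr_inv(p, c, n) returns NULL iff every byte of p equals c */
static bool mask_requires_exact_match(const void *mask, size_t len)
{
	return !memchr_inv(mask, 0xff, len);	/* all bits significant */
}

static bool mask_is_wildcard(const void *mask, size_t len)
{
	return !memchr_inv(mask, 0x00, len);	/* no bits significant */
}

Hence the checks above: the UDP destination port mask must be exact, and any masking of the source port at all makes the flow unoffloadable.
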
+ */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dmac_47_16), priv->netdev->dev_addr); + + /* let software handle IP fragments */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); + + return 0; +} + static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f) { @@ -169,12 +296,44 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec BIT(FLOW_DISSECTOR_KEY_VLAN) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_PORTS))) { + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | + BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL))) { netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", f->dissector->used_keys); return -EOPNOTSUPP; } + if ((dissector_uses_key(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || + dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) || + dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) && + dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_dissector_key_control *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL, + f->key); + switch (key->addr_type) { + case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + if (parse_tunnel_attr(priv, spec, f)) + return -EOPNOTSUPP; + break; + default: + return -EOPNOTSUPP; + } + + /* In decap flow, header pointers should point to the inner + * headers, outer header were already set by parse_tunnel_attr + */ + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + inner_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + inner_headers); + } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_dissector_key_control *key = skb_flow_dissector_target(f->dissector, @@ -386,11 +545,243 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return 0; } +static inline int cmp_encap_info(struct mlx5_encap_info *a, + struct mlx5_encap_info *b) +{ + return memcmp(a, b, sizeof(*a)); +} + +static inline int hash_encap_info(struct mlx5_encap_info *info) +{ + return jhash(info, sizeof(*info), 0); +} + +static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct flowi4 *fl4, + struct neighbour **out_n, + __be32 *saddr, + int *out_ttl) +{ + struct rtable *rt; + struct neighbour *n = NULL; + int ttl; + +#if IS_ENABLED(CONFIG_INET) + rt = ip_route_output_key(dev_net(mirred_dev), fl4); + if (IS_ERR(rt)) { + pr_warn("%s: no route to %pI4\n", __func__, &fl4->daddr); + return -EOPNOTSUPP; + } +#else + return -EOPNOTSUPP; +#endif + + if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) { + pr_warn("%s: Can't offload the flow, netdevices aren't on the same HW e-switch\n", + __func__); + ip_rt_put(rt); + return -EOPNOTSUPP; + } + + ttl = ip4_dst_hoplimit(&rt->dst); + n = dst_neigh_lookup(&rt->dst, &fl4->daddr); + ip_rt_put(rt); + if (!n) + return -ENOMEM; + + *out_n = n; + *saddr = fl4->saddr; + *out_ttl = ttl; + *out_dev = rt->dst.dev; + + return 0; +} + +static int gen_vxlan_header_ipv4(struct net_device *out_dev, + char buf[], + unsigned 
char h_dest[ETH_ALEN], + int ttl, + __be32 daddr, + __be32 saddr, + __be16 udp_dst_port, + __be32 vx_vni) +{ + int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN; + struct ethhdr *eth = (struct ethhdr *)buf; + struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr)); + struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr)); + struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); + + memset(buf, 0, encap_size); + + ether_addr_copy(eth->h_dest, h_dest); + ether_addr_copy(eth->h_source, out_dev->dev_addr); + eth->h_proto = htons(ETH_P_IP); + + ip->daddr = daddr; + ip->saddr = saddr; + + ip->ttl = ttl; + ip->protocol = IPPROTO_UDP; + ip->version = 0x4; + ip->ihl = 0x5; + + udp->dest = udp_dst_port; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(vx_vni); + + return encap_size; +} + +static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5_encap_entry *e, + struct net_device **out_dev) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + struct flowi4 fl4 = {}; + struct neighbour *n; + char *encap_header; + int encap_size; + __be32 saddr; + int ttl; + int err; + + encap_header = kzalloc(max_encap_size, GFP_KERNEL); + if (!encap_header) + return -ENOMEM; + + switch (e->tunnel_type) { + case MLX5_HEADER_TYPE_VXLAN: + fl4.flowi4_proto = IPPROTO_UDP; + fl4.fl4_dport = e->tun_info.tp_dst; + break; + default: + err = -EOPNOTSUPP; + goto out; + } + fl4.daddr = e->tun_info.daddr; + + err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev, + &fl4, &n, &saddr, &ttl); + if (err) + goto out; + + e->n = n; + e->out_dev = *out_dev; + + if (!(n->nud_state & NUD_VALID)) { + err = -ENOTSUPP; + goto out; + } + + neigh_ha_snapshot(e->h_dest, n, *out_dev); + + switch (e->tunnel_type) { + case MLX5_HEADER_TYPE_VXLAN: + encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header, + e->h_dest, ttl, + e->tun_info.daddr, + saddr, e->tun_info.tp_dst, + e->tun_info.tun_id); + break; + default: + err = -EOPNOTSUPP; + goto out; + } + + err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, + encap_size, encap_header, &e->encap_id); +out: + kfree(encap_header); + return err; +} + +static int mlx5e_attach_encap(struct mlx5e_priv *priv, + struct ip_tunnel_info *tun_info, + struct net_device *mirred_dev, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + unsigned short family = ip_tunnel_info_af(tun_info); + struct ip_tunnel_key *key = &tun_info->key; + struct mlx5_encap_info info; + struct mlx5_encap_entry *e; + struct net_device *out_dev; + uintptr_t hash_key; + bool found = false; + int tunnel_type; + int err; + + /* udp dst port must be given */ + if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst))) + return -EOPNOTSUPP; + + if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) && + MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { + info.tp_dst = key->tp_dst; + info.tun_id = tunnel_id_to_key32(key->tun_id); + tunnel_type = MLX5_HEADER_TYPE_VXLAN; + } else { + return -EOPNOTSUPP; + } + + switch (family) { + case AF_INET: + info.daddr = key->u.ipv4.dst; + break; + default: + return -EOPNOTSUPP; + } + + hash_key = hash_encap_info(&info); + + hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, + encap_hlist, hash_key) { + if (!cmp_encap_info(&e->tun_info, &info)) { + found = true; + break; + } + } + + if (found) { + attr->encap = e; + return 0; + } + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) + return 
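
mlx5e_attach_encap() (continuing below) deduplicates encap headers: entries are keyed by jhash() over struct mlx5_encap_info and compared with a raw memcmp(). An observation, not part of the patch: byte-wise hashing and comparison include compiler padding, and a struct of two __be32 plus one __be16 carries two tail padding bytes, so a defensively written lookup zeroes the whole key first:

struct mlx5_encap_info info;

memset(&info, 0, sizeof(info));	/* clear the padding bytes too */
info.daddr  = key->u.ipv4.dst;
info.tun_id = tunnel_id_to_key32(key->tun_id);
info.tp_dst = key->tp_dst;

/* both see all sizeof(info) bytes, padding included */
hash_key = jhash(&info, sizeof(info), 0);
found    = !memcmp(&e->tun_info, &info, sizeof(info));
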
-ENOMEM; + + e->tun_info = info; + e->tunnel_type = tunnel_type; + INIT_LIST_HEAD(&e->flows); + + err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev); + if (err) + goto out_err; + + attr->encap = e; + hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + + return err; + +out_err: + kfree(e); + return err; +} + static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, - struct mlx5_esw_flow_attr *attr) + struct mlx5e_tc_flow *flow) { + struct mlx5_esw_flow_attr *attr = flow->attr; + struct ip_tunnel_info *info = NULL; const struct tc_action *a; LIST_HEAD(actions); + bool encap = false; + int err; if (tc_no_actions(exts)) return -EINVAL; @@ -413,16 +804,37 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); - if (!switchdev_port_same_parent_id(priv->netdev, out_dev)) { + if (switchdev_port_same_parent_id(priv->netdev, + out_dev)) { + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + out_priv = netdev_priv(out_dev); + attr->out_rep = out_priv->ppriv; + } else if (encap) { + err = mlx5e_attach_encap(priv, info, + out_dev, attr); + if (err) + return err; + list_add(&flow->encap, &attr->encap->flows); + attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + out_priv = netdev_priv(attr->encap->out_dev); + attr->out_rep = out_priv->ppriv; + } else { pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", priv->netdev->name, out_dev->name); return -EINVAL; } + continue; + } - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - out_priv = netdev_priv(out_dev); - attr->out_rep = out_priv->ppriv; + if (is_tcf_tunnel_set(a)) { + info = tcf_tunnel_info(a); + if (info) + encap = true; + else + return -EOPNOTSUPP; continue; } @@ -439,6 +851,11 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } + if (is_tcf_tunnel_release(a)) { + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + continue; + } + return -EINVAL; } return 0; @@ -487,7 +904,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (fdb_flow) { flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1); - err = parse_tc_fdb_actions(priv, f->exts, flow->attr); + err = parse_tc_fdb_actions(priv, f->exts, flow); if (err < 0) goto err_free; flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr); @@ -524,6 +941,24 @@ out: return err; } +static void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) { + struct list_head *next = flow->encap.next; + + list_del(&flow->encap); + if (list_empty(next)) { + struct mlx5_encap_entry *e; + + e = list_entry(next, struct mlx5_encap_entry, flows); + if (e->n) { + mlx5_encap_dealloc(priv->mdev, e->encap_id); + neigh_release(e->n); + } + hlist_del_rcu(&e->encap_hlist); + kfree(e); + } +} + int mlx5e_delete_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f) { @@ -539,6 +974,9 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, mlx5e_tc_del_flow(priv, flow->rule, flow->attr); + if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + mlx5e_detach_encap(priv, flow); + kfree(flow); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9ee002ecb4bb..9734ac89826e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ 
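
An ordering subtlety in parse_tc_fdb_actions() above: is_tcf_tunnel_set() only records the tunnel metadata and raises the local encap flag, and the encap entry is actually attached when the subsequent mirred redirect is parsed, so a tunnel_key set action must precede the mirred action within the filter. The action bits each branch accumulates (constants from the diff):

/* encap branch of the mirred redirect action */
attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
		MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
		MLX5_FLOW_CONTEXT_ACTION_COUNT;

/* rx direction: tunnel_key release is just a flag here; matching is
 * switched to the inner headers at install time in eswitch_offloads.c
 */
attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
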
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -244,6 +244,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, int match_header = (is_zero_ether_addr(mac_c) ? 0 : MLX5_MATCH_OUTER_HEADERS); struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_destination dest; struct mlx5_flow_spec *spec; void *mv_misc = NULL; @@ -285,10 +286,10 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", dmac_v, dmac_c, vport); spec->match_criteria_enable = match_header; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->fdb_table.fdb, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 0, &dest, 1); + &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { esw_warn(esw->dev, "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", @@ -361,7 +362,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) memset(flow_group_in, 0, inlen); table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0); + fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0, 0); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create FDB Table err %d\n", err); @@ -1212,6 +1213,7 @@ static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, static int esw_vport_ingress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; int err = 0; u8 *smac_v; @@ -1264,10 +1266,10 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, } spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, - MLX5_FLOW_CONTEXT_ACTION_ALLOW, - 0, NULL, 0); + &flow_act, NULL, 0); if (IS_ERR(vport->ingress.allow_rule)) { err = PTR_ERR(vport->ingress.allow_rule); esw_warn(esw->dev, @@ -1278,10 +1280,10 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, } memset(spec, 0, sizeof(*spec)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; vport->ingress.drop_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, - MLX5_FLOW_CONTEXT_ACTION_DROP, - 0, NULL, 0); + &flow_act, NULL, 0); if (IS_ERR(vport->ingress.drop_rule)) { err = PTR_ERR(vport->ingress.drop_rule); esw_warn(esw->dev, @@ -1301,6 +1303,7 @@ out: static int esw_vport_egress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; int err = 0; @@ -1338,10 +1341,10 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan); spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; vport->egress.allowed_vlan = mlx5_add_flow_rules(vport->egress.acl, spec, - MLX5_FLOW_CONTEXT_ACTION_ALLOW, - 0, NULL, 0); + &flow_act, NULL, 0); if (IS_ERR(vport->egress.allowed_vlan)) { err = PTR_ERR(vport->egress.allowed_vlan); esw_warn(esw->dev, @@ -1353,10 +1356,10 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, /* Drop others rule (star rule) */ memset(spec, 0, sizeof(*spec)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; vport->egress.drop_rule = mlx5_add_flow_rules(vport->egress.acl, spec, - MLX5_FLOW_CONTEXT_ACTION_DROP, - 0, NULL, 0); + &flow_act, NULL, 
0); if (IS_ERR(vport->egress.drop_rule)) { err = PTR_ERR(vport->egress.drop_rule); esw_warn(esw->dev, @@ -1779,6 +1782,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } + hash_init(esw->offloads.encap_tbl); mutex_init(&esw->state_lock); for (vport_num = 0; vport_num < total_vports; vport_num++) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 6d414cb1b75f..40482e841413 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -199,6 +199,7 @@ struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; struct mlx5_eswitch_rep *vport_reps; + DECLARE_HASHTABLE(encap_tbl, 8); }; struct mlx5_eswitch { @@ -272,6 +273,24 @@ enum { #define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x40 #define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80 +struct mlx5_encap_info { + __be32 daddr; + __be32 tun_id; + __be16 tp_dst; +}; + +struct mlx5_encap_entry { + struct hlist_node encap_hlist; + struct list_head flows; + u32 encap_id; + struct neighbour *n; + struct mlx5_encap_info tun_info; + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + + struct net_device *out_dev; + int tunnel_type; +}; + struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_eswitch_rep *out_rep; @@ -279,6 +298,7 @@ struct mlx5_esw_flow_attr { int action; u16 vlan; bool vlan_handled; + struct mlx5_encap_entry *encap; }; int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 53d9d6ce008b..50fe8e8861bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -49,23 +49,23 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest[2] = {}; + struct mlx5_flow_act flow_act = {0}; struct mlx5_fc *counter = NULL; struct mlx5_flow_handle *rule; void *misc; - int action; int i = 0; if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); - action = attr->action; + flow_act.action = attr->action; - if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest[i].vport_num = attr->out_rep->vport; i++; } - if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(esw->dev, true); if (IS_ERR(counter)) return ERR_CAST(counter); @@ -82,9 +82,14 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) + spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; + + if (attr->encap) + flow_act.encap_id = attr->encap->encap_id; rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb, - spec, action, 0, dest, i); + spec, &flow_act, dest, i); if (IS_ERR(rule)) mlx5_fc_destroy(esw->dev, counter); @@ -274,6 +279,7 @@ out: static struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_destination dest; struct mlx5_flow_handle *flow_rule; struct mlx5_flow_spec *spec; @@ -297,10 +303,10 @@ 
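
DECLARE_HASHTABLE(encap_tbl, 8) in struct mlx5_esw_offload sizes the table at 2^8 = 256 bucket heads, and the hash_init() added to mlx5_eswitch_init() must run before first use. A sketch of the <linux/hashtable.h> calls this series leans on:

#include <linux/hashtable.h>

DECLARE_HASHTABLE(encap_tbl, 8);	/* 1 << 8 = 256 buckets */

static void encap_tbl_lifecycle(struct mlx5_encap_entry *e, u32 key)
{
	hash_init(encap_tbl);				/* once, at init */
	hash_add_rcu(encap_tbl, &e->encap_hlist, key);	/* publish */

	/* walks only the chain of the bucket that key maps to */
	hash_for_each_possible_rcu(encap_tbl, e, encap_hlist, key)
		;

	hlist_del_rcu(&e->encap_hlist);			/* before kfree(e) */
}
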
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = vport; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 0, &dest, 1); + &flow_act, &dest, 1); if (IS_ERR(flow_rule)) esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); out: @@ -363,6 +369,7 @@ out_err: static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) { + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_destination dest; struct mlx5_flow_handle *flow_rule = NULL; struct mlx5_flow_spec *spec; @@ -377,10 +384,10 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = 0; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 0, &dest, 1); + &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err); @@ -407,6 +414,7 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) u32 *flow_group_in; void *match_criteria; int table_size, ix, err = 0; + u32 flags = 0; flow_group_in = mlx5_vzalloc(inlen); if (!flow_group_in) @@ -421,9 +429,14 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n", MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) + flags |= MLX5_FLOW_TABLE_TUNNEL_EN; + fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, ESW_OFFLOADS_NUM_ENTRIES, - ESW_OFFLOADS_NUM_GROUPS, 0); + ESW_OFFLOADS_NUM_GROUPS, 0, + flags); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err); @@ -432,7 +445,7 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) esw->fdb_table.fdb = fdb; table_size = nvports + MAX_PF_SQ + 1; - fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0); + fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0, 0); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err); @@ -524,7 +537,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw) return -ENOMEM; } - ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0); + ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0, 0); if (IS_ERR(ft_offloads)) { err = PTR_ERR(ft_offloads); esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err); @@ -590,6 +603,7 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) { + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_destination dest; struct mlx5_flow_handle *flow_rule; struct mlx5_flow_spec *spec; @@ -612,9 +626,9 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; dest.tir_num = tirn; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 
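
Two capability-driven details sit in the offloads FDB path above: a rule carrying the DECAP action must also enable inner-header matching, since after decapsulation the classification keys live in the inner packet, and the fast-path FDB only gets MLX5_FLOW_TABLE_TUNNEL_EN when firmware advertises both encap and decap. In outline, with all names from the diff:

/* per rule (mlx5_eswitch_add_offloaded_rule) */
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
	spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
if (attr->encap)
	flow_act.encap_id = attr->encap->encap_id;

/* per table (esw_create_offloads_fdb_table): opt in only when
 * both directions are supported
 */
if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) &&
    MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
	flags |= MLX5_FLOW_TABLE_TUNNEL_EN;
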
0, &dest, 1); + &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 113c32326333..c4478ecd8056 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -37,6 +37,7 @@ #include "fs_core.h" #include "fs_cmd.h" #include "mlx5_core.h" +#include "eswitch.h" int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft) @@ -61,8 +62,9 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, enum fs_flow_table_op_mod op_mod, enum fs_flow_table_type type, unsigned int level, unsigned int log_size, struct mlx5_flow_table - *next_ft, unsigned int *table_id) + *next_ft, unsigned int *table_id, u32 flags) { + int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN); u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; int err; @@ -78,6 +80,9 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, MLX5_SET(create_flow_table_in, in, other_vport, 1); } + MLX5_SET(create_flow_table_in, in, decap_en, en_encap_decap); + MLX5_SET(create_flow_table_in, in, encap_en, en_encap_decap); + switch (op_mod) { case FS_FT_OP_MOD_NORMAL: if (next_ft) { @@ -243,6 +248,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, group_id, group_id); MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag); MLX5_SET(flow_context, in_flow_context, action, fte->action); + MLX5_SET(flow_context, in_flow_context, encap_id, fte->encap_id); in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, match_value); memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param)); @@ -453,27 +459,32 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, *bytes = MLX5_GET64(traffic_counter, stats, octets); } -#define MAX_ENCAP_SIZE (128) - -int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev, - int header_type, - size_t size, - void *encap_header, - u32 *encap_id) +int mlx5_encap_alloc(struct mlx5_core_dev *dev, + int header_type, + size_t size, + void *encap_header, + u32 *encap_id) { + int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size); u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)]; - u32 in[MLX5_ST_SZ_DW(alloc_encap_header_in) + - (MAX_ENCAP_SIZE / sizeof(u32))]; - void *encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, - encap_header); - void *header = MLX5_ADDR_OF(encap_header_in, encap_header_in, - encap_header); - int inlen = header - (void *)in + size; + void *encap_header_in; + void *header; + int inlen; int err; + u32 *in; - if (size > MAX_ENCAP_SIZE) + if (size > MLX5_CAP_ESW(dev, max_encap_header_size)) return -EINVAL; + in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + max_encap_size, + GFP_KERNEL); + if (!in) + return -ENOMEM; + + encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header); + header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header); + inlen = header - (void *)in + size; + memset(in, 0, inlen); MLX5_SET(alloc_encap_header_in, in, opcode, MLX5_CMD_OP_ALLOC_ENCAP_HEADER); @@ -485,10 +496,11 @@ int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev, err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id); + kfree(in); return err; } -void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 
encap_id) +void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id) { u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)]; u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index c5bc4686c832..8fad80688536 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -38,7 +38,7 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, enum fs_flow_table_op_mod op_mod, enum fs_flow_table_type type, unsigned int level, unsigned int log_size, struct mlx5_flow_table - *next_ft, unsigned int *table_id); + *next_ft, unsigned int *table_id, u32 flags); int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft); @@ -89,11 +89,4 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b, u16 id, u64 *packets, u64 *bytes); -int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev, - int header_type, - size_t size, - void *encap_header, - u32 *encap_id); -void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id); - #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e65eabf9c850..9adc766c7a3f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -460,8 +460,7 @@ static void del_flow_group(struct fs_node *node) fg->id, ft->id); } -static struct fs_fte *alloc_fte(u8 action, - u32 flow_tag, +static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, u32 *match_value, unsigned int index) { @@ -473,9 +472,10 @@ static struct fs_fte *alloc_fte(u8 action, memcpy(fte->val, match_value, sizeof(fte->val)); fte->node.type = FS_TYPE_FLOW_ENTRY; - fte->flow_tag = flow_tag; + fte->flow_tag = flow_act->flow_tag; fte->index = index; - fte->action = action; + fte->action = flow_act->action; + fte->encap_id = flow_act->encap_id; return fte; } @@ -505,7 +505,8 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte, enum fs_flow_table_type table_type, - enum fs_flow_table_op_mod op_mod) + enum fs_flow_table_op_mod op_mod, + u32 flags) { struct mlx5_flow_table *ft; @@ -519,6 +520,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft ft->type = table_type; ft->vport = vport; ft->max_fte = max_fte; + ft->flags = flags; INIT_LIST_HEAD(&ft->fwd_rules); mutex_init(&ft->lock); @@ -777,7 +779,8 @@ static void list_add_flow_table(struct mlx5_flow_table *ft, static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns, enum fs_flow_table_op_mod op_mod, u16 vport, int prio, - int max_fte, u32 level) + int max_fte, u32 level, + u32 flags) { struct mlx5_flow_table *next_ft = NULL; struct mlx5_flow_table *ft; @@ -810,7 +813,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa vport, max_fte ? roundup_pow_of_two(max_fte) : 0, root->table_type, - op_mod); + op_mod, flags); if (!ft) { err = -ENOMEM; goto unlock_root; @@ -820,7 +823,8 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa log_table_sz = ft->max_fte ? 
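
mlx5_encap_alloc() (the renamed mlx5_cmd_alloc_encap, now exported through mlx5_core.h) sizes its command buffer from the firmware capability instead of the old fixed 128-byte stack array. The length computation, annotated; note that kzalloc() already zeroes the buffer, so the memset(in, 0, inlen) retained by the patch is redundant but harmless:

int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size);

if (size > max_encap_size)	/* requested header exceeds FW limit */
	return -EINVAL;

/* fixed command layout plus worst-case inline header, pre-zeroed */
in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + max_encap_size,
	     GFP_KERNEL);
if (!in)
	return -ENOMEM;

encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header);
header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header);
inlen = header - (void *)in + size;	/* bytes actually sent to FW */
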
ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type, - ft->level, log_table_sz, next_ft, &ft->id); + ft->level, log_table_sz, next_ft, &ft->id, + ft->flags); if (err) goto free_ft; @@ -845,10 +849,11 @@ unlock_root: struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, int max_fte, - u32 level) + u32 level, + u32 flags) { return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, 0, prio, - max_fte, level); + max_fte, level, flags); } struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, @@ -856,7 +861,7 @@ struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace u32 level, u16 vport) { return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, vport, prio, - max_fte, level); + max_fte, level, 0); } struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( @@ -864,7 +869,7 @@ struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( int prio, u32 level) { return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_LAG_DEMUX, 0, prio, 0, - level); + level, 0); } EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); @@ -872,14 +877,15 @@ struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_nam int prio, int num_flow_table_entries, int max_num_groups, - u32 level) + u32 level, + u32 flags) { struct mlx5_flow_table *ft; if (max_num_groups > num_flow_table_entries) return ERR_PTR(-EINVAL); - ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level); + ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level, flags); if (IS_ERR(ft)) return ft; @@ -1111,15 +1117,14 @@ static unsigned int get_free_fte_index(struct mlx5_flow_group *fg, /* prev is output, prev->next = new_fte */ static struct fs_fte *create_fte(struct mlx5_flow_group *fg, u32 *match_value, - u8 action, - u32 flow_tag, + struct mlx5_flow_act *flow_act, struct list_head **prev) { struct fs_fte *fte; int index; index = get_free_fte_index(fg, prev); - fte = alloc_fte(action, flow_tag, match_value, index); + fte = alloc_fte(flow_act, match_value, index); if (IS_ERR(fte)) return fte; @@ -1213,8 +1218,7 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, u32 *match_value, - u8 action, - u32 flow_tag, + struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num) { @@ -1228,12 +1232,13 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, fs_for_each_fte(fte, fg) { nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); if (compare_match_value(&fg->mask, match_value, &fte->val) && - (action & fte->action) && flow_tag == fte->flow_tag) { + (flow_act->action & fte->action) && + flow_act->flow_tag == fte->flow_tag) { int old_action = fte->action; - fte->action |= action; + fte->action |= flow_act->action; handle = add_rule_fte(fte, fg, dest, dest_num, - old_action != action); + old_action != flow_act->action); if (IS_ERR(handle)) { fte->action = old_action; goto unlock_fte; @@ -1249,7 +1254,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, goto unlock_fg; } - fte = create_fte(fg, match_value, action, flow_tag, &prev); + fte = create_fte(fg, match_value, flow_act, &prev); if (IS_ERR(fte)) { handle = (void *)fte; goto unlock_fg; @@ -1326,17 +1331,17 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, static struct mlx5_flow_handle * _mlx5_add_flow_rules(struct mlx5_flow_table *ft, 
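
add_rule_fg() above expresses the FTE-reuse policy in flow_act terms: when an existing entry in the group has the same match value, at least one overlapping action bit and the same flow tag, the new rule is merged into it by OR-ing action bits instead of consuming a second hardware entry. Condensed:

/* condensed from add_rule_fg(); names from the diff */
bool can_merge = compare_match_value(&fg->mask, match_value, &fte->val) &&
		 (flow_act->action & fte->action) &&	/* bits overlap */
		 flow_act->flow_tag == fte->flow_tag;

if (can_merge)
	fte->action |= flow_act->action;	/* one HW entry, both rules */
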
struct mlx5_flow_spec *spec, - u32 action, - u32 flow_tag, + struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num) + { struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; int i; for (i = 0; i < dest_num; i++) { - if (!dest_is_valid(&dest[i], action, ft)) + if (!dest_is_valid(&dest[i], flow_act->action, ft)) return ERR_PTR(-EINVAL); } @@ -1347,7 +1352,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, g->mask.match_criteria, spec->match_criteria)) { rule = add_rule_fg(g, spec->match_value, - action, flow_tag, dest, dest_num); + flow_act, dest, dest_num); if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) goto unlock; } @@ -1359,8 +1364,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, goto unlock; } - rule = add_rule_fg(g, spec->match_value, - action, flow_tag, dest, dest_num); + rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num); if (IS_ERR(rule)) { /* Remove assumes refcount > 0 and autogroup creates a group * with a refcount = 0. @@ -1384,8 +1388,7 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) struct mlx5_flow_handle * mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec, - u32 action, - u32 flow_tag, + struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num) { @@ -1393,11 +1396,11 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_destination gen_dest; struct mlx5_flow_table *next_ft = NULL; struct mlx5_flow_handle *handle = NULL; - u32 sw_action = action; + u32 sw_action = flow_act->action; struct fs_prio *prio; fs_get_obj(prio, ft->node.parent); - if (action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { if (!fwd_next_prio_supported(ft)) return ERR_PTR(-EOPNOTSUPP); if (dest) @@ -1409,15 +1412,14 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft, gen_dest.ft = next_ft; dest = &gen_dest; dest_num = 1; - action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else { mutex_unlock(&root->chain_lock); return ERR_PTR(-EOPNOTSUPP); } } - handle = _mlx5_add_flow_rules(ft, spec, action, flow_tag, dest, - dest_num); + handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, dest_num); if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { if (!IS_ERR_OR_NULL(handle) && @@ -1822,7 +1824,7 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering) ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR); if (!ns) return -EINVAL; - ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL); + ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL, 0); if (IS_ERR(ft)) { mlx5_core_err(steering->dev, "Failed to create last anchor flow table"); return PTR_ERR(ft); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index d5150888645c..8e668c63f69e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -117,6 +117,7 @@ struct mlx5_flow_table { struct mutex lock; /* FWD rules that point on this flow table */ struct list_head fwd_rules; + u32 flags; }; struct mlx5_fc_cache { @@ -150,6 +151,7 @@ struct fs_fte { u32 flow_tag; u32 index; u32 action; + u32 encap_id; enum fs_fte_status status; struct mlx5_fc *counter; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 1933b3ca0b77..4762bb9d013c 
100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -121,6 +121,12 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev); void mlx5_dev_list_lock(void); void mlx5_dev_list_unlock(void); int mlx5_dev_list_trylock(void); +int mlx5_encap_alloc(struct mlx5_core_dev *dev, + int header_type, + size_t size, + void *encap_header, + u32 *encap_id); +void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id); bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 708736f387e2..d147ddd97997 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -60,7 +60,7 @@ #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) #define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 5000 #define MLXSW_PCI_FW_READY 0xA1844 -#define MLXSW_PCI_FW_READY_MASK 0xFF +#define MLXSW_PCI_FW_READY_MASK 0xFFFF #define MLXSW_PCI_FW_READY_MAGIC 0x5E #define MLXSW_PCI_DOORBELL_SDQ_OFFSET 0x000 diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index a61ce34ac6d0..edad7cb62475 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4424,7 +4424,7 @@ enum mlxsw_reg_mfcr_pwm_frequency { * Controls the frequency of the PWM signal. * Access: RW */ -MLXSW_ITEM32(reg, mfcr, pwm_frequency, 0x00, 0, 6); +MLXSW_ITEM32(reg, mfcr, pwm_frequency, 0x00, 0, 7); #define MLXSW_MFCR_TACHOS_MAX 10 diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 8bca020e3fae..a5433e425484 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -54,7 +54,6 @@ #include <linux/dcbnl.h> #include <linux/inetdevice.h> #include <net/switchdev.h> -#include <generated/utsrelease.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_mirred.h> #include <net/netevent.h> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 348c77339d88..df31f3861c4f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1877,6 +1877,9 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) int i; int err; + if (mlxsw_sp->router.aborted) + return; + dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. 
Note that FIB entries are no longer being offloaded to this device.\n"); for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { vr = &mlxsw_sp->router.vrs[i]; if (!vr->used) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c index ec0b27e72a5d..1552594b2d1f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchib.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchib.c @@ -43,7 +43,6 @@ #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <net/switchdev.h> -#include <generated/utsrelease.h> #include "pci.h" #include "core.h" diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 5208764797ba..60f19fb68e5f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -45,7 +45,6 @@ #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <net/switchdev.h> -#include <generated/utsrelease.h> #include "pci.h" #include "core.h" diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h index 87aa8a3e9112..76a19f1796af 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -62,6 +62,7 @@ enum nfp_bpf_action_type { NN_ACT_TC_DROP, NN_ACT_TC_REDIR, NN_ACT_DIRECT, + NN_ACT_XDP, }; /* Software register representation, hardware encoding in asm.h */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c index f8df5300f49c..335beb8b8b45 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -1126,7 +1126,7 @@ static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) meta->insn.src_reg * 2, true, 4); } -static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int mem_ldx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { if (meta->insn.off == offsetof(struct sk_buff, len)) emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2), @@ -1134,12 +1134,42 @@ static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) else return -ENOTSUPP; - wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + return 0; +} + +static int mem_ldx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u32 dst = reg_both(meta->insn.dst_reg * 2); + + if (meta->insn.off != offsetof(struct xdp_md, data) && + meta->insn.off != offsetof(struct xdp_md, data_end)) + return -ENOTSUPP; + + emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT); + + if (meta->insn.off == offsetof(struct xdp_md, data)) + return 0; + + emit_alu(nfp_prog, dst, dst, ALU_OP_ADD, NFP_BPF_ABI_LEN); return 0; } -static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + int ret; + + if (nfp_prog->act == NN_ACT_XDP) + ret = mem_ldx4_xdp(nfp_prog, meta); + else + ret = mem_ldx4_skb(nfp_prog, meta); + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return ret; +} + +static int mem_stx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { if (meta->insn.off == offsetof(struct sk_buff, mark)) return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2); @@ -1147,6 +1177,18 @@ static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return -ENOTSUPP; } +static int mem_stx4_xdp(struct nfp_prog *nfp_prog, struct 
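
mem_ldx4_xdp() is where the eBPF-visible struct xdp_md acquires its NFP meaning: a 4-byte load of xdp_md.data materializes the packet-start ABI register, and xdp_md.data_end is derived by adding the length register. At the time of this series the UAPI context exposes exactly those two fields:

/* include/uapi/linux/bpf.h, contemporary layout */
struct xdp_md {
	__u32 data;	/* -> NFP_BPF_ABI_PKT			*/
	__u32 data_end;	/* -> NFP_BPF_ABI_PKT + NFP_BPF_ABI_LEN	*/
};

/* so a verifier-mandated bounds check such as
 *	if (ctx->data + 4 > ctx->data_end)
 *		return XDP_DROP;
 * JITs to plain ALU operations on the two ABI registers.
 */
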
nfp_insn_meta *meta) +{ + return -ENOTSUPP; +} + +static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (nfp_prog->act == NN_ACT_XDP) + return mem_stx4_xdp(nfp_prog, meta); + return mem_stx4_skb(nfp_prog, meta); +} + static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { if (meta->insn.off < 0) /* TODO */ @@ -1530,6 +1572,47 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); } +static void nfp_outro_xdp(struct nfp_prog *nfp_prog) +{ + /* XDP return codes: + * 0 aborted 0x82 -> drop, count as stat3 + * 1 drop 0x22 -> drop, count as stat1 + * 2 pass 0x11 -> pass, count as stat0 + * 3 tx 0x44 -> redir, count as stat2 + * * unknown 0x82 -> drop, count as stat3 + */ + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + + emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + + /* if R0 > 3 jump to abort */ + emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0)); + emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); + + wrp_immed(nfp_prog, reg_b(2), 0x44112282); + + emit_shf(nfp_prog, reg_a(1), + reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_b(2), + reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); + + emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); +} + static void nfp_outro(struct nfp_prog *nfp_prog) { switch (nfp_prog->act) { @@ -1540,6 +1623,9 @@ static void nfp_outro(struct nfp_prog *nfp_prog) case NN_ACT_TC_REDIR: nfp_outro_tc_legacy(nfp_prog); break; + case NN_ACT_XDP: + nfp_outro_xdp(nfp_prog); + break; } } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c index 144cae87f63a..b3361f9b8e5c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c @@ -80,6 +80,9 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, { const struct bpf_reg_state *reg0 = &env->cur_state.regs[0]; + if (nfp_prog->act == NN_ACT_XDP) + return 0; + if (reg0->type != CONST_IMM) { pr_info("unsupported exit state: %d, imm: %llx\n", reg0->type, reg0->imm); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index e8713254786b..2115f446031e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -171,7 +171,10 @@ struct nfp_net_tx_desc { * on the head's buffer). Equal to skb->len for non-TSO packets. 
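The XDP epilogue emitted by nfp_outro_xdp() above packs the whole verdict-to-flags table into the single immediate 0x44112282: byte N of the constant holds the drop/pass/redirect nibbles for XDP return code N, and the shift-right-by-R0*8 plus the 0xff mask perform the table lookup in two microcode instructions. A host-side C model of that lookup (illustrative only, not part of the patch):

	static u8 xdp_verdict_to_nfp_flags(u32 ret)
	{
		/* byte 0: XDP_ABORTED -> 0x82, byte 1: XDP_DROP -> 0x22,
		 * byte 2: XDP_PASS -> 0x11, byte 3: XDP_TX -> 0x44
		 */
		const u32 packed = 0x44112282;

		if (ret > 3)	/* unknown verdicts branch to tgt_abort */
			return 0x82;
		return (packed >> (ret * 8)) & 0xff;
	}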
*/ struct nfp_net_tx_buf { - struct sk_buff *skb; + union { + struct sk_buff *skb; + void *frag; + }; dma_addr_t dma_addr; short int fidx; u16 pkt_cnt; @@ -341,6 +344,7 @@ struct nfp_net_rx_ring { * @napi: NAPI structure for this ring vec * @tx_ring: Pointer to TX ring * @rx_ring: Pointer to RX ring + * @xdp_ring: Pointer to an extra TX ring for XDP * @irq_idx: Index into MSI-X table * @rx_sync: Seqlock for atomic updates of RX stats * @rx_pkts: Number of received packets @@ -384,6 +388,8 @@ struct nfp_net_r_vector { u64 hw_csum_rx_inner_ok; u64 hw_csum_rx_error; + struct nfp_net_tx_ring *xdp_ring; + struct u64_stats_sync tx_sync; u64 tx_pkts; u64 tx_bytes; @@ -429,9 +435,11 @@ struct nfp_stat_pair { * @is_vf: Is the driver attached to a VF? * @fw_loaded: Is the firmware loaded? * @bpf_offload_skip_sw: Offloaded BPF program will not be rerun by cls_bpf + * @bpf_offload_xdp: Offloaded BPF program is XDP * @ctrl: Local copy of the control register/word. * @fl_bufsz: Currently configured size of the freelist buffers * @rx_offset: Offset in the RX buffers where packet data starts + * @xdp_prog: Installed XDP program * @cpp: Pointer to the CPP handle * @nfp_dev_cpp: Pointer to the NFP Device handle * @ctrl_area: Pointer to the CPP area for the control BAR @@ -451,6 +459,7 @@ struct nfp_stat_pair { * @max_tx_rings: Maximum number of TX rings supported by the Firmware * @max_rx_rings: Maximum number of RX rings supported by the Firmware * @num_tx_rings: Currently configured number of TX rings + * @num_stack_tx_rings: Number of TX rings used by the stack (not XDP) * @num_rx_rings: Currently configured number of RX rings * @txd_cnt: Size of the TX ring in number of descriptors * @rxd_cnt: Size of the RX ring in number of descriptors @@ -494,12 +503,15 @@ struct nfp_net { unsigned is_vf:1; unsigned fw_loaded:1; unsigned bpf_offload_skip_sw:1; + unsigned bpf_offload_xdp:1; u32 ctrl; u32 fl_bufsz; u32 rx_offset; + struct bpf_prog *xdp_prog; + struct nfp_net_tx_ring *tx_rings; struct nfp_net_rx_ring *rx_rings; @@ -532,6 +544,7 @@ struct nfp_net { unsigned int max_rx_rings; unsigned int num_tx_rings; + unsigned int num_stack_tx_rings; unsigned int num_rx_rings; int stride_tx; @@ -583,6 +596,13 @@ struct nfp_net { struct dentry *debugfs_dir; }; +struct nfp_net_ring_set { + unsigned int n_rings; + unsigned int mtu; + unsigned int dcnt; + void *rings; +}; + /* Functions to read/write from/to a BAR * Performs any endian conversion necessary. 
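The nfp_net_ring_set structure added above bundles one ring geometry (ring count, MTU and descriptor count) so a single entry point, nfp_net_ring_reconfig(), can serve MTU changes, descriptor resizing, channel changes and XDP attach. A minimal caller, mirroring the nfp_net_change_mtu() rewrite later in this patch, passes NULL for the set it does not want to touch:

	struct nfp_net_ring_set rx = {
		.n_rings = nn->num_rx_rings,
		.mtu = new_mtu,
		.dcnt = nn->rxd_cnt,
	};

	/* tx == NULL: keep the TX geometry, only the RX rings are rebuilt */
	return nfp_net_ring_reconfig(nn, &nn->xdp_prog, &rx, NULL);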
*/ @@ -771,7 +791,9 @@ void nfp_net_rss_write_key(struct nfp_net *nn); void nfp_net_coalesce_write_cfg(struct nfp_net *nn); int nfp_net_irqs_alloc(struct nfp_net *nn); void nfp_net_irqs_disable(struct nfp_net *nn); -int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt); +int +nfp_net_ring_reconfig(struct nfp_net *nn, struct bpf_prog **xdp_prog, + struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx); #ifdef CONFIG_NFP_NET_DEBUG void nfp_net_debugfs_create(void); @@ -797,8 +819,6 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn) #endif /* CONFIG_NFP_NET_DEBUG */ void nfp_net_filter_stats_timer(unsigned long data); -int -nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, - struct tc_cls_bpf_offload *cls_bpf); +int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf); #endif /* _NFP_NET_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 97e0bbef13d1..99edb9fd84bf 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -41,6 +41,7 @@ * Chris Telfer <[email protected]> */ +#include <linux/bpf.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -490,11 +491,12 @@ static void nfp_net_irqs_assign(struct net_device *netdev) nn->num_rx_rings = min(nn->num_r_vecs, nn->num_rx_rings); nn->num_tx_rings = min(nn->num_r_vecs, nn->num_tx_rings); + nn->num_stack_tx_rings = nn->num_tx_rings; nn->lsc_handler = nfp_net_irq_lsc; nn->exn_handler = nfp_net_irq_exn; - for (r = 0; r < nn->num_r_vecs; r++) { + for (r = 0; r < nn->max_r_vecs; r++) { r_vec = &nn->r_vecs[r]; r_vec->nfp_net = nn; r_vec->handler = nfp_net_irq_rxtx; @@ -713,6 +715,13 @@ static void nfp_net_tx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, u64_stats_update_end(&r_vec->tx_sync); } +static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) +{ + wmb(); + nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add); + tx_ring->wr_ptr_add = 0; +} + /** * nfp_net_tx() - Main transmit entry point * @skb: SKB to transmit @@ -827,12 +836,8 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) nfp_net_tx_ring_stop(nd_q, tx_ring); tx_ring->wr_ptr_add += nr_frags + 1; - if (!skb->xmit_more || netif_xmit_stopped(nd_q)) { - /* force memory write before we let HW know */ - wmb(); - nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add); - tx_ring->wr_ptr_add = 0; - } + if (!skb->xmit_more || netif_xmit_stopped(nd_q)) + nfp_net_tx_xmit_more_flush(tx_ring); skb_tx_timestamp(skb); @@ -954,6 +959,56 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring) tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); } +static void nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + u32 done_pkts = 0, done_bytes = 0; + int idx, todo; + u32 qcp_rd_p; + + /* Work out how many descriptors have been transmitted */ + qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); + + if (qcp_rd_p == tx_ring->qcp_rd_p) + return; + + if (qcp_rd_p > tx_ring->qcp_rd_p) + todo = qcp_rd_p - tx_ring->qcp_rd_p; + else + todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p; + + while (todo--) { + idx = tx_ring->rd_p & (tx_ring->cnt - 1); + tx_ring->rd_p++; + + if (!tx_ring->txbufs[idx].frag) + continue; + + nfp_net_dma_unmap_rx(nn, tx_ring->txbufs[idx].dma_addr, + nn->fl_bufsz, DMA_BIDIRECTIONAL); + 
__free_page(virt_to_page(tx_ring->txbufs[idx].frag)); + + done_pkts++; + done_bytes += tx_ring->txbufs[idx].real_len; + + tx_ring->txbufs[idx].dma_addr = 0; + tx_ring->txbufs[idx].frag = NULL; + tx_ring->txbufs[idx].fidx = -2; + } + + tx_ring->qcp_rd_p = qcp_rd_p; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_bytes += done_bytes; + r_vec->tx_pkts += done_pkts; + u64_stats_update_end(&r_vec->tx_sync); + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); +} + /** * nfp_net_tx_ring_reset() - Free any untransmitted buffers and reset pointers * @nn: NFP Net device @@ -964,39 +1019,47 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring) static void nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring) { + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; const struct skb_frag_struct *frag; - struct netdev_queue *nd_q; struct pci_dev *pdev = nn->pdev; + struct netdev_queue *nd_q; while (tx_ring->rd_p != tx_ring->wr_p) { - int nr_frags, fidx, idx; - struct sk_buff *skb; + struct nfp_net_tx_buf *tx_buf; + int idx; idx = tx_ring->rd_p & (tx_ring->cnt - 1); - skb = tx_ring->txbufs[idx].skb; - nr_frags = skb_shinfo(skb)->nr_frags; - fidx = tx_ring->txbufs[idx].fidx; + tx_buf = &tx_ring->txbufs[idx]; - if (fidx == -1) { - /* unmap head */ - dma_unmap_single(&pdev->dev, - tx_ring->txbufs[idx].dma_addr, - skb_headlen(skb), DMA_TO_DEVICE); + if (tx_ring == r_vec->xdp_ring) { + nfp_net_dma_unmap_rx(nn, tx_buf->dma_addr, + nn->fl_bufsz, DMA_BIDIRECTIONAL); + __free_page(virt_to_page(tx_ring->txbufs[idx].frag)); } else { - /* unmap fragment */ - frag = &skb_shinfo(skb)->frags[fidx]; - dma_unmap_page(&pdev->dev, - tx_ring->txbufs[idx].dma_addr, - skb_frag_size(frag), DMA_TO_DEVICE); - } + struct sk_buff *skb = tx_ring->txbufs[idx].skb; + int nr_frags = skb_shinfo(skb)->nr_frags; + + if (tx_buf->fidx == -1) { + /* unmap head */ + dma_unmap_single(&pdev->dev, tx_buf->dma_addr, + skb_headlen(skb), + DMA_TO_DEVICE); + } else { + /* unmap fragment */ + frag = &skb_shinfo(skb)->frags[tx_buf->fidx]; + dma_unmap_page(&pdev->dev, tx_buf->dma_addr, + skb_frag_size(frag), + DMA_TO_DEVICE); + } - /* check for last gather fragment */ - if (fidx == nr_frags - 1) - dev_kfree_skb_any(skb); + /* check for last gather fragment */ + if (tx_buf->fidx == nr_frags - 1) + dev_kfree_skb_any(skb); + } - tx_ring->txbufs[idx].dma_addr = 0; - tx_ring->txbufs[idx].skb = NULL; - tx_ring->txbufs[idx].fidx = -2; + tx_buf->dma_addr = 0; + tx_buf->skb = NULL; + tx_buf->fidx = -2; tx_ring->qcp_rd_p++; tx_ring->rd_p++; @@ -1008,6 +1071,9 @@ nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring) tx_ring->qcp_rd_p = 0; tx_ring->wr_ptr_add = 0; + if (tx_ring == r_vec->xdp_ring) + return; + nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx); netdev_tx_reset_queue(nd_q); } @@ -1017,7 +1083,7 @@ static void nfp_net_tx_timeout(struct net_device *netdev) struct nfp_net *nn = netdev_priv(netdev); int i; - for (i = 0; i < nn->num_tx_rings; i++) { + for (i = 0; i < nn->netdev->real_num_tx_queues; i++) { if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i))) continue; nn_warn(nn, "TX timeout on ring: %d\n", i); @@ -1045,11 +1111,21 @@ nfp_net_calc_fl_bufsz(struct nfp_net *nn, unsigned int mtu) return fl_bufsz; } +static void +nfp_net_free_frag(void *frag, bool xdp) +{ + if (!xdp) + skb_free_frag(frag); + else + __free_page(virt_to_page(frag)); +} + /** * nfp_net_rx_alloc_one() - 
Allocate and map page frag for RX * @rx_ring: RX ring structure of the skb * @dma_addr: Pointer to storage for DMA address (output param) * @fl_bufsz: size of freelist buffers + * @xdp: Whether XDP is enabled * * This function will allcate a new page frag, map it for DMA. * @@ -1057,20 +1133,26 @@ nfp_net_calc_fl_bufsz(struct nfp_net *nn, unsigned int mtu) */ static void * nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr, - unsigned int fl_bufsz) + unsigned int fl_bufsz, bool xdp) { struct nfp_net *nn = rx_ring->r_vec->nfp_net; + int direction; void *frag; - frag = netdev_alloc_frag(fl_bufsz); + if (!xdp) + frag = netdev_alloc_frag(fl_bufsz); + else + frag = page_address(alloc_page(GFP_KERNEL | __GFP_COLD)); if (!frag) { nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n"); return NULL; } - *dma_addr = nfp_net_dma_map_rx(nn, frag, fl_bufsz, DMA_FROM_DEVICE); + direction = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + + *dma_addr = nfp_net_dma_map_rx(nn, frag, fl_bufsz, direction); if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) { - skb_free_frag(frag); + nfp_net_free_frag(frag, xdp); nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n"); return NULL; } @@ -1078,19 +1160,23 @@ nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr, return frag; } -static void *nfp_net_napi_alloc_one(struct nfp_net *nn, dma_addr_t *dma_addr) +static void * +nfp_net_napi_alloc_one(struct nfp_net *nn, int direction, dma_addr_t *dma_addr) { void *frag; - frag = napi_alloc_frag(nn->fl_bufsz); + if (!nn->xdp_prog) + frag = napi_alloc_frag(nn->fl_bufsz); + else + frag = page_address(alloc_page(GFP_ATOMIC | __GFP_COLD)); if (!frag) { nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n"); return NULL; } - *dma_addr = nfp_net_dma_map_rx(nn, frag, nn->fl_bufsz, DMA_FROM_DEVICE); + *dma_addr = nfp_net_dma_map_rx(nn, frag, nn->fl_bufsz, direction); if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) { - skb_free_frag(frag); + nfp_net_free_frag(frag, nn->xdp_prog); nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n"); return NULL; } @@ -1161,14 +1247,17 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring) * nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring * @nn: NFP Net device * @rx_ring: RX ring to remove buffers from + * @xdp: Whether XDP is enabled * * Assumes that the device is stopped and buffers are in [0, ring->cnt - 1) * entries. After device is disabled nfp_net_rx_ring_reset() must be called * to restore required ring geometry. */ static void -nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring) +nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring, + bool xdp) { + int direction = xdp ? 
DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; unsigned int i; for (i = 0; i < rx_ring->cnt - 1; i++) { @@ -1180,8 +1269,8 @@ nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring) continue; nfp_net_dma_unmap_rx(nn, rx_ring->rxbufs[i].dma_addr, - rx_ring->bufsz, DMA_FROM_DEVICE); - skb_free_frag(rx_ring->rxbufs[i].frag); + rx_ring->bufsz, direction); + nfp_net_free_frag(rx_ring->rxbufs[i].frag, xdp); rx_ring->rxbufs[i].dma_addr = 0; rx_ring->rxbufs[i].frag = NULL; } @@ -1191,9 +1280,11 @@ nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring) * nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW) * @nn: NFP Net device * @rx_ring: RX ring to remove buffers from + * @xdp: Whether XDP is enabled */ static int -nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring) +nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring, + bool xdp) { struct nfp_net_rx_buf *rxbufs; unsigned int i; @@ -1203,9 +1294,9 @@ nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring) for (i = 0; i < rx_ring->cnt - 1; i++) { rxbufs[i].frag = nfp_net_rx_alloc_one(rx_ring, &rxbufs[i].dma_addr, - rx_ring->bufsz); + rx_ring->bufsz, xdp); if (!rxbufs[i].frag) { - nfp_net_rx_ring_bufs_free(nn, rx_ring); + nfp_net_rx_ring_bufs_free(nn, rx_ring, xdp); return -ENOMEM; } } @@ -1368,6 +1459,68 @@ nfp_net_rx_drop(struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring, dev_kfree_skb_any(skb); } +static void +nfp_net_tx_xdp_buf(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring, + struct nfp_net_tx_ring *tx_ring, + struct nfp_net_rx_buf *rxbuf, unsigned int pkt_off, + unsigned int pkt_len) +{ + struct nfp_net_tx_buf *txbuf; + struct nfp_net_tx_desc *txd; + dma_addr_t new_dma_addr; + void *new_frag; + int wr_idx; + + if (unlikely(nfp_net_tx_full(tx_ring, 1))) { + nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL); + return; + } + + new_frag = nfp_net_napi_alloc_one(nn, DMA_BIDIRECTIONAL, &new_dma_addr); + if (unlikely(!new_frag)) { + nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL); + return; + } + nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr); + + wr_idx = tx_ring->wr_p & (tx_ring->cnt - 1); + + /* Stash the soft descriptor of the head then initialize it */ + txbuf = &tx_ring->txbufs[wr_idx]; + txbuf->frag = rxbuf->frag; + txbuf->dma_addr = rxbuf->dma_addr; + txbuf->fidx = -1; + txbuf->pkt_cnt = 1; + txbuf->real_len = pkt_len; + + dma_sync_single_for_device(&nn->pdev->dev, rxbuf->dma_addr + pkt_off, + pkt_len, DMA_TO_DEVICE); + + /* Build TX descriptor */ + txd = &tx_ring->txds[wr_idx]; + txd->offset_eop = PCIE_DESC_TX_EOP; + txd->dma_len = cpu_to_le16(pkt_len); + nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + pkt_off); + txd->data_len = cpu_to_le16(pkt_len); + + txd->flags = 0; + txd->mss = 0; + txd->l4_offset = 0; + + tx_ring->wr_p++; + tx_ring->wr_ptr_add++; +} + +static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, unsigned int len) +{ + struct xdp_buff xdp; + + xdp.data = data; + xdp.data_end = data + len; + + return BPF_PROG_RUN(prog, (void *)&xdp); +} + /** * nfp_net_rx() - receive up to @budget packets on @rx_ring * @rx_ring: RX ring to receive from @@ -1383,16 +1536,27 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) { struct nfp_net_r_vector *r_vec = rx_ring->r_vec; struct nfp_net *nn = r_vec->nfp_net; - unsigned int data_len, meta_len; - struct nfp_net_rx_buf *rxbuf; - struct nfp_net_rx_desc *rxd; - dma_addr_t new_dma_addr; + struct 
nfp_net_tx_ring *tx_ring; + struct bpf_prog *xdp_prog; + unsigned int true_bufsz; struct sk_buff *skb; int pkts_polled = 0; - void *new_frag; + int rx_dma_map_dir; int idx; + rcu_read_lock(); + xdp_prog = READ_ONCE(nn->xdp_prog); + rx_dma_map_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + true_bufsz = xdp_prog ? PAGE_SIZE : nn->fl_bufsz; + tx_ring = r_vec->xdp_ring; + while (pkts_polled < budget) { + unsigned int meta_len, data_len, data_off, pkt_len, pkt_off; + struct nfp_net_rx_buf *rxbuf; + struct nfp_net_rx_desc *rxd; + dma_addr_t new_dma_addr; + void *new_frag; + idx = rx_ring->rd_p & (rx_ring->cnt - 1); rxd = &rx_ring->rxds[idx]; @@ -1408,22 +1572,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) pkts_polled++; rxbuf = &rx_ring->rxbufs[idx]; - skb = build_skb(rxbuf->frag, nn->fl_bufsz); - if (unlikely(!skb)) { - nfp_net_rx_drop(r_vec, rx_ring, rxbuf, NULL); - continue; - } - new_frag = nfp_net_napi_alloc_one(nn, &new_dma_addr); - if (unlikely(!new_frag)) { - nfp_net_rx_drop(r_vec, rx_ring, rxbuf, skb); - continue; - } - - nfp_net_dma_unmap_rx(nn, rx_ring->rxbufs[idx].dma_addr, - nn->fl_bufsz, DMA_FROM_DEVICE); - - nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr); - /* < meta_len > * <-- [rx_offset] --> * --------------------------------------------------------- @@ -1438,20 +1586,66 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) */ meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; data_len = le16_to_cpu(rxd->rxd.data_len); + pkt_len = data_len - meta_len; if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) - skb_reserve(skb, NFP_NET_RX_BUF_HEADROOM + meta_len); + pkt_off = meta_len; else - skb_reserve(skb, - NFP_NET_RX_BUF_HEADROOM + nn->rx_offset); - skb_put(skb, data_len - meta_len); + pkt_off = nn->rx_offset; + data_off = NFP_NET_RX_BUF_HEADROOM + pkt_off; /* Stats update */ u64_stats_update_begin(&r_vec->rx_sync); r_vec->rx_pkts++; - r_vec->rx_bytes += skb->len; + r_vec->rx_bytes += pkt_len; u64_stats_update_end(&r_vec->rx_sync); + if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF && + nn->bpf_offload_xdp)) { + int act; + + dma_sync_single_for_cpu(&nn->pdev->dev, + rxbuf->dma_addr + pkt_off, + pkt_len, DMA_FROM_DEVICE); + act = nfp_net_run_xdp(xdp_prog, rxbuf->frag + data_off, + pkt_len); + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + nfp_net_tx_xdp_buf(nn, rx_ring, tx_ring, rxbuf, + pkt_off, pkt_len); + continue; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + case XDP_DROP: + nfp_net_rx_give_one(rx_ring, rxbuf->frag, + rxbuf->dma_addr); + continue; + } + } + + skb = build_skb(rxbuf->frag, true_bufsz); + if (unlikely(!skb)) { + nfp_net_rx_drop(r_vec, rx_ring, rxbuf, NULL); + continue; + } + new_frag = nfp_net_napi_alloc_one(nn, rx_dma_map_dir, + &new_dma_addr); + if (unlikely(!new_frag)) { + nfp_net_rx_drop(r_vec, rx_ring, rxbuf, skb); + continue; + } + + nfp_net_dma_unmap_rx(nn, rxbuf->dma_addr, nn->fl_bufsz, + rx_dma_map_dir); + + nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr); + + skb_reserve(skb, data_off); + skb_put(skb, pkt_len); + if (nn->fw_ver.major <= 3) { nfp_net_set_hash_desc(nn->netdev, skb, rxd); } else if (meta_len) { @@ -1477,6 +1671,10 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) napi_gro_receive(&rx_ring->r_vec->napi, skb); } + if (xdp_prog && tx_ring->wr_ptr_add) + nfp_net_tx_xmit_more_flush(tx_ring); + rcu_read_unlock(); + return pkts_polled; } @@ -1495,8 +1693,11 @@ static int nfp_net_poll(struct napi_struct *napi, int 
budget) if (r_vec->tx_ring) nfp_net_tx_complete(r_vec->tx_ring); - if (r_vec->rx_ring) + if (r_vec->rx_ring) { pkts_polled = nfp_net_rx(r_vec->rx_ring, budget); + if (r_vec->xdp_ring) + nfp_net_xdp_complete(r_vec->xdp_ring); + } if (pkts_polled < budget) { napi_complete_done(napi, pkts_polled); @@ -1536,10 +1737,12 @@ static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring) * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring * @tx_ring: TX Ring structure to allocate * @cnt: Ring buffer count + * @is_xdp: True if ring will be used for XDP * * Return: 0 on success, negative errno otherwise. */ -static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt) +static int +nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt, bool is_xdp) { struct nfp_net_r_vector *r_vec = tx_ring->r_vec; struct nfp_net *nn = r_vec->nfp_net; @@ -1559,11 +1762,14 @@ static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt) if (!tx_ring->txbufs) goto err_alloc; - netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask, tx_ring->idx); + if (!is_xdp) + netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask, + tx_ring->idx); - nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p\n", + nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p %s\n", tx_ring->idx, tx_ring->qcidx, - tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds); + tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds, + is_xdp ? "XDP" : ""); return 0; @@ -1573,23 +1779,29 @@ err_alloc: } static struct nfp_net_tx_ring * -nfp_net_shadow_tx_rings_prepare(struct nfp_net *nn, u32 buf_cnt) +nfp_net_tx_ring_set_prepare(struct nfp_net *nn, struct nfp_net_ring_set *s, + unsigned int num_stack_tx_rings) { struct nfp_net_tx_ring *rings; unsigned int r; - rings = kcalloc(nn->num_tx_rings, sizeof(*rings), GFP_KERNEL); + rings = kcalloc(s->n_rings, sizeof(*rings), GFP_KERNEL); if (!rings) return NULL; - for (r = 0; r < nn->num_tx_rings; r++) { - nfp_net_tx_ring_init(&rings[r], nn->tx_rings[r].r_vec, r); + for (r = 0; r < s->n_rings; r++) { + int bias = 0; + + if (r >= num_stack_tx_rings) + bias = num_stack_tx_rings; + + nfp_net_tx_ring_init(&rings[r], &nn->r_vecs[r - bias], r); - if (nfp_net_tx_ring_alloc(&rings[r], buf_cnt)) + if (nfp_net_tx_ring_alloc(&rings[r], s->dcnt, bias)) goto err_free_prev; } - return rings; + return s->rings = rings; err_free_prev: while (r--) @@ -1598,28 +1810,27 @@ err_free_prev: return NULL; } -static struct nfp_net_tx_ring * -nfp_net_shadow_tx_rings_swap(struct nfp_net *nn, struct nfp_net_tx_ring *rings) +static void +nfp_net_tx_ring_set_swap(struct nfp_net *nn, struct nfp_net_ring_set *s) { - struct nfp_net_tx_ring *old = nn->tx_rings; - unsigned int r; + struct nfp_net_ring_set new = *s; - for (r = 0; r < nn->num_tx_rings; r++) - old[r].r_vec->tx_ring = &rings[r]; + s->dcnt = nn->txd_cnt; + s->rings = nn->tx_rings; + s->n_rings = nn->num_tx_rings; - nn->tx_rings = rings; - return old; + nn->txd_cnt = new.dcnt; + nn->tx_rings = new.rings; + nn->num_tx_rings = new.n_rings; } static void -nfp_net_shadow_tx_rings_free(struct nfp_net *nn, struct nfp_net_tx_ring *rings) +nfp_net_tx_ring_set_free(struct nfp_net *nn, struct nfp_net_ring_set *s) { + struct nfp_net_tx_ring *rings = s->rings; unsigned int r; - if (!rings) - return; - - for (r = 0; r < nn->num_tx_rings; r++) + for (r = 0; r < s->n_rings; r++) nfp_net_tx_ring_free(&rings[r]); kfree(rings); @@ -1691,31 +1902,32 @@ err_alloc: } static struct nfp_net_rx_ring * -nfp_net_shadow_rx_rings_prepare(struct 
nfp_net *nn, unsigned int fl_bufsz, - u32 buf_cnt) +nfp_net_rx_ring_set_prepare(struct nfp_net *nn, struct nfp_net_ring_set *s, + bool xdp) { + unsigned int fl_bufsz = nfp_net_calc_fl_bufsz(nn, s->mtu); struct nfp_net_rx_ring *rings; unsigned int r; - rings = kcalloc(nn->num_rx_rings, sizeof(*rings), GFP_KERNEL); + rings = kcalloc(s->n_rings, sizeof(*rings), GFP_KERNEL); if (!rings) return NULL; - for (r = 0; r < nn->num_rx_rings; r++) { - nfp_net_rx_ring_init(&rings[r], nn->rx_rings[r].r_vec, r); + for (r = 0; r < s->n_rings; r++) { + nfp_net_rx_ring_init(&rings[r], &nn->r_vecs[r], r); - if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz, buf_cnt)) + if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz, s->dcnt)) goto err_free_prev; - if (nfp_net_rx_ring_bufs_alloc(nn, &rings[r])) + if (nfp_net_rx_ring_bufs_alloc(nn, &rings[r], xdp)) goto err_free_ring; } - return rings; + return s->rings = rings; err_free_prev: while (r--) { - nfp_net_rx_ring_bufs_free(nn, &rings[r]); + nfp_net_rx_ring_bufs_free(nn, &rings[r], xdp); err_free_ring: nfp_net_rx_ring_free(&rings[r]); } @@ -1723,35 +1935,50 @@ err_free_ring: return NULL; } -static struct nfp_net_rx_ring * -nfp_net_shadow_rx_rings_swap(struct nfp_net *nn, struct nfp_net_rx_ring *rings) +static void +nfp_net_rx_ring_set_swap(struct nfp_net *nn, struct nfp_net_ring_set *s) { - struct nfp_net_rx_ring *old = nn->rx_rings; - unsigned int r; + struct nfp_net_ring_set new = *s; - for (r = 0; r < nn->num_rx_rings; r++) - old[r].r_vec->rx_ring = &rings[r]; + s->mtu = nn->netdev->mtu; + s->dcnt = nn->rxd_cnt; + s->rings = nn->rx_rings; + s->n_rings = nn->num_rx_rings; - nn->rx_rings = rings; - return old; + nn->netdev->mtu = new.mtu; + nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, new.mtu); + nn->rxd_cnt = new.dcnt; + nn->rx_rings = new.rings; + nn->num_rx_rings = new.n_rings; } static void -nfp_net_shadow_rx_rings_free(struct nfp_net *nn, struct nfp_net_rx_ring *rings) +nfp_net_rx_ring_set_free(struct nfp_net *nn, struct nfp_net_ring_set *s, + bool xdp) { + struct nfp_net_rx_ring *rings = s->rings; unsigned int r; - if (!rings) - return; - - for (r = 0; r < nn->num_rx_rings; r++) { - nfp_net_rx_ring_bufs_free(nn, &rings[r]); + for (r = 0; r < s->n_rings; r++) { + nfp_net_rx_ring_bufs_free(nn, &rings[r], xdp); nfp_net_rx_ring_free(&rings[r]); } kfree(rings); } +static void +nfp_net_vector_assign_rings(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + int idx) +{ + r_vec->rx_ring = idx < nn->num_rx_rings ? &nn->rx_rings[idx] : NULL; + r_vec->tx_ring = + idx < nn->num_stack_tx_rings ? &nn->tx_rings[idx] : NULL; + + r_vec->xdp_ring = idx < nn->num_tx_rings - nn->num_stack_tx_rings ? 
+ &nn->tx_rings[nn->num_stack_tx_rings + idx] : NULL; +} + static int nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, int idx) @@ -1759,33 +1986,20 @@ nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, struct msix_entry *entry = &nn->irq_entries[r_vec->irq_idx]; int err; - if (idx < nn->num_tx_rings) { - r_vec->tx_ring = &nn->tx_rings[idx]; - nfp_net_tx_ring_init(r_vec->tx_ring, r_vec, idx); - } else { - r_vec->tx_ring = NULL; - } - - if (idx < nn->num_rx_rings) { - r_vec->rx_ring = &nn->rx_rings[idx]; - nfp_net_rx_ring_init(r_vec->rx_ring, r_vec, idx); - } else { - r_vec->rx_ring = NULL; - } + /* Setup NAPI */ + netif_napi_add(nn->netdev, &r_vec->napi, + nfp_net_poll, NAPI_POLL_WEIGHT); snprintf(r_vec->name, sizeof(r_vec->name), "%s-rxtx-%d", nn->netdev->name, idx); err = request_irq(entry->vector, r_vec->handler, 0, r_vec->name, r_vec); if (err) { + netif_napi_del(&r_vec->napi); nn_err(nn, "Error requesting IRQ %d\n", entry->vector); return err; } disable_irq(entry->vector); - /* Setup NAPI */ - netif_napi_add(nn->netdev, &r_vec->napi, - nfp_net_poll, NAPI_POLL_WEIGHT); - irq_set_affinity_hint(entry->vector, &r_vec->affinity_mask); nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", idx, entry->vector, entry->entry); @@ -1913,9 +2127,9 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn) nn_err(nn, "Could not disable device: %d\n", err); for (r = 0; r < nn->num_rx_rings; r++) - nfp_net_rx_ring_reset(nn->r_vecs[r].rx_ring); + nfp_net_rx_ring_reset(&nn->rx_rings[r]); for (r = 0; r < nn->num_tx_rings; r++) - nfp_net_tx_ring_reset(nn, nn->r_vecs[r].tx_ring); + nfp_net_tx_ring_reset(nn, &nn->tx_rings[r]); for (r = 0; r < nn->num_r_vecs; r++) nfp_net_vec_clear_ring_data(nn, r); @@ -1993,7 +2207,7 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn) nn->ctrl = new_ctrl; for (r = 0; r < nn->num_rx_rings; r++) - nfp_net_rx_ring_fill_freelist(nn->r_vecs[r].rx_ring); + nfp_net_rx_ring_fill_freelist(&nn->rx_rings[r]); /* Since reconfiguration requests while NFP is down are ignored we * have to wipe the entire VXLAN configuration and reinitialize it. 
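With nfp_net_vector_assign_rings() the vector-to-ring mapping becomes purely positional: vector idx owns rx_rings[idx] and tx_rings[idx] while those indices are in range, and the per-vector XDP TX rings are stacked after the stack's TX rings. A standalone sketch of the arithmetic, with hypothetical numbers (4 RX rings, 4 stack TX rings, XDP enabled, so 8 TX rings in total):

	#include <stdio.h>

	int main(void)
	{
		unsigned int num_rx = 4, num_stack_tx = 4, num_tx = 8, idx;

		for (idx = 0; idx < 4; idx++) {
			int rx = idx < num_rx ? (int)idx : -1;
			int tx = idx < num_stack_tx ? (int)idx : -1;
			int xdp = idx < num_tx - num_stack_tx ?
				  (int)(num_stack_tx + idx) : -1;

			/* vec 0: rx 0 tx 0 xdp 4 ... vec 3: rx 3 tx 3 xdp 7 */
			printf("vec %u: rx %d tx %d xdp %d\n", idx, rx, tx, xdp);
		}
		return 0;
	}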
@@ -2044,6 +2258,15 @@ static void nfp_net_open_stack(struct nfp_net *nn) static int nfp_net_netdev_open(struct net_device *netdev) { struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_ring_set rx = { + .n_rings = nn->num_rx_rings, + .mtu = nn->netdev->mtu, + .dcnt = nn->rxd_cnt, + }; + struct nfp_net_ring_set tx = { + .n_rings = nn->num_tx_rings, + .dcnt = nn->txd_cnt, + }; int err, r; if (nn->ctrl & NFP_NET_CFG_CTRL_ENABLE) { @@ -2068,41 +2291,29 @@ static int nfp_net_netdev_open(struct net_device *netdev) goto err_free_exn; disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector); - nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings), - GFP_KERNEL); - if (!nn->rx_rings) { - err = -ENOMEM; - goto err_free_lsc; - } - nn->tx_rings = kcalloc(nn->num_tx_rings, sizeof(*nn->tx_rings), - GFP_KERNEL); - if (!nn->tx_rings) { - err = -ENOMEM; - goto err_free_rx_rings; - } - for (r = 0; r < nn->num_r_vecs; r++) { err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r); if (err) goto err_cleanup_vec_p; } - for (r = 0; r < nn->num_tx_rings; r++) { - err = nfp_net_tx_ring_alloc(nn->r_vecs[r].tx_ring, nn->txd_cnt); - if (err) - goto err_free_tx_ring_p; + + nn->rx_rings = nfp_net_rx_ring_set_prepare(nn, &rx, nn->xdp_prog); + if (!nn->rx_rings) { + err = -ENOMEM; + goto err_cleanup_vec; } - for (r = 0; r < nn->num_rx_rings; r++) { - err = nfp_net_rx_ring_alloc(nn->r_vecs[r].rx_ring, - nn->fl_bufsz, nn->rxd_cnt); - if (err) - goto err_flush_free_rx_ring_p; - err = nfp_net_rx_ring_bufs_alloc(nn, nn->r_vecs[r].rx_ring); - if (err) - goto err_free_rx_ring_p; + nn->tx_rings = nfp_net_tx_ring_set_prepare(nn, &tx, + nn->num_stack_tx_rings); + if (!nn->tx_rings) { + err = -ENOMEM; + goto err_free_rx_rings; } - err = netif_set_real_num_tx_queues(netdev, nn->num_tx_rings); + for (r = 0; r < nn->max_r_vecs; r++) + nfp_net_vector_assign_rings(nn, &nn->r_vecs[r], r); + + err = netif_set_real_num_tx_queues(netdev, nn->num_stack_tx_rings); if (err) goto err_free_rings; @@ -2132,25 +2343,14 @@ static int nfp_net_netdev_open(struct net_device *netdev) return 0; err_free_rings: - r = nn->num_rx_rings; -err_flush_free_rx_ring_p: - while (r--) { - nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring); -err_free_rx_ring_p: - nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring); - } - r = nn->num_tx_rings; -err_free_tx_ring_p: - while (r--) - nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring); + nfp_net_tx_ring_set_free(nn, &tx); +err_free_rx_rings: + nfp_net_rx_ring_set_free(nn, &rx, nn->xdp_prog); +err_cleanup_vec: r = nn->num_r_vecs; err_cleanup_vec_p: while (r--) nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); - kfree(nn->tx_rings); -err_free_rx_rings: - kfree(nn->rx_rings); -err_free_lsc: nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX); err_free_exn: nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX); @@ -2186,11 +2386,11 @@ static void nfp_net_close_free_all(struct nfp_net *nn) unsigned int r; for (r = 0; r < nn->num_rx_rings; r++) { - nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring); - nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring); + nfp_net_rx_ring_bufs_free(nn, &nn->rx_rings[r], nn->xdp_prog); + nfp_net_rx_ring_free(&nn->rx_rings[r]); } for (r = 0; r < nn->num_tx_rings; r++) - nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring); + nfp_net_tx_ring_free(&nn->tx_rings[r]); for (r = 0; r < nn->num_r_vecs; r++) nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); @@ -2255,89 +2455,137 @@ static void nfp_net_set_rx_mode(struct net_device *netdev) nn->ctrl = new_ctrl; } -static int nfp_net_change_mtu(struct net_device 
*netdev, int new_mtu) +static void nfp_net_rss_init_itbl(struct nfp_net *nn) { - unsigned int old_mtu, old_fl_bufsz, new_fl_bufsz; - struct nfp_net *nn = netdev_priv(netdev); - struct nfp_net_rx_ring *tmp_rings; - int err; - - old_mtu = netdev->mtu; - old_fl_bufsz = nn->fl_bufsz; - new_fl_bufsz = nfp_net_calc_fl_bufsz(nn, new_mtu); - - if (!netif_running(netdev)) { - netdev->mtu = new_mtu; - nn->fl_bufsz = new_fl_bufsz; - return 0; - } + int i; - /* Prepare new rings */ - tmp_rings = nfp_net_shadow_rx_rings_prepare(nn, new_fl_bufsz, - nn->rxd_cnt); - if (!tmp_rings) - return -ENOMEM; + for (i = 0; i < sizeof(nn->rss_itbl); i++) + nn->rss_itbl[i] = + ethtool_rxfh_indir_default(i, nn->num_rx_rings); +} - /* Stop device, swap in new rings, try to start the firmware */ - nfp_net_close_stack(nn); - nfp_net_clear_config_and_disable(nn); +static int +nfp_net_ring_swap_enable(struct nfp_net *nn, unsigned int *num_vecs, + unsigned int *stack_tx_rings, + struct bpf_prog **xdp_prog, + struct nfp_net_ring_set *rx, + struct nfp_net_ring_set *tx) +{ + unsigned int r; + int err; - tmp_rings = nfp_net_shadow_rx_rings_swap(nn, tmp_rings); + if (rx) + nfp_net_rx_ring_set_swap(nn, rx); + if (tx) + nfp_net_tx_ring_set_swap(nn, tx); - netdev->mtu = new_mtu; - nn->fl_bufsz = new_fl_bufsz; + swap(*num_vecs, nn->num_r_vecs); + swap(*stack_tx_rings, nn->num_stack_tx_rings); + *xdp_prog = xchg(&nn->xdp_prog, *xdp_prog); - err = nfp_net_set_config_and_enable(nn); - if (err) { - const int err_new = err; + for (r = 0; r < nn->max_r_vecs; r++) + nfp_net_vector_assign_rings(nn, &nn->r_vecs[r], r); - /* Try with old configuration and old rings */ - tmp_rings = nfp_net_shadow_rx_rings_swap(nn, tmp_rings); + if (nn->netdev->real_num_rx_queues != nn->num_rx_rings) { + if (!netif_is_rxfh_configured(nn->netdev)) + nfp_net_rss_init_itbl(nn); - netdev->mtu = old_mtu; - nn->fl_bufsz = old_fl_bufsz; + err = netif_set_real_num_rx_queues(nn->netdev, + nn->num_rx_rings); + if (err) + return err; + } - err = __nfp_net_set_config_and_enable(nn); + if (nn->netdev->real_num_tx_queues != nn->num_stack_tx_rings) { + err = netif_set_real_num_tx_queues(nn->netdev, + nn->num_stack_tx_rings); if (err) - nn_err(nn, "Can't restore MTU - FW communication failed (%d,%d)\n", - err_new, err); + return err; } - nfp_net_shadow_rx_rings_free(nn, tmp_rings); + return __nfp_net_set_config_and_enable(nn); +} - nfp_net_open_stack(nn); +static int +nfp_net_check_config(struct nfp_net *nn, struct bpf_prog *xdp_prog, + struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx) +{ + /* XDP-enabled tests */ + if (!xdp_prog) + return 0; + if (rx && nfp_net_calc_fl_bufsz(nn, rx->mtu) > PAGE_SIZE) { + nn_warn(nn, "MTU too large w/ XDP enabled\n"); + return -EINVAL; + } + if (tx && tx->n_rings > nn->max_tx_rings) { + nn_warn(nn, "Insufficient number of TX rings w/ XDP enabled\n"); + return -EINVAL; + } - return err; + return 0; } -int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt) -{ - struct nfp_net_tx_ring *tx_rings = NULL; - struct nfp_net_rx_ring *rx_rings = NULL; - u32 old_rxd_cnt, old_txd_cnt; +static void +nfp_net_ring_reconfig_down(struct nfp_net *nn, struct bpf_prog **xdp_prog, + struct nfp_net_ring_set *rx, + struct nfp_net_ring_set *tx, + unsigned int stack_tx_rings, unsigned int num_vecs) +{ + nn->netdev->mtu = rx ? rx->mtu : nn->netdev->mtu; + nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, nn->netdev->mtu); + nn->rxd_cnt = rx ? rx->dcnt : nn->rxd_cnt; + nn->txd_cnt = tx ? tx->dcnt : nn->txd_cnt; + nn->num_rx_rings = rx ? 
rx->n_rings : nn->num_rx_rings; + nn->num_tx_rings = tx ? tx->n_rings : nn->num_tx_rings; + nn->num_stack_tx_rings = stack_tx_rings; + nn->num_r_vecs = num_vecs; + *xdp_prog = xchg(&nn->xdp_prog, *xdp_prog); + + if (!netif_is_rxfh_configured(nn->netdev)) + nfp_net_rss_init_itbl(nn); +} + +int +nfp_net_ring_reconfig(struct nfp_net *nn, struct bpf_prog **xdp_prog, + struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx) +{ + unsigned int stack_tx_rings, num_vecs, r; int err; + stack_tx_rings = tx ? tx->n_rings : nn->num_tx_rings; + if (*xdp_prog) + stack_tx_rings -= rx ? rx->n_rings : nn->num_rx_rings; + + num_vecs = max(rx ? rx->n_rings : nn->num_rx_rings, stack_tx_rings); + + err = nfp_net_check_config(nn, *xdp_prog, rx, tx); + if (err) + return err; + if (!netif_running(nn->netdev)) { - nn->rxd_cnt = rxd_cnt; - nn->txd_cnt = txd_cnt; + nfp_net_ring_reconfig_down(nn, xdp_prog, rx, tx, + stack_tx_rings, num_vecs); return 0; } - old_rxd_cnt = nn->rxd_cnt; - old_txd_cnt = nn->txd_cnt; - /* Prepare new rings */ - if (nn->rxd_cnt != rxd_cnt) { - rx_rings = nfp_net_shadow_rx_rings_prepare(nn, nn->fl_bufsz, - rxd_cnt); - if (!rx_rings) - return -ENOMEM; + for (r = nn->num_r_vecs; r < num_vecs; r++) { + err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r); + if (err) { + num_vecs = r; + goto err_cleanup_vecs; + } } - if (nn->txd_cnt != txd_cnt) { - tx_rings = nfp_net_shadow_tx_rings_prepare(nn, txd_cnt); - if (!tx_rings) { - nfp_net_shadow_rx_rings_free(nn, rx_rings); - return -ENOMEM; + if (rx) { + if (!nfp_net_rx_ring_set_prepare(nn, rx, *xdp_prog)) { + err = -ENOMEM; + goto err_cleanup_vecs; + } + } + if (tx) { + if (!nfp_net_tx_ring_set_prepare(nn, tx, stack_tx_rings)) { + err = -ENOMEM; + goto err_free_rx; } } @@ -2345,39 +2593,51 @@ int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt) nfp_net_close_stack(nn); nfp_net_clear_config_and_disable(nn); - if (rx_rings) - rx_rings = nfp_net_shadow_rx_rings_swap(nn, rx_rings); - if (tx_rings) - tx_rings = nfp_net_shadow_tx_rings_swap(nn, tx_rings); - - nn->rxd_cnt = rxd_cnt; - nn->txd_cnt = txd_cnt; - - err = nfp_net_set_config_and_enable(nn); + err = nfp_net_ring_swap_enable(nn, &num_vecs, &stack_tx_rings, + xdp_prog, rx, tx); if (err) { - const int err_new = err; - - /* Try with old configuration and old rings */ - if (rx_rings) - rx_rings = nfp_net_shadow_rx_rings_swap(nn, rx_rings); - if (tx_rings) - tx_rings = nfp_net_shadow_tx_rings_swap(nn, tx_rings); + int err2; - nn->rxd_cnt = old_rxd_cnt; - nn->txd_cnt = old_txd_cnt; + nfp_net_clear_config_and_disable(nn); - err = __nfp_net_set_config_and_enable(nn); - if (err) + /* Try with old configuration and old rings */ + err2 = nfp_net_ring_swap_enable(nn, &num_vecs, &stack_tx_rings, + xdp_prog, rx, tx); + if (err2) nn_err(nn, "Can't restore ring config - FW communication failed (%d,%d)\n", - err_new, err); + err, err2); } + for (r = num_vecs - 1; r >= nn->num_r_vecs; r--) + nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); - nfp_net_shadow_rx_rings_free(nn, rx_rings); - nfp_net_shadow_tx_rings_free(nn, tx_rings); + if (rx) + nfp_net_rx_ring_set_free(nn, rx, *xdp_prog); + if (tx) + nfp_net_tx_ring_set_free(nn, tx); nfp_net_open_stack(nn); return err; + +err_free_rx: + if (rx) + nfp_net_rx_ring_set_free(nn, rx, *xdp_prog); +err_cleanup_vecs: + for (r = num_vecs - 1; r >= nn->num_r_vecs; r--) + nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); + return err; +} + +static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct 
nfp_net_ring_set rx = { + .n_rings = nn->num_rx_rings, + .mtu = new_mtu, + .dcnt = nn->rxd_cnt, + }; + + return nfp_net_ring_reconfig(nn, &nn->xdp_prog, &rx, NULL); } static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev, @@ -2434,8 +2694,12 @@ nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, if (proto != htons(ETH_P_ALL)) return -ENOTSUPP; - if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) - return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf); + if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) { + if (!nn->bpf_offload_xdp) + return nfp_net_bpf_offload(nn, tc->cls_bpf); + else + return -EBUSY; + } return -EINVAL; } @@ -2643,6 +2907,87 @@ static void nfp_net_del_vxlan_port(struct net_device *netdev, nfp_net_set_vxlan_port(nn, idx, 0); } +static int nfp_net_xdp_offload(struct nfp_net *nn, struct bpf_prog *prog) +{ + struct tc_cls_bpf_offload cmd = { + .prog = prog, + }; + int ret; + + if (!nfp_net_ebpf_capable(nn)) + return -EINVAL; + + if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) { + if (!nn->bpf_offload_xdp) + return prog ? -EBUSY : 0; + cmd.command = prog ? TC_CLSBPF_REPLACE : TC_CLSBPF_DESTROY; + } else { + if (!prog) + return 0; + cmd.command = TC_CLSBPF_ADD; + } + + ret = nfp_net_bpf_offload(nn, &cmd); + /* Stop offload if replace not possible */ + if (ret && cmd.command == TC_CLSBPF_REPLACE) + nfp_net_xdp_offload(nn, NULL); + nn->bpf_offload_xdp = prog && !ret; + return ret; +} + +static int nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog) +{ + struct nfp_net_ring_set rx = { + .n_rings = nn->num_rx_rings, + .mtu = nn->netdev->mtu, + .dcnt = nn->rxd_cnt, + }; + struct nfp_net_ring_set tx = { + .n_rings = nn->num_tx_rings, + .dcnt = nn->txd_cnt, + }; + int err; + + if (!prog && !nn->xdp_prog) + return 0; + if (prog && nn->xdp_prog) { + prog = xchg(&nn->xdp_prog, prog); + bpf_prog_put(prog); + nfp_net_xdp_offload(nn, nn->xdp_prog); + return 0; + } + + tx.n_rings += prog ? 
nn->num_rx_rings : -nn->num_rx_rings; + + /* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */ + err = nfp_net_ring_reconfig(nn, &prog, &rx, &tx); + if (err) + return err; + + /* @prog got swapped and is now the old one */ + if (prog) + bpf_prog_put(prog); + + nfp_net_xdp_offload(nn, nn->xdp_prog); + + return 0; +} + +static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (xdp->command) { + case XDP_SETUP_PROG: + return nfp_net_xdp_setup(nn, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = !!nn->xdp_prog; + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops nfp_net_netdev_ops = { .ndo_open = nfp_net_netdev_open, .ndo_stop = nfp_net_netdev_close, @@ -2657,6 +3002,7 @@ static const struct net_device_ops nfp_net_netdev_ops = { .ndo_features_check = nfp_net_features_check, .ndo_udp_tunnel_add = nfp_net_add_vxlan_port, .ndo_udp_tunnel_del = nfp_net_del_vxlan_port, + .ndo_xdp = nfp_net_xdp, }; /** @@ -2763,13 +3109,9 @@ void nfp_net_netdev_free(struct nfp_net *nn) */ static void nfp_net_rss_init(struct nfp_net *nn) { - int i; - netdev_rss_key_fill(nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ); - for (i = 0; i < sizeof(nn->rss_itbl); i++) - nn->rss_itbl[i] = - ethtool_rxfh_indir_default(i, nn->num_rx_rings); + nfp_net_rss_init_itbl(nn); /* Enable IPv4/IPv6 TCP by default */ nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP | @@ -2923,5 +3265,11 @@ int nfp_net_netdev_init(struct net_device *netdev) */ void nfp_net_netdev_clean(struct net_device *netdev) { - unregister_netdev(netdev); + struct nfp_net *nn = netdev_priv(netdev); + + if (nn->xdp_prog) + bpf_prog_put(nn->xdp_prog); + if (nn->bpf_offload_xdp) + nfp_net_xdp_offload(nn, NULL); + unregister_netdev(nn->netdev); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c index 180cf70f0093..c66f3f954aa8 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c @@ -114,6 +114,16 @@ static const struct file_operations nfp_rx_q_fops = { .llseek = seq_lseek }; +static int nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f); + +static const struct file_operations nfp_tx_q_fops = { + .owner = THIS_MODULE, + .open = nfp_net_debugfs_tx_q_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek +}; + static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data) { struct nfp_net_r_vector *r_vec = file->private; @@ -126,10 +136,13 @@ static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data) rtnl_lock(); - if (!r_vec->nfp_net || !r_vec->tx_ring) + if (debugfs_real_fops(file->file) == &nfp_tx_q_fops) + tx_ring = r_vec->tx_ring; + else + tx_ring = r_vec->xdp_ring; + if (!r_vec->nfp_net || !tx_ring) goto out; nn = r_vec->nfp_net; - tx_ring = r_vec->tx_ring; if (!netif_running(nn->netdev)) goto out; @@ -148,9 +161,14 @@ static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data) txd->vals[2], txd->vals[3]); skb = READ_ONCE(tx_ring->txbufs[i].skb); - if (skb) - seq_printf(file, " skb->head=%p skb->data=%p", - skb->head, skb->data); + if (skb) { + if (tx_ring == r_vec->tx_ring) + seq_printf(file, " skb->head=%p skb->data=%p", + skb->head, skb->data); + else + seq_printf(file, " frag=%p", skb); + } + if (tx_ring->txbufs[i].dma_addr) seq_printf(file, " dma_addr=%pad", &tx_ring->txbufs[i].dma_addr); @@ -176,7 +194,7 @@ static int 
nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f) return single_open(f, nfp_net_debugfs_tx_q_read, inode->i_private); } -static const struct file_operations nfp_tx_q_fops = { +static const struct file_operations nfp_xdp_q_fops = { .owner = THIS_MODULE, .open = nfp_net_debugfs_tx_q_open, .release = single_release, @@ -186,7 +204,7 @@ static const struct file_operations nfp_tx_q_fops = { void nfp_net_debugfs_adapter_add(struct nfp_net *nn) { - struct dentry *queues, *tx, *rx; + struct dentry *queues, *tx, *rx, *xdp; char int_name[16]; int i; @@ -204,16 +222,19 @@ void nfp_net_debugfs_adapter_add(struct nfp_net *nn) rx = debugfs_create_dir("rx", queues); tx = debugfs_create_dir("tx", queues); - if (IS_ERR_OR_NULL(rx) || IS_ERR_OR_NULL(tx)) + xdp = debugfs_create_dir("xdp", queues); + if (IS_ERR_OR_NULL(rx) || IS_ERR_OR_NULL(tx) || IS_ERR_OR_NULL(xdp)) return; - for (i = 0; i < nn->num_rx_rings; i++) { + for (i = 0; i < min(nn->max_rx_rings, nn->max_r_vecs); i++) { sprintf(int_name, "%d", i); debugfs_create_file(int_name, S_IRUSR, rx, &nn->r_vecs[i], &nfp_rx_q_fops); + debugfs_create_file(int_name, S_IRUSR, xdp, + &nn->r_vecs[i], &nfp_xdp_q_fops); } - for (i = 0; i < nn->num_tx_rings; i++) { + for (i = 0; i < min(nn->max_tx_rings, nn->max_r_vecs); i++) { sprintf(int_name, "%d", i); debugfs_create_file(int_name, S_IRUSR, tx, &nn->r_vecs[i], &nfp_tx_q_fops); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 3418f2277e9d..1b26e9646574 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -158,6 +158,28 @@ static void nfp_net_get_ringparam(struct net_device *netdev, ring->tx_pending = nn->txd_cnt; } +static int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt) +{ + struct nfp_net_ring_set *reconfig_rx = NULL, *reconfig_tx = NULL; + struct nfp_net_ring_set rx = { + .n_rings = nn->num_rx_rings, + .mtu = nn->netdev->mtu, + .dcnt = rxd_cnt, + }; + struct nfp_net_ring_set tx = { + .n_rings = nn->num_tx_rings, + .dcnt = txd_cnt, + }; + + if (nn->rxd_cnt != rxd_cnt) + reconfig_rx = &rx; + if (nn->txd_cnt != txd_cnt) + reconfig_tx = &tx; + + return nfp_net_ring_reconfig(nn, &nn->xdp_prog, + reconfig_rx, reconfig_tx); +} + static int nfp_net_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) { @@ -614,6 +636,76 @@ static int nfp_net_set_coalesce(struct net_device *netdev, return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD); } +static void nfp_net_get_channels(struct net_device *netdev, + struct ethtool_channels *channel) +{ + struct nfp_net *nn = netdev_priv(netdev); + unsigned int num_tx_rings; + + num_tx_rings = nn->num_tx_rings; + if (nn->xdp_prog) + num_tx_rings -= nn->num_rx_rings; + + channel->max_rx = min(nn->max_rx_rings, nn->max_r_vecs); + channel->max_tx = min(nn->max_tx_rings, nn->max_r_vecs); + channel->max_combined = min(channel->max_rx, channel->max_tx); + channel->max_other = NFP_NET_NON_Q_VECTORS; + channel->combined_count = min(nn->num_rx_rings, num_tx_rings); + channel->rx_count = nn->num_rx_rings - channel->combined_count; + channel->tx_count = num_tx_rings - channel->combined_count; + channel->other_count = NFP_NET_NON_Q_VECTORS; +} + +static int nfp_net_set_num_rings(struct nfp_net *nn, unsigned int total_rx, + unsigned int total_tx) +{ + struct nfp_net_ring_set *reconfig_rx = NULL, *reconfig_tx = NULL; + struct nfp_net_ring_set rx = { + .n_rings = total_rx, + .mtu = nn->netdev->mtu, + 
.dcnt = nn->rxd_cnt, + }; + struct nfp_net_ring_set tx = { + .n_rings = total_tx, + .dcnt = nn->txd_cnt, + }; + + if (nn->num_rx_rings != total_rx) + reconfig_rx = &rx; + if (nn->num_stack_tx_rings != total_tx || + (nn->xdp_prog && reconfig_rx)) + reconfig_tx = &tx; + + /* nfp_net_check_config() will catch tx.n_rings > nn->max_tx_rings */ + if (nn->xdp_prog) + tx.n_rings += total_rx; + + return nfp_net_ring_reconfig(nn, &nn->xdp_prog, + reconfig_rx, reconfig_tx); +} + +static int nfp_net_set_channels(struct net_device *netdev, + struct ethtool_channels *channel) +{ + struct nfp_net *nn = netdev_priv(netdev); + unsigned int total_rx, total_tx; + + /* Reject unsupported */ + if (!channel->combined_count || + channel->other_count != NFP_NET_NON_Q_VECTORS || + (channel->rx_count && channel->tx_count)) + return -EINVAL; + + total_rx = channel->combined_count + channel->rx_count; + total_tx = channel->combined_count + channel->tx_count; + + if (total_rx > min(nn->max_rx_rings, nn->max_r_vecs) || + total_tx > min(nn->max_tx_rings, nn->max_r_vecs)) + return -EINVAL; + + return nfp_net_set_num_rings(nn, total_rx, total_tx); +} + static const struct ethtool_ops nfp_net_ethtool_ops = { .get_drvinfo = nfp_net_get_drvinfo, .get_link = ethtool_op_get_link, @@ -632,6 +724,8 @@ static const struct ethtool_ops nfp_net_ethtool_ops = { .get_regs = nfp_net_get_regs, .get_coalesce = nfp_net_get_coalesce, .set_coalesce = nfp_net_set_coalesce, + .get_channels = nfp_net_get_channels, + .set_channels = nfp_net_set_channels, }; void nfp_net_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c index cfed40c0e310..18a851eb3508 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -111,6 +111,9 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) const struct tc_action *a; LIST_HEAD(actions); + if (!cls_bpf->exts) + return NN_ACT_XDP; + /* TC direct action */ if (cls_bpf->exts_integrated) { if (tc_no_actions(cls_bpf->exts)) @@ -233,9 +236,7 @@ static int nfp_net_bpf_stop(struct nfp_net *nn) return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); } -int -nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, - struct tc_cls_bpf_offload *cls_bpf) +int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) { struct nfp_bpf_result res; dma_addr_t dma_addr; diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 048f9a342413..f5a4ebb3963f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -8581,7 +8581,6 @@ union drv_union_data { struct drv_version_stc drv_version; struct lan_stats_stc lan_stats; - u64 reserved_stats[11]; struct ocbb_data_stc ocbb_info; struct temperature_status_stc temp_info; struct resource_info resource; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index d8e499ebb99d..6dd3ce443484 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1697,19 +1697,27 @@ int qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn, u32 *p_mcp_resp, u32 *p_mcp_param) { struct qed_mcp_mb_params mb_params; - union drv_union_data *p_union_data; + union drv_union_data union_data; int rc; memset(&mb_params, 0, sizeof(mb_params)); + memset(&union_data, 0, sizeof(union_data)); mb_params.cmd = 
DRV_MSG_GET_RESOURCE_ALLOC_MSG; mb_params.param = QED_RESC_ALLOC_VERSION; - p_union_data = (union drv_union_data *)p_resc_info; - mb_params.p_data_src = p_union_data; - mb_params.p_data_dst = p_union_data; + + /* Need to have a sufficient large struct, as the cmd_and_union + * is going to do memcpy from and to it. + */ + memcpy(&union_data.resource, p_resc_info, sizeof(*p_resc_info)); + + mb_params.p_data_src = &union_data; + mb_params.p_data_dst = &union_data; rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); if (rc) return rc; + /* Copy the data back */ + memcpy(p_resc_info, &union_data.resource, sizeof(*p_resc_info)); *p_mcp_resp = mb_params.mcp_resp; *p_mcp_param = mb_params.mcp_param; diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 00279da6a1e8..e61807e6d47b 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2245,6 +2245,86 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue) } } +#define RSS_MODE_HASH_ADDRS (1 << RSS_MODE_HASH_SRC_ADDR_LBN |\ + 1 << RSS_MODE_HASH_DST_ADDR_LBN) +#define RSS_MODE_HASH_PORTS (1 << RSS_MODE_HASH_SRC_PORT_LBN |\ + 1 << RSS_MODE_HASH_DST_PORT_LBN) +#define RSS_CONTEXT_FLAGS_DEFAULT (1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV4_EN_LBN |\ + 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV4_EN_LBN |\ + 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV6_EN_LBN |\ + 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV6_EN_LBN |\ + (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV4_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV4_RSS_MODE_LBN |\ + (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV6_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV6_RSS_MODE_LBN) + +static int efx_ef10_get_rss_flags(struct efx_nic *efx, u32 context, u32 *flags) +{ + /* Firmware had a bug (sfc bug 61952) where it would not actually + * fill in the flags field in the response to MC_CMD_RSS_CONTEXT_GET_FLAGS. + * This meant that it would always contain whatever was previously + * in the MCDI buffer. Fortunately, all firmware versions with + * this bug have the same default flags value for a newly-allocated + * RSS context, and the only time we want to get the flags is just + * after allocating. Moreover, the response has a 32-bit hole + * where the context ID would be in the request, so we can use an + * overlength buffer in the request and pre-fill the flags field + * with what we believe the default to be. Thus if the firmware + * has the bug, it will leave our pre-filled value in the flags + * field of the response, and we will get the right answer. + * + * However, this does mean that this function should NOT be used if + * the RSS context flags might not be their defaults - it is ONLY + * reliably correct for a newly-allocated RSS context. 
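The workaround depends on the request and response layouts lining up: the response leaves a 32-bit hole exactly where the request carries the context ID, so one overlength buffer can serve both directions with the flags word pre-seeded. Reduced to its general shape (a sketch under assumed names, not the sfc MCDI API; u32 and fw_rpc() stand in for the real types and transport):

	struct rss_flags_msg {
		u32 context_id;	/* request: context ID; response: hole */
		u32 flags;	/* response: flags word */
	};

	static u32 get_ctx_flags(struct rss_flags_msg *buf, u32 ctx, u32 def)
	{
		buf->context_id = ctx;
		buf->flags = def;		/* buggy fw leaves this as-is */
		fw_rpc(buf, sizeof(*buf));	/* hypothetical transport call */
		return buf->flags;		/* correct either way */
	}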
+ */ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN); + size_t outlen; + int rc; + + /* Check we have a hole for the context ID */ + BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_FLAGS_IN_LEN != MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_FLAGS_OFST); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_IN_RSS_CONTEXT_ID, context); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS, + RSS_CONTEXT_FLAGS_DEFAULT); + rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_FLAGS, inbuf, + sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); + if (rc == 0) { + if (outlen < MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN) + rc = -EIO; + else + *flags = MCDI_DWORD(outbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS); + } + return rc; +} + +/* Attempt to enable 4-tuple UDP hashing on the specified RSS context. + * If we fail, we just leave the RSS context at its default hash settings, + * which is safe but may slightly reduce performance. + * Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we + * just need to set the UDP ports flags (for both IP versions). + */ +static void efx_ef10_set_rss_flags(struct efx_nic *efx, u32 context) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN); + u32 flags; + + BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0); + + if (efx_ef10_get_rss_flags(efx, context, &flags) != 0) + return; + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID, context); + flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN; + flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN; + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags); + if (!efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf, sizeof(inbuf), + NULL, 0, NULL)) + /* Succeeded, so UDP 4-tuple is now enabled */ + efx->rx_hash_udp_4tuple = true; +} + static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context, bool exclusive, unsigned *context_size) { @@ -2290,6 +2370,10 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context, if (context_size) *context_size = rss_spread; + if (nic_data->datapath_caps & + 1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN) + efx_ef10_set_rss_flags(efx, *context); + return 0; } diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 445ccdb6bc67..bf126f935ade 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -968,20 +968,24 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev, info->data = 0; switch (info->flow_type) { + case UDP_V4_FLOW: + if (efx->rx_hash_udp_4tuple) + /* fall through */ case TCP_V4_FLOW: - info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; /* fall through */ - case UDP_V4_FLOW: case SCTP_V4_FLOW: case AH_ESP_V4_FLOW: case IPV4_FLOW: info->data |= RXH_IP_SRC | RXH_IP_DST; min_revision = EFX_REV_FALCON_B0; break; + case UDP_V6_FLOW: + if (efx->rx_hash_udp_4tuple) + /* fall through */ case TCP_V6_FLOW: - info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; /* fall through */ - case UDP_V6_FLOW: case SCTP_V6_FLOW: case AH_ESP_V6_FLOW: case IPV6_FLOW: diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 99d8c82124bb..fec51c4b2607 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -853,6 +853,7 @@ struct vfdi_status; * @rx_hash_key: Toeplitz hash key for RSS * 
@rx_indir_table: Indirection table for RSS * @rx_scatter: Scatter mode enabled for receives + * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled * @int_error_count: Number of internal errors seen recently * @int_error_expire: Time at which error count will be expired * @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will @@ -990,6 +991,7 @@ struct efx_nic { u8 rx_hash_key[40]; u32 rx_indir_table[128]; bool rx_scatter; + bool rx_hash_udp_4tuple; unsigned int_error_count; unsigned long int_error_expire; diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 3818c5e06eba..6e9fcc345798 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -69,6 +69,17 @@ config DWMAC_MESON the stmmac device driver. This driver is used for Meson6, Meson8, Meson8b and GXBB SoCs. +config DWMAC_OXNAS + tristate "Oxford Semiconductor OXNAS dwmac support" + default ARCH_OXNAS + depends on OF && COMMON_CLK && (ARCH_OXNAS || COMPILE_TEST) + select MFD_SYSCON + help + Support for Ethernet controller on Oxford Semiconductor OXNAS SoCs. + + This selects the Oxford Semiconductor OXNAS SoC glue layer support for + the stmmac device driver. This driver is used for OX820. + config DWMAC_ROCKCHIP tristate "Rockchip dwmac support" default ARCH_ROCKCHIP diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 5d6ece5919b3..8f83a86ba13c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o obj-$(CONFIG_DWMAC_IPQ806X) += dwmac-ipq806x.o obj-$(CONFIG_DWMAC_LPC18XX) += dwmac-lpc18xx.o obj-$(CONFIG_DWMAC_MESON) += dwmac-meson.o dwmac-meson8b.o +obj-$(CONFIG_DWMAC_OXNAS) += dwmac-oxnas.o obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rk.o obj-$(CONFIG_DWMAC_SOCFPGA) += dwmac-altr-socfpga.o obj-$(CONFIG_DWMAC_STI) += dwmac-sti.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c new file mode 100644 index 000000000000..c35597586121 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c @@ -0,0 +1,217 @@ +/* + * Oxford Semiconductor OXNAS DWMAC glue layer + * + * Copyright (C) 2016 Neil Armstrong <[email protected]> + * Copyright (C) 2014 Daniel Golle <[email protected]> + * Copyright (C) 2013 Ma Haijun <[email protected]> + * Copyright (C) 2012 John Crispin <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#include <linux/device.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/mfd/syscon.h> +#include <linux/stmmac.h> + +#include "stmmac_platform.h" + +/* System Control regmap offsets */ +#define OXNAS_DWMAC_CTRL_REGOFFSET 0x78 +#define OXNAS_DWMAC_DELAY_REGOFFSET 0x100 + +/* Control Register */ +#define DWMAC_CKEN_RX_IN 14 +#define DWMAC_CKEN_RXN_OUT 13 +#define DWMAC_CKEN_RX_OUT 12 +#define DWMAC_CKEN_TX_IN 10 +#define DWMAC_CKEN_TXN_OUT 9 +#define DWMAC_CKEN_TX_OUT 8 +#define DWMAC_RX_SOURCE 7 +#define DWMAC_TX_SOURCE 6 +#define DWMAC_LOW_TX_SOURCE 4 +#define DWMAC_AUTO_TX_SOURCE 3 +#define DWMAC_RGMII 2 +#define DWMAC_SIMPLE_MUX 1 +#define DWMAC_CKEN_GTX 0 + +/* Delay register */ +#define DWMAC_TX_VARDELAY_SHIFT 0 +#define DWMAC_TXN_VARDELAY_SHIFT 8 +#define DWMAC_RX_VARDELAY_SHIFT 16 +#define DWMAC_RXN_VARDELAY_SHIFT 24 +#define DWMAC_TX_VARDELAY(d) ((d) << DWMAC_TX_VARDELAY_SHIFT) +#define DWMAC_TXN_VARDELAY(d) ((d) << DWMAC_TXN_VARDELAY_SHIFT) +#define DWMAC_RX_VARDELAY(d) ((d) << DWMAC_RX_VARDELAY_SHIFT) +#define DWMAC_RXN_VARDELAY(d) ((d) << DWMAC_RXN_VARDELAY_SHIFT) + +struct oxnas_dwmac { + struct device *dev; + struct clk *clk; + struct regmap *regmap; +}; + +static int oxnas_dwmac_init(struct oxnas_dwmac *dwmac) +{ + unsigned int value; + int ret; + + /* Reset HW here before changing the glue configuration */ + ret = device_reset(dwmac->dev); + if (ret) + return ret; + + ret = clk_prepare_enable(dwmac->clk); + if (ret) + return ret; + + ret = regmap_read(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, &value); + if (ret < 0) { + clk_disable_unprepare(dwmac->clk); + return ret; + } + + /* Enable GMII_GTXCLK to follow GMII_REFCLK, required for gigabit PHY */ + value |= BIT(DWMAC_CKEN_GTX) | + /* Use simple mux for 25/125 Mhz clock switching */ + BIT(DWMAC_SIMPLE_MUX) | + /* set auto switch tx clock source */ + BIT(DWMAC_AUTO_TX_SOURCE) | + /* enable tx & rx vardelay */ + BIT(DWMAC_CKEN_TX_OUT) | + BIT(DWMAC_CKEN_TXN_OUT) | + BIT(DWMAC_CKEN_TX_IN) | + BIT(DWMAC_CKEN_RX_OUT) | + BIT(DWMAC_CKEN_RXN_OUT) | + BIT(DWMAC_CKEN_RX_IN); + regmap_write(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, value); + + /* set tx & rx vardelay */ + value = DWMAC_TX_VARDELAY(4) | + DWMAC_TXN_VARDELAY(2) | + DWMAC_RX_VARDELAY(10) | + DWMAC_RXN_VARDELAY(8); + regmap_write(dwmac->regmap, OXNAS_DWMAC_DELAY_REGOFFSET, value); + + return 0; +} + +static int oxnas_dwmac_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat_dat; + struct stmmac_resources stmmac_res; + struct device_node *sysctrl; + struct oxnas_dwmac *dwmac; + int ret; + + sysctrl = of_parse_phandle(pdev->dev.of_node, "oxsemi,sys-ctrl", 0); + if (!sysctrl) { + dev_err(&pdev->dev, "failed to get sys-ctrl node\n"); + return -EINVAL; + } + + ret = stmmac_get_platform_resources(pdev, &stmmac_res); + if (ret) + return ret; + + plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + if (IS_ERR(plat_dat)) + return PTR_ERR(plat_dat); + + dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); + if (!dwmac) + return -ENOMEM; + + dwmac->dev = &pdev->dev; + plat_dat->bsp_priv = dwmac; + + dwmac->regmap = syscon_node_to_regmap(sysctrl); + if (IS_ERR(dwmac->regmap)) { + dev_err(&pdev->dev, "failed to have sysctrl regmap\n"); + return PTR_ERR(dwmac->regmap); + } + + dwmac->clk = devm_clk_get(&pdev->dev, "gmac"); + if (IS_ERR(dwmac->clk)) + return PTR_ERR(dwmac->clk); + + ret = oxnas_dwmac_init(dwmac); + if (ret) + return 
ret; + + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + if (ret) + clk_disable_unprepare(dwmac->clk); + + return ret; +} + +static int oxnas_dwmac_remove(struct platform_device *pdev) +{ + struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev); + int ret = stmmac_dvr_remove(&pdev->dev); + + clk_disable_unprepare(dwmac->clk); + + return ret; +} + +#ifdef CONFIG_PM_SLEEP +static int oxnas_dwmac_suspend(struct device *dev) +{ + struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(dev); + int ret; + + ret = stmmac_suspend(dev); + clk_disable_unprepare(dwmac->clk); + + return ret; +} + +static int oxnas_dwmac_resume(struct device *dev) +{ + struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(dev); + int ret; + + ret = oxnas_dwmac_init(dwmac); + if (ret) + return ret; + + ret = stmmac_resume(dev); + + return ret; +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(oxnas_dwmac_pm_ops, + oxnas_dwmac_suspend, oxnas_dwmac_resume); + +static const struct of_device_id oxnas_dwmac_match[] = { + { .compatible = "oxsemi,ox820-dwmac" }, + { } +}; +MODULE_DEVICE_TABLE(of, oxnas_dwmac_match); + +static struct platform_driver oxnas_dwmac_driver = { + .probe = oxnas_dwmac_probe, + .remove = oxnas_dwmac_remove, + .driver = { + .name = "oxnas-dwmac", + .pm = &oxnas_dwmac_pm_ops, + .of_match_table = oxnas_dwmac_match, + }, +}; +module_platform_driver(oxnas_dwmac_driver); + +MODULE_AUTHOR("Neil Armstrong <[email protected]>"); +MODULE_DESCRIPTION("Oxford Semiconductor OXNAS DWMAC glue layer"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 3740a4417fa0..6b787d73b32a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -901,44 +901,6 @@ static void rk_gmac_powerdown(struct rk_priv_data *gmac) gmac_clk_enable(gmac, false); } -static int rk_gmac_init(struct platform_device *pdev, void *priv) -{ - struct rk_priv_data *bsp_priv = priv; - - return rk_gmac_powerup(bsp_priv); -} - -static void rk_gmac_exit(struct platform_device *pdev, void *priv) -{ - struct rk_priv_data *bsp_priv = priv; - - rk_gmac_powerdown(bsp_priv); -} - -static void rk_gmac_suspend(struct platform_device *pdev, void *priv) -{ - struct rk_priv_data *bsp_priv = priv; - - /* Keep the PHY up if we use Wake-on-Lan. */ - if (device_may_wakeup(&pdev->dev)) - return; - - rk_gmac_powerdown(bsp_priv); - bsp_priv->suspended = true; -} - -static void rk_gmac_resume(struct platform_device *pdev, void *priv) -{ - struct rk_priv_data *bsp_priv = priv; - - /* The PHY was up for Wake-on-Lan. 
*/ - if (!bsp_priv->suspended) - return; - - rk_gmac_powerup(bsp_priv); - bsp_priv->suspended = false; -} - static void rk_fix_speed(void *priv, unsigned int speed) { struct rk_priv_data *bsp_priv = priv; @@ -974,23 +936,60 @@ static int rk_gmac_probe(struct platform_device *pdev) return PTR_ERR(plat_dat); plat_dat->has_gmac = true; - plat_dat->init = rk_gmac_init; - plat_dat->exit = rk_gmac_exit; plat_dat->fix_mac_speed = rk_fix_speed; - plat_dat->suspend = rk_gmac_suspend; - plat_dat->resume = rk_gmac_resume; plat_dat->bsp_priv = rk_gmac_setup(pdev, data); if (IS_ERR(plat_dat->bsp_priv)) return PTR_ERR(plat_dat->bsp_priv); - ret = rk_gmac_init(pdev, plat_dat->bsp_priv); + ret = rk_gmac_powerup(plat_dat->bsp_priv); if (ret) return ret; return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); } +static int rk_gmac_remove(struct platform_device *pdev) +{ + struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(&pdev->dev); + int ret = stmmac_dvr_remove(&pdev->dev); + + rk_gmac_powerdown(bsp_priv); + + return ret; +} + +#ifdef CONFIG_PM_SLEEP +static int rk_gmac_suspend(struct device *dev) +{ + struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(dev); + int ret = stmmac_suspend(dev); + + /* Keep the PHY up if we use Wake-on-Lan. */ + if (!device_may_wakeup(dev)) { + rk_gmac_powerdown(bsp_priv); + bsp_priv->suspended = true; + } + + return ret; +} + +static int rk_gmac_resume(struct device *dev) +{ + struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(dev); + + /* The PHY was up for Wake-on-Lan. */ + if (bsp_priv->suspended) { + rk_gmac_powerup(bsp_priv); + bsp_priv->suspended = false; + } + + return stmmac_resume(dev); +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(rk_gmac_pm_ops, rk_gmac_suspend, rk_gmac_resume); + static const struct of_device_id rk_gmac_dwmac_match[] = { { .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops }, { .compatible = "rockchip,rk3288-gmac", .data = &rk3288_ops }, @@ -1003,10 +1002,10 @@ MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match); static struct platform_driver rk_gmac_dwmac_driver = { .probe = rk_gmac_probe, - .remove = stmmac_pltfr_remove, + .remove = rk_gmac_remove, .driver = { .name = "rk_gmac-dwmac", - .pm = &stmmac_pltfr_pm_ops, + .pm = &rk_gmac_pm_ops, .of_match_table = rk_gmac_dwmac_match, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index 58c05acc2aab..c9006ab083d5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -126,8 +126,8 @@ struct sti_dwmac { struct clk *clk; /* PHY clock */ u32 ctrl_reg; /* GMAC glue-logic control register */ int clk_sel_reg; /* GMAC ext clk selection register */ - struct device *dev; struct regmap *regmap; + bool gmac_en; u32 speed; void (*fix_retime_src)(void *priv, unsigned int speed); }; @@ -191,7 +191,7 @@ static void stih4xx_fix_retime_src(void *priv, u32 spd) } } - if (src == TX_RETIME_SRC_CLKGEN && dwmac->clk && freq) + if (src == TX_RETIME_SRC_CLKGEN && freq) clk_set_rate(dwmac->clk, freq); regmap_update_bits(dwmac->regmap, reg, STIH4XX_RETIME_SRC_MASK, @@ -222,26 +222,20 @@ static void stid127_fix_retime_src(void *priv, u32 spd) freq = DWMAC_2_5MHZ; } - if (dwmac->clk && freq) + if (freq) clk_set_rate(dwmac->clk, freq); regmap_update_bits(dwmac->regmap, reg, STID127_RETIME_SRC_MASK, val); } -static int sti_dwmac_init(struct platform_device *pdev, void *priv) +static int sti_dwmac_set_mode(struct sti_dwmac *dwmac) { - struct sti_dwmac *dwmac = priv; struct regmap 
*regmap = dwmac->regmap; int iface = dwmac->interface; - struct device *dev = dwmac->dev; - struct device_node *np = dev->of_node; u32 reg = dwmac->ctrl_reg; u32 val; - if (dwmac->clk) - clk_prepare_enable(dwmac->clk); - - if (of_property_read_bool(np, "st,gmac_en")) + if (dwmac->gmac_en) regmap_update_bits(regmap, reg, EN_MASK, EN); regmap_update_bits(regmap, reg, MII_PHY_SEL_MASK, phy_intf_sels[iface]); @@ -249,18 +243,11 @@ static int sti_dwmac_init(struct platform_device *pdev, void *priv) val = (iface == PHY_INTERFACE_MODE_REVMII) ? 0 : ENMII; regmap_update_bits(regmap, reg, ENMII_MASK, val); - dwmac->fix_retime_src(priv, dwmac->speed); + dwmac->fix_retime_src(dwmac, dwmac->speed); return 0; } -static void sti_dwmac_exit(struct platform_device *pdev, void *priv) -{ - struct sti_dwmac *dwmac = priv; - - if (dwmac->clk) - clk_disable_unprepare(dwmac->clk); -} static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, struct platform_device *pdev) { @@ -270,9 +257,6 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, struct regmap *regmap; int err; - if (!np) - return -EINVAL; - /* clk selection from extra syscfg register */ dwmac->clk_sel_reg = -ENXIO; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sti-clkconf"); @@ -289,9 +273,9 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, return err; } - dwmac->dev = dev; dwmac->interface = of_get_phy_mode(np); dwmac->regmap = regmap; + dwmac->gmac_en = of_property_read_bool(np, "st,gmac_en"); dwmac->ext_phyclk = of_property_read_bool(np, "st,ext-phyclk"); dwmac->tx_retime_src = TX_RETIME_SRC_NA; dwmac->speed = SPEED_100; @@ -357,17 +341,62 @@ static int sti_dwmac_probe(struct platform_device *pdev) dwmac->fix_retime_src = data->fix_retime_src; plat_dat->bsp_priv = dwmac; - plat_dat->init = sti_dwmac_init; - plat_dat->exit = sti_dwmac_exit; plat_dat->fix_mac_speed = data->fix_retime_src; - ret = sti_dwmac_init(pdev, plat_dat->bsp_priv); + ret = clk_prepare_enable(dwmac->clk); if (ret) return ret; - return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + ret = sti_dwmac_set_mode(dwmac); + if (ret) + goto disable_clk; + + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + if (ret) + goto disable_clk; + + return 0; + +disable_clk: + clk_disable_unprepare(dwmac->clk); + return ret; } +static int sti_dwmac_remove(struct platform_device *pdev) +{ + struct sti_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev); + int ret = stmmac_dvr_remove(&pdev->dev); + + clk_disable_unprepare(dwmac->clk); + + return ret; +} + +#ifdef CONFIG_PM_SLEEP +static int sti_dwmac_suspend(struct device *dev) +{ + struct sti_dwmac *dwmac = get_stmmac_bsp_priv(dev); + int ret = stmmac_suspend(dev); + + clk_disable_unprepare(dwmac->clk); + + return ret; +} + +static int sti_dwmac_resume(struct device *dev) +{ + struct sti_dwmac *dwmac = get_stmmac_bsp_priv(dev); + + clk_prepare_enable(dwmac->clk); + sti_dwmac_set_mode(dwmac); + + return stmmac_resume(dev); +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(sti_dwmac_pm_ops, sti_dwmac_suspend, + sti_dwmac_resume); + static const struct sti_dwmac_of_data stih4xx_dwmac_data = { .fix_retime_src = stih4xx_fix_retime_src, }; @@ -387,10 +416,10 @@ MODULE_DEVICE_TABLE(of, sti_dwmac_match); static struct platform_driver sti_dwmac_driver = { .probe = sti_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove = sti_dwmac_remove, .driver = { .name = "sti-dwmac", - .pm = &stmmac_pltfr_pm_ops, + .pm = &sti_dwmac_pm_ops, .of_match_table = sti_dwmac_match, }, }; diff --git 
a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 0a0d6a86f397..4d544c34c1f2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -417,9 +417,7 @@ static int stmmac_pltfr_suspend(struct device *dev) struct platform_device *pdev = to_platform_device(dev); ret = stmmac_suspend(dev); - if (priv->plat->suspend) - priv->plat->suspend(pdev, priv->plat->bsp_priv); - else if (priv->plat->exit) + if (priv->plat->exit) priv->plat->exit(pdev, priv->plat->bsp_priv); return ret; @@ -438,9 +436,7 @@ static int stmmac_pltfr_resume(struct device *dev) struct stmmac_priv *priv = netdev_priv(ndev); struct platform_device *pdev = to_platform_device(dev); - if (priv->plat->resume) - priv->plat->resume(pdev, priv->plat->bsp_priv); - else if (priv->plat->init) + if (priv->plat->init) priv->plat->init(pdev, priv->plat->bsp_priv); return stmmac_resume(dev); diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index b1ddf89a19be..39d06e8e6aa7 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1376,10 +1376,6 @@ static int cpsw_ndo_open(struct net_device *ndev) ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0); if (!cpsw_common_res_usage_state(cpsw)) { - /* setup tx dma to fixed prio and zero offset */ - cpdma_control_set(cpsw->dma, CPDMA_TX_PRIO_FIXED, 1); - cpdma_control_set(cpsw->dma, CPDMA_RX_BUFFER_OFFSET, 0); - /* disable priority elevation */ __raw_writel(0, &cpsw->regs->ptype); diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index c3f35f11a8fd..56708a79a18a 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -124,6 +124,29 @@ struct cpdma_chan { int int_set, int_clear, td; }; +struct cpdma_control_info { + u32 reg; + u32 shift, mask; + int access; +#define ACCESS_RO BIT(0) +#define ACCESS_WO BIT(1) +#define ACCESS_RW (ACCESS_RO | ACCESS_WO) +}; + +static struct cpdma_control_info controls[] = { + [CPDMA_CMD_IDLE] = {CPDMA_DMACONTROL, 3, 1, ACCESS_WO}, + [CPDMA_COPY_ERROR_FRAMES] = {CPDMA_DMACONTROL, 4, 1, ACCESS_RW}, + [CPDMA_RX_OFF_LEN_UPDATE] = {CPDMA_DMACONTROL, 2, 1, ACCESS_RW}, + [CPDMA_RX_OWNERSHIP_FLIP] = {CPDMA_DMACONTROL, 1, 1, ACCESS_RW}, + [CPDMA_TX_PRIO_FIXED] = {CPDMA_DMACONTROL, 0, 1, ACCESS_RW}, + [CPDMA_STAT_IDLE] = {CPDMA_DMASTATUS, 31, 1, ACCESS_RO}, + [CPDMA_STAT_TX_ERR_CODE] = {CPDMA_DMASTATUS, 20, 0xf, ACCESS_RW}, + [CPDMA_STAT_TX_ERR_CHAN] = {CPDMA_DMASTATUS, 16, 0x7, ACCESS_RW}, + [CPDMA_STAT_RX_ERR_CODE] = {CPDMA_DMASTATUS, 12, 0xf, ACCESS_RW}, + [CPDMA_STAT_RX_ERR_CHAN] = {CPDMA_DMASTATUS, 8, 0x7, ACCESS_RW}, + [CPDMA_RX_BUFFER_OFFSET] = {CPDMA_RXBUFFOFS, 0, 0xffff, ACCESS_RW}, +}; + #define tx_chan_num(chan) (chan) #define rx_chan_num(chan) ((chan) + CPDMA_MAX_CHANNELS) #define is_rx_chan(chan) ((chan)->chan_num >= CPDMA_MAX_CHANNELS) @@ -253,6 +276,31 @@ static void cpdma_desc_free(struct cpdma_desc_pool *pool, gen_pool_free(pool->gen_pool, (unsigned long)desc, pool->desc_size); } +static int _cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value) +{ + struct cpdma_control_info *info = &controls[control]; + u32 val; + + if (!ctlr->params.has_ext_regs) + return -ENOTSUPP; + + if (ctlr->state != CPDMA_STATE_ACTIVE) + return -EINVAL; + + if (control < 0 || control >= ARRAY_SIZE(controls)) + return -ENOENT; + + if ((info->access & ACCESS_WO) != ACCESS_WO) + return -EPERM; + + val = 
dma_reg_read(ctlr, info->reg); + val &= ~(info->mask << info->shift); + val |= (value & info->mask) << info->shift; + dma_reg_write(ctlr, info->reg, val); + + return 0; +} + struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params) { struct cpdma_ctlr *ctlr; @@ -324,6 +372,10 @@ int cpdma_ctlr_start(struct cpdma_ctlr *ctlr) if (ctlr->channels[i]) cpdma_chan_start(ctlr->channels[i]); } + + _cpdma_control_set(ctlr, CPDMA_TX_PRIO_FIXED, 1); + _cpdma_control_set(ctlr, CPDMA_RX_BUFFER_OFFSET, 0); + spin_unlock_irqrestore(&ctlr->lock, flags); return 0; } @@ -335,7 +387,7 @@ int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr) int i; spin_lock_irqsave(&ctlr->lock, flags); - if (ctlr->state == CPDMA_STATE_TEARDOWN) { + if (ctlr->state != CPDMA_STATE_ACTIVE) { spin_unlock_irqrestore(&ctlr->lock, flags); return -EINVAL; } @@ -533,7 +585,7 @@ int cpdma_chan_destroy(struct cpdma_chan *chan) cpdma_chan_stop(chan); ctlr->channels[chan->chan_num] = NULL; ctlr->chan_num--; - + devm_kfree(ctlr->dev, chan); cpdma_chan_split_pool(ctlr); spin_unlock_irqrestore(&ctlr->lock, flags); @@ -874,29 +926,6 @@ int cpdma_chan_int_ctrl(struct cpdma_chan *chan, bool enable) return 0; } -struct cpdma_control_info { - u32 reg; - u32 shift, mask; - int access; -#define ACCESS_RO BIT(0) -#define ACCESS_WO BIT(1) -#define ACCESS_RW (ACCESS_RO | ACCESS_WO) -}; - -static struct cpdma_control_info controls[] = { - [CPDMA_CMD_IDLE] = {CPDMA_DMACONTROL, 3, 1, ACCESS_WO}, - [CPDMA_COPY_ERROR_FRAMES] = {CPDMA_DMACONTROL, 4, 1, ACCESS_RW}, - [CPDMA_RX_OFF_LEN_UPDATE] = {CPDMA_DMACONTROL, 2, 1, ACCESS_RW}, - [CPDMA_RX_OWNERSHIP_FLIP] = {CPDMA_DMACONTROL, 1, 1, ACCESS_RW}, - [CPDMA_TX_PRIO_FIXED] = {CPDMA_DMACONTROL, 0, 1, ACCESS_RW}, - [CPDMA_STAT_IDLE] = {CPDMA_DMASTATUS, 31, 1, ACCESS_RO}, - [CPDMA_STAT_TX_ERR_CODE] = {CPDMA_DMASTATUS, 20, 0xf, ACCESS_RW}, - [CPDMA_STAT_TX_ERR_CHAN] = {CPDMA_DMASTATUS, 16, 0x7, ACCESS_RW}, - [CPDMA_STAT_RX_ERR_CODE] = {CPDMA_DMASTATUS, 12, 0xf, ACCESS_RW}, - [CPDMA_STAT_RX_ERR_CHAN] = {CPDMA_DMASTATUS, 8, 0x7, ACCESS_RW}, - [CPDMA_RX_BUFFER_OFFSET] = {CPDMA_RXBUFFOFS, 0, 0xffff, ACCESS_RW}, -}; - int cpdma_control_get(struct cpdma_ctlr *ctlr, int control) { unsigned long flags; @@ -931,35 +960,10 @@ unlock_ret: int cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value) { unsigned long flags; - struct cpdma_control_info *info = &controls[control]; int ret; - u32 val; spin_lock_irqsave(&ctlr->lock, flags); - - ret = -ENOTSUPP; - if (!ctlr->params.has_ext_regs) - goto unlock_ret; - - ret = -EINVAL; - if (ctlr->state != CPDMA_STATE_ACTIVE) - goto unlock_ret; - - ret = -ENOENT; - if (control < 0 || control >= ARRAY_SIZE(controls)) - goto unlock_ret; - - ret = -EPERM; - if ((info->access & ACCESS_WO) != ACCESS_WO) - goto unlock_ret; - - val = dma_reg_read(ctlr, info->reg); - val &= ~(info->mask << info->shift); - val |= (value & info->mask) << info->shift; - dma_reg_write(ctlr, info->reg, val); - ret = 0; - -unlock_ret: + ret = _cpdma_control_set(ctlr, control, value); spin_unlock_irqrestore(&ctlr->lock, flags); return ret; } diff --git a/drivers/net/mii.c b/drivers/net/mii.c index 0443546fc427..6d953c53eed6 100644 --- a/drivers/net/mii.c +++ b/drivers/net/mii.c @@ -213,6 +213,8 @@ int mii_ethtool_get_link_ksettings(struct mii_if_info *mii, SPEED_100 : SPEED_10)); cmd->base.duplex = (bmcr & BMCR_FULLDPLX) ? 
DUPLEX_FULL : DUPLEX_HALF; + + lp_advertising = 0; } mii->full_duplex = cmd->base.duplex; diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index ff31c10a3485..d361835b315d 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -217,7 +217,7 @@ config BROADCOM_PHY select BCM_NET_PHYLIB ---help--- Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464, - BCM5481 and BCM5482 PHYs. + BCM5481, BCM54810 and BCM5482 PHYs. config CICADA_PHY tristate "Cicada PHYs" @@ -277,6 +277,11 @@ config MARVELL_PHY ---help--- Currently has a driver for the 88E1011S +config MESON_GXL_PHY + tristate "Amlogic Meson GXL Internal PHY" + ---help--- + Currently has a driver for the Amlogic Meson GXL Internal PHY + config MICREL_PHY tristate "Micrel PHYs" ---help--- diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 86d12cd3fbf0..356859ac7c18 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_INTEL_XWAY_PHY) += intel-xway.o obj-$(CONFIG_LSI_ET1011C_PHY) += et1011c.o obj-$(CONFIG_LXT_PHY) += lxt.o obj-$(CONFIG_MARVELL_PHY) += marvell.o +obj-$(CONFIG_MESON_GXL_PHY) += meson-gxl.o obj-$(CONFIG_MICREL_KS8995MA) += spi_ks8995.o obj-$(CONFIG_MICREL_PHY) += micrel.o obj-$(CONFIG_MICROCHIP_PHY) += microchip.o diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 583ef8a2ec8d..b1e32e9be1b3 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -18,7 +18,7 @@ #include <linux/module.h> #include <linux/phy.h> #include <linux/brcmphy.h> - +#include <linux/of.h> #define BRCM_PHY_MODEL(phydev) \ ((phydev)->drv->phy_id & (phydev)->drv->phy_id_mask) @@ -30,11 +30,49 @@ MODULE_DESCRIPTION("Broadcom PHY driver"); MODULE_AUTHOR("Maciej W. Rozycki"); MODULE_LICENSE("GPL"); +static int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum) +{ + /* The register must be written to both the Shadow Register Select and + * the Shadow Read Register Selector + */ + phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum | + regnum << MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT); + return phy_read(phydev, MII_BCM54XX_AUX_CTL); +} + static int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val) { return phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum | val); } +static int bcm54810_config(struct phy_device *phydev) +{ + int rc, val; + + val = bcm_phy_read_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL); + val &= ~BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN; + rc = bcm_phy_write_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL, + val); + if (rc < 0) + return rc; + + val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC); + val &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; + val |= MII_BCM54XX_AUXCTL_MISC_WREN; + rc = bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, + val); + if (rc < 0) + return rc; + + val = bcm_phy_read_shadow(phydev, BCM54810_SHD_CLK_CTL); + val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN; + rc = bcm_phy_write_shadow(phydev, BCM54810_SHD_CLK_CTL, val); + if (rc < 0) + return rc; + + return 0; +} + /* Needs SMDSP clock enabled via bcm54xx_phydsp_config() */ static int bcm50610_a0_workaround(struct phy_device *phydev) { @@ -207,6 +245,12 @@ static int bcm54xx_config_init(struct phy_device *phydev) (phydev->dev_flags & PHY_BRCM_AUTO_PWRDWN_ENABLE)) bcm54xx_adjust_rxrefclk(phydev); + if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810) { + err = bcm54810_config(phydev); + if (err) + return err; + } + bcm54xx_phydsp_config(phydev); return 0; @@ -304,6 +348,7 @@ static 
int bcm5482_read_status(struct phy_device *phydev) static int bcm5481_config_aneg(struct phy_device *phydev) { + struct device_node *np = phydev->mdio.dev.of_node; int ret; /* Aneg firsly. */ @@ -334,6 +379,14 @@ static int bcm5481_config_aneg(struct phy_device *phydev) phy_write(phydev, 0x18, reg); } + if (of_property_read_bool(np, "enet-phy-lane-swap")) { + /* Lane Swap - Undocumented register...magic! */ + ret = bcm_phy_write_exp(phydev, MII_BCM54XX_EXP_SEL_ER + 0x9, + 0x11B); + if (ret < 0) + return ret; + } + return ret; } @@ -568,6 +621,18 @@ static struct phy_driver broadcom_drivers[] = { .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, }, { + .phy_id = PHY_ID_BCM54810, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM54810", + .features = PHY_GBIT_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = bcm54xx_config_init, + .config_aneg = bcm5481_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, +}, { .phy_id = PHY_ID_BCM5482, .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5482", @@ -651,6 +716,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { { PHY_ID_BCM54616S, 0xfffffff0 }, { PHY_ID_BCM5464, 0xfffffff0 }, { PHY_ID_BCM5481, 0xfffffff0 }, + { PHY_ID_BCM54810, 0xfffffff0 }, { PHY_ID_BCM5482, 0xfffffff0 }, { PHY_ID_BCM50610, 0xfffffff0 }, { PHY_ID_BCM50610M, 0xfffffff0 }, diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 7a240fce3a7e..e2460a57e4b1 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -375,7 +375,7 @@ static int periodic_output(struct dp83640_clock *clock, /* ptp clock methods */ -static int ptp_dp83640_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +static int ptp_dp83640_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) { struct dp83640_clock *clock = container_of(ptp, struct dp83640_clock, caps); @@ -384,13 +384,13 @@ static int ptp_dp83640_adjfreq(struct ptp_clock_info *ptp, s32 ppb) int neg_adj = 0; u16 hi, lo; - if (ppb < 0) { + if (scaled_ppm < 0) { neg_adj = 1; - ppb = -ppb; + scaled_ppm = -scaled_ppm; } - rate = ppb; - rate <<= 26; - rate = div_u64(rate, 1953125); + rate = scaled_ppm; + rate <<= 13; + rate = div_u64(rate, 15625); hi = (rate >> 16) & PTP_RATE_HI_MASK; if (neg_adj) @@ -1035,7 +1035,7 @@ static void dp83640_clock_init(struct dp83640_clock *clock, struct mii_bus *bus) clock->caps.n_per_out = N_PER_OUT; clock->caps.n_pins = DP83640_N_PINS; clock->caps.pps = 0; - clock->caps.adjfreq = ptp_dp83640_adjfreq; + clock->caps.adjfine = ptp_dp83640_adjfine; clock->caps.adjtime = ptp_dp83640_adjtime; clock->caps.gettime64 = ptp_dp83640_gettime; clock->caps.settime64 = ptp_dp83640_settime; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index c2dcf02df202..fa31f50824d3 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -361,7 +361,7 @@ static int m88e1111_config_aneg(struct phy_device *phydev) static int marvell_of_reg_init(struct phy_device *phydev) { const __be32 *paddr; - int len, i, saved_page, current_page, page_changed, ret; + int len, i, saved_page, current_page, ret; if (!phydev->mdio.dev.of_node) return 0; @@ -374,7 +374,6 @@ static int marvell_of_reg_init(struct phy_device *phydev) saved_page = phy_read(phydev, MII_MARVELL_PHY_PAGE); if (saved_page < 0) return saved_page; - page_changed = 0; current_page = saved_page; ret = 0; @@ -388,7 +387,6 @@ static int 
marvell_of_reg_init(struct phy_device *phydev) if (reg_page != current_page) { current_page = reg_page; - page_changed = 1; ret = phy_write(phydev, MII_MARVELL_PHY_PAGE, reg_page); if (ret < 0) goto err; @@ -411,7 +409,7 @@ static int marvell_of_reg_init(struct phy_device *phydev) } err: - if (page_changed) { + if (current_page != saved_page) { i = phy_write(phydev, MII_MARVELL_PHY_PAGE, saved_page); if (ret == 0) ret = i; diff --git a/drivers/net/phy/mdio-mux-mmioreg.c b/drivers/net/phy/mdio-mux-mmioreg.c index d0bed52c8d16..6a33646bdf05 100644 --- a/drivers/net/phy/mdio-mux-mmioreg.c +++ b/drivers/net/phy/mdio-mux-mmioreg.c @@ -21,7 +21,8 @@ struct mdio_mux_mmioreg_state { void *mux_handle; phys_addr_t phys; - uint8_t mask; + unsigned int iosize; + unsigned int mask; }; /* @@ -47,17 +48,47 @@ static int mdio_mux_mmioreg_switch_fn(int current_child, int desired_child, struct mdio_mux_mmioreg_state *s = data; if (current_child ^ desired_child) { - void __iomem *p = ioremap(s->phys, 1); - uint8_t x, y; - + void __iomem *p = ioremap(s->phys, s->iosize); if (!p) return -ENOMEM; - x = ioread8(p); - y = (x & ~s->mask) | desired_child; - if (x != y) { - iowrite8((x & ~s->mask) | desired_child, p); - pr_debug("%s: %02x -> %02x\n", __func__, x, y); + switch (s->iosize) { + case sizeof(uint8_t): { + uint8_t x, y; + + x = ioread8(p); + y = (x & ~s->mask) | desired_child; + if (x != y) { + iowrite8((x & ~s->mask) | desired_child, p); + pr_debug("%s: %02x -> %02x\n", __func__, x, y); + } + + break; + } + case sizeof(uint16_t): { + uint16_t x, y; + + x = ioread16(p); + y = (x & ~s->mask) | desired_child; + if (x != y) { + iowrite16((x & ~s->mask) | desired_child, p); + pr_debug("%s: %04x -> %04x\n", __func__, x, y); + } + + break; + } + case sizeof(uint32_t): { + uint32_t x, y; + + x = ioread32(p); + y = (x & ~s->mask) | desired_child; + if (x != y) { + iowrite32((x & ~s->mask) | desired_child, p); + pr_debug("%s: %08x -> %08x\n", __func__, x, y); + } + + break; + } } iounmap(p); @@ -88,8 +119,11 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev) } s->phys = res.start; - if (resource_size(&res) != sizeof(uint8_t)) { - dev_err(&pdev->dev, "only 8-bit registers are supported\n"); + s->iosize = resource_size(&res); + if (s->iosize != sizeof(uint8_t) && + s->iosize != sizeof(uint16_t) && + s->iosize != sizeof(uint32_t)) { + dev_err(&pdev->dev, "only 8/16/32-bit registers are supported\n"); return -EINVAL; } @@ -98,8 +132,8 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev) dev_err(&pdev->dev, "missing or invalid mux-mask property\n"); return -ENODEV; } - if (be32_to_cpup(iprop) > 255) { - dev_err(&pdev->dev, "only 8-bit registers are supported\n"); + if (be32_to_cpup(iprop) >= BIT(s->iosize * 8)) { + dev_err(&pdev->dev, "only 8/16/32-bit registers are supported\n"); return -EINVAL; } s->mask = be32_to_cpup(iprop); diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c new file mode 100644 index 000000000000..1ea69b7585d9 --- /dev/null +++ b/drivers/net/phy/meson-gxl.c @@ -0,0 +1,81 @@ +/* + * Amlogic Meson GXL Internal PHY Driver + * + * Copyright (C) 2015 Amlogic, Inc. All rights reserved. + * Copyright (C) 2016 BayLibre, SAS. All rights reserved. + * Author: Neil Armstrong <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mii.h> +#include <linux/ethtool.h> +#include <linux/phy.h> +#include <linux/netdevice.h> + +static int meson_gxl_config_init(struct phy_device *phydev) +{ + /* Enable Analog and DSP register Bank access by */ + phy_write(phydev, 0x14, 0x0000); + phy_write(phydev, 0x14, 0x0400); + phy_write(phydev, 0x14, 0x0000); + phy_write(phydev, 0x14, 0x0400); + + /* Write Analog register 23 */ + phy_write(phydev, 0x17, 0x8E0D); + phy_write(phydev, 0x14, 0x4417); + + /* Enable fractional PLL */ + phy_write(phydev, 0x17, 0x0005); + phy_write(phydev, 0x14, 0x5C1B); + + /* Program fraction FR_PLL_DIV1 */ + phy_write(phydev, 0x17, 0x029A); + phy_write(phydev, 0x14, 0x5C1D); + + /* Program fraction FR_PLL_DIV1 */ + phy_write(phydev, 0x17, 0xAAAA); + phy_write(phydev, 0x14, 0x5C1C); + + return 0; +} + +static struct phy_driver meson_gxl_phy[] = { + { + .phy_id = 0x01814400, + .phy_id_mask = 0xfffffff0, + .name = "Meson GXL Internal PHY", + .features = PHY_BASIC_FEATURES, + .flags = PHY_IS_INTERNAL, + .config_init = meson_gxl_config_init, + .config_aneg = genphy_config_aneg, + .aneg_done = genphy_aneg_done, + .read_status = genphy_read_status, + .suspend = genphy_suspend, + .resume = genphy_resume, + }, +}; + +static struct mdio_device_id __maybe_unused meson_gxl_tbl[] = { + { 0x01814400, 0xfffffff0 }, + { } +}; + +module_phy_driver(meson_gxl_phy); + +MODULE_DEVICE_TABLE(mdio, meson_gxl_tbl); + +MODULE_DESCRIPTION("Amlogic Meson GXL Internal PHY driver"); +MODULE_AUTHOR("Baoqi wang"); +MODULE_AUTHOR("Neil Armstrong <[email protected]>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 2f94c60d4939..e6dd222fddb1 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -143,13 +143,14 @@ static int phy_config_interrupt(struct phy_device *phydev, u32 interrupts) * Returns > 0 on success or < 0 on error. 0 means that auto-negotiation * is still pending. 
*/ -static inline int phy_aneg_done(struct phy_device *phydev) +int phy_aneg_done(struct phy_device *phydev) { if (phydev->drv->aneg_done) return phydev->drv->aneg_done(phydev); return genphy_aneg_done(phydev); } +EXPORT_SYMBOL(phy_aneg_done); /* A structure for mapping a particular speed and duplex * combination to a particular SUPPORTED and ADVERTISED value diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 8674e188b697..55a4488633e4 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -889,16 +889,16 @@ static int connect_ctrl_ring(struct backend_info *be) unsigned int evtchn; int err; - err = xenbus_gather(XBT_NIL, dev->otherend, - "ctrl-ring-ref", "%u", &val, NULL); - if (err) + err = xenbus_scanf(XBT_NIL, dev->otherend, + "ctrl-ring-ref", "%u", &val); + if (err < 0) goto done; /* The frontend does not have a control ring */ ring_ref = val; - err = xenbus_gather(XBT_NIL, dev->otherend, - "event-channel-ctrl", "%u", &val, NULL); - if (err) { + err = xenbus_scanf(XBT_NIL, dev->otherend, + "event-channel-ctrl", "%u", &val); + if (err < 0) { xenbus_dev_fatal(dev, err, "reading %s/event-channel-ctrl", dev->otherend); diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 86280b7e41f3..9c13381b6966 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -153,7 +153,10 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx) s32 ppb = scaled_ppm_to_ppb(tx->freq); if (ppb > ops->max_adj || ppb < -ops->max_adj) return -ERANGE; - err = ops->adjfreq(ops, ppb); + if (ops->adjfine) + err = ops->adjfine(ops, tx->freq); + else + err = ops->adjfreq(ops, ppb); ptp->dialed_frequency = tx->freq; } else if (tx->modes == 0) { tx->freq = ptp->dialed_frequency; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index edcd96ded8aa..01c1487277b2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -234,6 +234,7 @@ void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i); +void bpf_prog_sub(struct bpf_prog *prog, int i); struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); @@ -303,6 +304,10 @@ static inline struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) return ERR_PTR(-EOPNOTSUPP); } +static inline void bpf_prog_sub(struct bpf_prog *prog, int i) +{ +} + static inline void bpf_prog_put(struct bpf_prog *prog) { } diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 60def78c4e12..848dc508ef57 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -13,6 +13,7 @@ #define PHY_ID_BCM5241 0x0143bc30 #define PHY_ID_BCMAC131 0x0143bc70 #define PHY_ID_BCM5481 0x0143bca0 +#define PHY_ID_BCM54810 0x03625d00 #define PHY_ID_BCM5482 0x0143bcb0 #define PHY_ID_BCM5411 0x00206070 #define PHY_ID_BCM5421 0x002060e0 @@ -56,6 +57,7 @@ #define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000 #define PHY_BRCM_CLEAR_RGMII_MODE 0x00004000 #define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00008000 + /* Broadcom BCM7xxx specific workarounds */ #define PHY_BRCM_7XXX_REV(x) (((x) >> 8) & 0xff) #define PHY_BRCM_7XXX_PATCH(x) ((x) & 0xff) @@ -110,6 +112,8 @@ #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 #define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC 0x7000 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x0007 +#define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT 12 +#define 
MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN (1 << 8) #define MII_BCM54XX_AUXCTL_SHDWSEL_MASK 0x0007 @@ -191,6 +195,12 @@ #define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */ #define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */ +/* BCM54810 Registers */ +#define BCM54810_EXP_BROADREACH_LRE_MISC_CTL (MII_BCM54XX_EXP_SEL_ER + 0x90) +#define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN (1 << 0) +#define BCM54810_SHD_CLK_CTL 0x3 +#define BCM54810_SHD_CLK_CTL_GTXCLK_EN (1 << 9) + /*****************************************************************************/ /* Fast Ethernet Transceiver definitions. */ diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 4d3f0d1aec73..bf1907d96097 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -52,7 +52,8 @@ extern struct srcu_struct debugfs_srcu; * Must only be called under the protection established by * debugfs_use_file_start(). */ -static inline const struct file_operations *debugfs_real_fops(struct file *filp) +static inline const struct file_operations * +debugfs_real_fops(const struct file *filp) __must_hold(&debugfs_srcu) { /* diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ca1ad9ebbc92..93756585521f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -64,6 +64,10 @@ struct ipv6_devconf { } stable_secret; __s32 use_oif_addrs_only; __s32 keep_addr_on_down; + __s32 seg6_enabled; +#ifdef CONFIG_IPV6_SEG6_HMAC + __s32 seg6_require_hmac; +#endif struct ctl_table_header *sysctl_header; }; @@ -229,8 +233,9 @@ struct ipv6_pinfo { rxflow:1, rxtclass:1, rxpmtu:1, - rxorigdstaddr:1; - /* 2 bits hole */ + rxorigdstaddr:1, + recvfragsize:1; + /* 1 bits hole */ } bits; __u16 all; } rxopt; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 0dcd287f4bd0..949b24b6c479 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -42,6 +42,10 @@ enum { MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, }; +enum { + MLX5_FLOW_TABLE_TUNNEL_EN = BIT(0), +}; + #define LEFTOVERS_RULE_NUM 2 static inline void build_leftovers_ft_param(int *priority, int *n_ent, @@ -97,13 +101,15 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, int max_num_groups, - u32 level); + u32 level, + u32 flags); struct mlx5_flow_table * mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, - u32 level); + u32 level, + u32 flags); struct mlx5_flow_table * mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, @@ -124,14 +130,19 @@ struct mlx5_flow_group * mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in); void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); +struct mlx5_flow_act { + u32 action; + u32 flow_tag; + u32 encap_id; +}; + /* Single destination per rule. * Group ID is implied by the match criteria. 
*/ struct mlx5_flow_handle * mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec, - u32 action, - u32 flow_tag, + struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num); void mlx5_del_flow_rules(struct mlx5_flow_handle *fr); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 66fd61c681d9..d64135a0ab71 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -468,17 +468,6 @@ static inline void napi_complete(struct napi_struct *n) } /** - * napi_hash_add - add a NAPI to global hashtable - * @napi: NAPI context - * - * Generate a new napi_id and store a @napi under it in napi_hash. - * Used for busy polling (CONFIG_NET_RX_BUSY_POLL). - * Note: This is normally automatically done from netif_napi_add(), - * so might disappear in a future Linux version. - */ -void napi_hash_add(struct napi_struct *napi); - -/** * napi_hash_del - remove a NAPI from global table * @napi: NAPI context * diff --git a/include/linux/phy.h b/include/linux/phy.h index e7e1fd382564..9880d73a2c3d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -786,6 +786,7 @@ void phy_detach(struct phy_device *phydev); void phy_start(struct phy_device *phydev); void phy_stop(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); +int phy_aneg_done(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 5ad54fc66cf0..b76d47aba564 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -58,7 +58,14 @@ struct system_device_crosststamp; * * clock operations * + * @adjfine: Adjusts the frequency of the hardware clock. + * parameter scaled_ppm: Desired frequency offset from + * nominal frequency in parts per million, but with a + * 16 bit binary fractional field. + * * @adjfreq: Adjusts the frequency of the hardware clock. + * This method is deprecated. New drivers should implement + * the @adjfine method instead. 
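+ * (As an illustrative note on the conversion: the two scales relate
+ * as ppb = scaled_ppm * 1000 / 2^16, so a requested +1 ppm offset
+ * reaches @adjfine as scaled_ppm = 65536 and would have reached
+ * @adjfreq as delta = 1000.)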
* parameter delta: Desired frequency offset from nominal frequency * in parts per billion * @@ -108,6 +115,7 @@ struct ptp_clock_info { int n_pins; int pps; struct ptp_pin_desc *pin_config; + int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm); int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); diff --git a/include/linux/seg6.h b/include/linux/seg6.h new file mode 100644 index 000000000000..7a66d2b4c5a6 --- /dev/null +++ b/include/linux/seg6.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_H +#define _LINUX_SEG6_H + +#include <uapi/linux/seg6.h> + +#endif diff --git a/include/linux/seg6_genl.h b/include/linux/seg6_genl.h new file mode 100644 index 000000000000..d6c3fb4f3734 --- /dev/null +++ b/include/linux/seg6_genl.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_GENL_H +#define _LINUX_SEG6_GENL_H + +#include <uapi/linux/seg6_genl.h> + +#endif diff --git a/include/linux/seg6_hmac.h b/include/linux/seg6_hmac.h new file mode 100644 index 000000000000..da437ebdc6cd --- /dev/null +++ b/include/linux/seg6_hmac.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_HMAC_H +#define _LINUX_SEG6_HMAC_H + +#include <uapi/linux/seg6_hmac.h> + +#endif diff --git a/include/linux/seg6_iptunnel.h b/include/linux/seg6_iptunnel.h new file mode 100644 index 000000000000..5377cf6a5a02 --- /dev/null +++ b/include/linux/seg6_iptunnel.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_IPTUNNEL_H +#define _LINUX_SEG6_IPTUNNEL_H + +#include <uapi/linux/seg6_iptunnel.h> + +#endif diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cc6e23eaac91..a4aeeca7e805 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3033,9 +3033,13 @@ static inline void skb_frag_list_init(struct sk_buff *skb) int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, const struct sk_buff *skb); struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 705840e0438f..3537fb33cc90 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -135,8 +135,6 @@ struct plat_stmmacenet_data { void (*bus_setup)(void __iomem *ioaddr); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); - void (*suspend)(struct platform_device *pdev, void *priv); - void (*resume)(struct platform_device *pdev, void *priv); void *bsp_priv; struct stmmac_axi *axi; int has_gmac4; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a17ae7b85218..32a7c7e35b71 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -176,8 +176,6 @@ struct tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. 
*/ - struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */ - u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ diff --git a/include/net/act_api.h b/include/net/act_api.h index 82f3c912a5b1..d8eae87ea778 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -42,7 +42,6 @@ struct tc_action { struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; }; -#define tcf_act common.tcfa_act #define tcf_head common.tcfa_head #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 6965c8f68ade..701fc814d0af 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -115,6 +115,7 @@ static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, __be32 daddr, __u8 tos, __u8 ttl, + __be16 tp_dst, __be16 flags, __be64 tunnel_id, int md_size) @@ -127,7 +128,7 @@ static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, ip_tunnel_key_init(&tun_dst->u.tun_info.key, saddr, daddr, tos, ttl, - 0, 0, 0, tunnel_id, flags); + 0, 0, tp_dst, tunnel_id, flags); return tun_dst; } @@ -139,12 +140,13 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, const struct iphdr *iph = ip_hdr(skb); return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, - flags, tunnel_id, md_size); + 0, flags, tunnel_id, md_size); } static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 tos, __u8 ttl, + __be16 tp_dst, __be32 label, __be16 flags, __be64 tunnel_id, @@ -162,7 +164,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad info->key.tun_flags = flags; info->key.tun_id = tunnel_id; info->key.tp_src = 0; - info->key.tp_dst = 0; + info->key.tp_dst = tp_dst; info->key.u.ipv6.src = *saddr; info->key.u.ipv6.dst = *daddr; @@ -183,7 +185,7 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, ipv6_get_dsfield(ip6h), ip6h->hop_limit, - ip6_flowlabel(ip6h), flags, tunnel_id, + 0, ip6_flowlabel(ip6h), flags, tunnel_id, md_size); } #endif /* __NET_DST_METADATA_H */ diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 456e4a6006ab..8dbfdf728cd8 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -8,6 +8,11 @@ #include <net/flow.h> #include <net/rtnetlink.h> +struct fib_kuid_range { + kuid_t start; + kuid_t end; +}; + struct fib_rule { struct list_head list; int iifindex; @@ -30,6 +35,7 @@ struct fib_rule { int suppress_prefixlen; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; + struct fib_kuid_range uid_range; struct rcu_head rcu; }; @@ -92,7 +98,8 @@ struct fib_rules_ops { [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ [FRA_GOTO] = { .type = NLA_U32 }, \ - [FRA_L3MDEV] = { .type = NLA_U8 } + [FRA_L3MDEV] = { .type = NLA_U8 }, \ + [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) } static inline void fib_rule_get(struct fib_rule *rule) { diff --git a/include/net/flow.h b/include/net/flow.h index 035aa7716967..6bbbca8af8e3 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -11,6 +11,7 @@ #include <linux/in6.h> #include <linux/atomic.h> 
#include <net/flow_dissector.h> +#include <linux/uidgid.h> /* * ifindex generation is per-net namespace, and loopback is @@ -37,6 +38,7 @@ struct flowi_common { #define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; + kuid_t flowic_uid; }; union flowi_uli { @@ -74,6 +76,7 @@ struct flowi4 { #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid #define flowi4_tun_key __fl_common.flowic_tun_key +#define flowi4_uid __fl_common.flowic_uid /* (saddr,daddr) must be grouped, same order as in IP header */ __be32 saddr; @@ -93,7 +96,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, - __be16 dport, __be16 sport) + __be16 dport, __be16 sport, + kuid_t uid) { fl4->flowi4_oif = oif; fl4->flowi4_iif = LOOPBACK_IFINDEX; @@ -104,6 +108,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; fl4->flowi4_tun_key.tun_id = 0; + fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; @@ -131,6 +136,7 @@ struct flowi6 { #define flowi6_flags __fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid #define flowi6_tun_key __fl_common.flowic_tun_key +#define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; /* Note: flowi6_tos is encoded in flowlabel, too. */ @@ -176,6 +182,7 @@ struct flowi { #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid #define flowi_tun_key u.__fl_common.flowic_tun_key +#define flowi_uid u.__fl_common.flowic_uid } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index d9534927d93b..c4f31666afd2 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -128,6 +128,11 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */ + FLOW_DISSECTOR_KEY_ENC_KEYID, /* struct flow_dissector_key_keyid */ + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ + FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */ + FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 3ec87bacc0f5..a34275be3600 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -48,7 +48,7 @@ struct genl_info; * @n_ops: number of operations supported by this family */ struct genl_family { - unsigned int id; /* private */ + int id; /* private */ unsigned int hdrsize; char name[GENL_NAMSIZ]; unsigned int version; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 236a81034fef..c9cff977a7fb 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -228,6 +228,7 @@ struct inet_sock { #define IP_CMSG_PASSSEC BIT(5) #define IP_CMSG_ORIGDSTADDR BIT(6) #define IP_CMSG_CHECKSUM BIT(7) +#define IP_CMSG_RECVFRAGSIZE BIT(8) /** * sk_to_full_sk - Access to a full socket diff --git a/include/net/ip.h b/include/net/ip.h index 5413883ac47f..f48c67cab222 100644 --- 
a/include/net/ip.h +++ b/include/net/ip.h @@ -179,6 +179,7 @@ struct ip_reply_arg { /* -1 if not needed */ int bound_dev_if; u8 tos; + kuid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 @@ -578,7 +579,8 @@ int ip_options_rcv_srr(struct sk_buff *skb); */ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); -void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int tlen, int offset); +void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, @@ -600,7 +602,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { - ip_cmsg_recv_offset(msg, skb, 0, 0); + ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0); } bool icmp_global_allow(void); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f83e78d071a3..9dc2c182a263 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -140,9 +140,10 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, - u32 mark); + u32 mark, kuid_t uid); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); -void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark); +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid); void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, u32 mark); void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8fed1cd78658..0a3622bf086f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -932,7 +932,8 @@ int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); */ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, - u8 *proto, struct in6_addr **daddr_p); + u8 *proto, struct in6_addr **daddr_p, + struct in6_addr *saddr); void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto); diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 10d0848f5b8a..de7745e2edcc 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -85,6 +85,7 @@ struct netns_ipv6 { #endif atomic_t dev_addr_genid; atomic_t fib6_sernum; + struct seg6_pernet_data *seg6_data; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index cd334c9584e9..f1b76b8e6d2d 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -6,6 +6,8 @@ #include <linux/if_vlan.h> #include <net/sch_generic.h> +#define DEFAULT_TX_QUEUE_LEN 1000 + struct qdisc_walker { int stop; int skip; diff --git a/include/net/route.h b/include/net/route.h index 0429d47cad25..c0874c87c173 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -153,7 +153,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? 
inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport); + daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -269,7 +269,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_ANYSRC; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport); + protocol, flow_flags, dst, src, dport, sport, + sk->sk_uid); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/include/net/seg6.h b/include/net/seg6.h new file mode 100644 index 000000000000..4e0357517d79 --- /dev/null +++ b/include/net/seg6.h @@ -0,0 +1,62 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <[email protected]> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _NET_SEG6_H +#define _NET_SEG6_H + +#include <linux/net.h> +#include <linux/ipv6.h> +#include <net/lwtunnel.h> +#include <linux/seg6.h> +#include <linux/rhashtable.h> + +static inline void update_csum_diff4(struct sk_buff *skb, __be32 from, + __be32 to) +{ + __be32 diff[] = { ~from, to }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); +} + +static inline void update_csum_diff16(struct sk_buff *skb, __be32 *from, + __be32 *to) +{ + __be32 diff[] = { + ~from[0], ~from[1], ~from[2], ~from[3], + to[0], to[1], to[2], to[3], + }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); +} + +struct seg6_pernet_data { + struct mutex lock; + struct in6_addr __rcu *tun_src; +#ifdef CONFIG_IPV6_SEG6_HMAC + struct rhashtable hmac_infos; +#endif +}; + +static inline struct seg6_pernet_data *seg6_pernet(struct net *net) +{ + return net->ipv6.seg6_data; +} + +extern int seg6_init(void); +extern void seg6_exit(void); +extern int seg6_iptunnel_init(void); +extern void seg6_iptunnel_exit(void); + +extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); + +#endif diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h new file mode 100644 index 000000000000..69c3a106056b --- /dev/null +++ b/include/net/seg6_hmac.h @@ -0,0 +1,62 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <[email protected]> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
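
A side note on the update_csum_diff4()/update_csum_diff16() helpers defined in seg6.h above: they apply the RFC 1624 incremental-update identity, feeding the one's complement of the old bytes plus the new bytes through csum_partial() so that skb->csum is corrected without re-summing the whole packet. A self-contained userspace sketch of the same arithmetic (sum16() and fold() are illustrative stand-ins, not kernel API):

#include <stdint.h>
#include <stdio.h>

/* one's-complement sum over 16-bit words (stand-in for csum_partial) */
static uint32_t sum16(const uint16_t *p, int n, uint32_t acc)
{
	while (n--)
		acc += *p++;
	return acc;
}

/* fold carries back into 16 bits */
static uint16_t fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

int main(void)
{
	uint16_t data[4] = { 0x1234, 0x5678, 0x9abc, 0xdef0 };
	uint16_t csum = ~fold(sum16(data, 4, 0));	/* original checksum */
	uint16_t from = data[1], to = 0x1111;

	data[1] = to;

	/* RFC 1624 eqn. 3: HC' = ~(~HC + ~m + m'), i.e. the ~from/to trick */
	uint16_t inc = ~fold((uint16_t)~csum + (uint32_t)(uint16_t)~from + to);
	uint16_t full = ~fold(sum16(data, 4, 0));	/* recompute from scratch */

	printf("incremental %#06x, full %#06x\n", (unsigned)inc, (unsigned)full);
	return 0;	/* both print 0x630d */
}
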
+ */ + +#ifndef _NET_SEG6_HMAC_H +#define _NET_SEG6_HMAC_H + +#include <net/flow.h> +#include <net/ip6_fib.h> +#include <net/sock.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/route.h> +#include <net/seg6.h> +#include <linux/seg6_hmac.h> +#include <linux/rhashtable.h> + +#define SEG6_HMAC_MAX_DIGESTSIZE 160 +#define SEG6_HMAC_RING_SIZE 256 + +struct seg6_hmac_info { + struct rhash_head node; + struct rcu_head rcu; + + u32 hmackeyid; + char secret[SEG6_HMAC_SECRET_LEN]; + u8 slen; + u8 alg_id; +}; + +struct seg6_hmac_algo { + u8 alg_id; + char name[64]; + struct crypto_shash * __percpu *tfms; + struct shash_desc * __percpu *shashs; +}; + +extern int seg6_hmac_compute(struct seg6_hmac_info *hinfo, + struct ipv6_sr_hdr *hdr, struct in6_addr *saddr, + u8 *output); +extern struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key); +extern int seg6_hmac_info_add(struct net *net, u32 key, + struct seg6_hmac_info *hinfo); +extern int seg6_hmac_info_del(struct net *net, u32 key); +extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr, + struct ipv6_sr_hdr *srh); +extern bool seg6_hmac_validate_skb(struct sk_buff *skb); +extern int seg6_hmac_init(void); +extern void seg6_hmac_exit(void); +extern int seg6_hmac_net_init(struct net *net); +extern void seg6_hmac_net_exit(struct net *net); + +#endif diff --git a/include/net/sock.h b/include/net/sock.h index 93331a1492db..cf617ee16723 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -419,6 +419,7 @@ struct sock { u32 sk_max_ack_backlog; __u32 sk_priority; __u32 sk_mark; + kuid_t sk_uid; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; long sk_rcvtimeo; @@ -1664,6 +1665,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) sk->sk_wq = parent->wq; parent->sk = sk; sk_set_socket(sk, parent); + sk->sk_uid = SOCK_INODE(parent)->i_uid; security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } @@ -1671,6 +1673,11 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) kuid_t sock_i_uid(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); +static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) +{ + return sk ? 
sk->sk_uid : make_kuid(net->user_ns, 0); +} + static inline u32 net_tx_rndhash(void) { u32 v = prandom_u32(); diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 253f8da6c2a6..efef0b4b1b2b 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -12,6 +12,8 @@ #define __NET_TC_TUNNEL_KEY_H #include <net/act_api.h> +#include <linux/tc_act/tc_tunnel_key.h> +#include <net/dst_metadata.h> struct tcf_tunnel_key_params { struct rcu_head rcu; @@ -27,4 +29,39 @@ struct tcf_tunnel_key { #define to_tunnel_key(a) ((struct tcf_tunnel_key *)a) +static inline bool is_tcf_tunnel_set(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + + if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + return params->tcft_action == TCA_TUNNEL_KEY_ACT_SET; +#endif + return false; +} + +static inline bool is_tcf_tunnel_release(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + + if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + return params->tcft_action == TCA_TUNNEL_KEY_ACT_RELEASE; +#endif + return false; +} + +static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + + return &params->tcft_enc_metadata->u.tun_info; +#else + return NULL; +#endif +} #endif /* __NET_TC_TUNNEL_KEY_H */ diff --git a/include/net/udp.h b/include/net/udp.h index 6134f37ba3ab..e6e4e19be387 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -248,6 +248,21 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, /* net/ipv4/udp.c */ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); +void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); +static inline struct sk_buff * +__skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *peeked, + int *off, int *err) +{ + return __skb_recv_datagram(sk, flags | (noblock ?
MSG_DONTWAIT : 0), + udp_skb_destructor, peeked, off, err); +} +static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, + int noblock, int *err) +{ + int peeked, off = 0; + + return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err); +} void udp_v4_early_demux(struct sk_buff *skb); int udp_get_port(struct sock *sk, unsigned short snum, diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 14404b3ebb89..bbf02a63a011 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -29,6 +29,11 @@ struct fib_rule_hdr { __u32 flags; }; +struct fib_rule_uid_range { + __u32 start; + __u32 end; +}; + enum { FRA_UNSPEC, FRA_DST, /* destination address */ @@ -51,6 +56,7 @@ enum { FRA_OIFNAME, FRA_PAD, FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ + FRA_UID_RANGE, /* UID range */ __FRA_MAX }; diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index eaf94919291a..4e557f4e9553 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -117,6 +117,7 @@ struct in_addr { #define IP_NODEFRAG 22 #define IP_CHECKSUM 23 #define IP_BIND_ADDRESS_NO_PORT 24 +#define IP_RECVFRAGSIZE 25 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index b39ea4f2e701..46444f8fbee4 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -283,6 +283,7 @@ struct in6_flowlabel_req { #define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR #define IPV6_TRANSPARENT 75 #define IPV6_UNICAST_IF 76 +#define IPV6_RECVFRAGSIZE 77 /* * Multicast Routing: diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 8c2772340c3f..53561be1ac21 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -39,6 +39,7 @@ struct in6_ifreq { #define IPV6_SRCRT_STRICT 0x01 /* Deprecated; will be removed */ #define IPV6_SRCRT_TYPE_0 0 /* Deprecated; will be removed */ #define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */ +#define IPV6_SRCRT_TYPE_4 4 /* Segment Routing with IPv6 */ /* * routing header @@ -178,6 +179,8 @@ enum { DEVCONF_DROP_UNSOLICITED_NA, DEVCONF_KEEP_ADDR_ON_DOWN, DEVCONF_RTR_SOLICIT_MAX_INTERVAL, + DEVCONF_SEG6_ENABLED, + DEVCONF_SEG6_REQUIRE_HMAC, DEVCONF_MAX }; diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 4bd27d0270a2..5daa48e2571e 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -124,8 +124,8 @@ enum { L2TP_ATTR_STATS, /* nested */ L2TP_ATTR_IP6_SADDR, /* struct in6_addr */ L2TP_ATTR_IP6_DADDR, /* struct in6_addr */ - L2TP_ATTR_UDP_ZERO_CSUM6_TX, /* u8 */ - L2TP_ATTR_UDP_ZERO_CSUM6_RX, /* u8 */ + L2TP_ATTR_UDP_ZERO_CSUM6_TX, /* flag */ + L2TP_ATTR_UDP_ZERO_CSUM6_RX, /* flag */ L2TP_ATTR_PAD, __L2TP_ATTR_MAX, }; diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index a478fe80e203..453cc6215bfd 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -9,6 +9,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_IP, LWTUNNEL_ENCAP_ILA, LWTUNNEL_ENCAP_IP6, + LWTUNNEL_ENCAP_SEG6, __LWTUNNEL_ENCAP_MAX, }; diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 59ed3992c760..375d812fea36 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -705,6 +705,15 @@ enum ovs_nat_attr { #define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1) +/* + * struct ovs_action_push_eth - %OVS_ACTION_ATTR_PUSH_ETH action argument. 
+ * @addresses: Source and destination MAC addresses. + */ +struct ovs_action_push_eth { + struct ovs_key_ethernet addresses; +}; + /** * enum ovs_action_attr - Action types. * @@ -738,6 +747,10 @@ enum ovs_nat_attr { * is no MPLS label stack, as determined by ethertype, no action is taken. * @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related * entries in the flow key. + * @OVS_ACTION_ATTR_PUSH_ETH: Push a new outermost Ethernet header onto the + * packet. + * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the + * packet. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -765,6 +778,8 @@ enum ovs_action_attr { * bits. */ OVS_ACTION_ATTR_CT, /* Nested OVS_CT_ATTR_* . */ OVS_ACTION_ATTR_TRUNC, /* u32 struct ovs_action_trunc. */ + OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */ + OVS_ACTION_ATTR_POP_ETH, /* No argument. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 8fd715f806a2..86786d45ee66 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -447,6 +447,16 @@ enum { TCA_FLOWER_KEY_TCP_DST_MASK, /* be16 */ TCA_FLOWER_KEY_UDP_SRC_MASK, /* be16 */ TCA_FLOWER_KEY_UDP_DST_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST_MASK, /* be16 */ + + TCA_FLOWER_KEY_SCTP_SRC, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST, /* be16 */ + + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, /* be16 */ __TCA_FLOWER_MAX, }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 5a78be518101..e14377f2ec27 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -318,6 +318,7 @@ enum rtattr_type_t { RTA_ENCAP, RTA_EXPIRES, RTA_PAD, + RTA_UID, __RTA_MAX }; diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h new file mode 100644 index 000000000000..c396a8052f73 --- /dev/null +++ b/include/uapi/linux/seg6.h @@ -0,0 +1,54 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <[email protected]> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version.
+ */ + +#ifndef _UAPI_LINUX_SEG6_H +#define _UAPI_LINUX_SEG6_H + +/* + * SRH + */ +struct ipv6_sr_hdr { + __u8 nexthdr; + __u8 hdrlen; + __u8 type; + __u8 segments_left; + __u8 first_segment; + __u8 flag_1; + __u8 flag_2; + __u8 reserved; + + struct in6_addr segments[0]; +}; + +#define SR6_FLAG1_CLEANUP (1 << 7) +#define SR6_FLAG1_PROTECTED (1 << 6) +#define SR6_FLAG1_OAM (1 << 5) +#define SR6_FLAG1_ALERT (1 << 4) +#define SR6_FLAG1_HMAC (1 << 3) + +#define SR6_TLV_INGRESS 1 +#define SR6_TLV_EGRESS 2 +#define SR6_TLV_OPAQUE 3 +#define SR6_TLV_PADDING 4 +#define SR6_TLV_HMAC 5 + +#define sr_has_cleanup(srh) ((srh)->flag_1 & SR6_FLAG1_CLEANUP) +#define sr_has_hmac(srh) ((srh)->flag_1 & SR6_FLAG1_HMAC) + +struct sr6_tlv { + __u8 type; + __u8 len; + __u8 data[0]; +}; + +#endif diff --git a/include/uapi/linux/seg6_genl.h b/include/uapi/linux/seg6_genl.h new file mode 100644 index 000000000000..fcf1c60d7df3 --- /dev/null +++ b/include/uapi/linux/seg6_genl.h @@ -0,0 +1,32 @@ +#ifndef _UAPI_LINUX_SEG6_GENL_H +#define _UAPI_LINUX_SEG6_GENL_H + +#define SEG6_GENL_NAME "SEG6" +#define SEG6_GENL_VERSION 0x1 + +enum { + SEG6_ATTR_UNSPEC, + SEG6_ATTR_DST, + SEG6_ATTR_DSTLEN, + SEG6_ATTR_HMACKEYID, + SEG6_ATTR_SECRET, + SEG6_ATTR_SECRETLEN, + SEG6_ATTR_ALGID, + SEG6_ATTR_HMACINFO, + __SEG6_ATTR_MAX, +}; + +#define SEG6_ATTR_MAX (__SEG6_ATTR_MAX - 1) + +enum { + SEG6_CMD_UNSPEC, + SEG6_CMD_SETHMAC, + SEG6_CMD_DUMPHMAC, + SEG6_CMD_SET_TUNSRC, + SEG6_CMD_GET_TUNSRC, + __SEG6_CMD_MAX, +}; + +#define SEG6_CMD_MAX (__SEG6_CMD_MAX - 1) + +#endif diff --git a/include/uapi/linux/seg6_hmac.h b/include/uapi/linux/seg6_hmac.h new file mode 100644 index 000000000000..b652dfd51bc5 --- /dev/null +++ b/include/uapi/linux/seg6_hmac.h @@ -0,0 +1,21 @@ +#ifndef _UAPI_LINUX_SEG6_HMAC_H +#define _UAPI_LINUX_SEG6_HMAC_H + +#include <linux/seg6.h> + +#define SEG6_HMAC_SECRET_LEN 64 +#define SEG6_HMAC_FIELD_LEN 32 + +struct sr6_tlv_hmac { + struct sr6_tlv tlvhdr; + __u16 reserved; + __be32 hmackeyid; + __u8 hmac[SEG6_HMAC_FIELD_LEN]; +}; + +enum { + SEG6_HMAC_ALGO_SHA1 = 1, + SEG6_HMAC_ALGO_SHA256 = 2, +}; + +#endif diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h new file mode 100644 index 000000000000..0f7dbd280a9c --- /dev/null +++ b/include/uapi/linux/seg6_iptunnel.h @@ -0,0 +1,44 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <[email protected]> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
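
On the ipv6_sr_hdr layout above: hdrlen follows the usual IPv6 extension-header convention and counts 8-octet units beyond the first 8 octets, which is what the SEG6_IPTUN_ENCAP_SIZE() and seg6_lwt_headroom() macros in the next header rely on. A small standalone sketch of that arithmetic (the constant 40 stands for sizeof(struct ipv6hdr) in encap mode):

#include <stdio.h>

int main(void)
{
	/* n 128-bit segments give hdrlen = 2n, and the whole SRH occupies
	 * (hdrlen + 1) << 3 bytes, i.e. 8 fixed bytes plus 16 per segment.
	 */
	for (int n = 1; n <= 3; n++) {
		int hdrlen = (16 * n) / 8;
		int srh_bytes = (hdrlen + 1) << 3;
		printf("%d segment(s): hdrlen=%d SRH=%d bytes encap headroom=%d\n",
		       n, hdrlen, srh_bytes, srh_bytes + 40);
	}
	return 0;
}
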
+ */ + +#ifndef _UAPI_LINUX_SEG6_IPTUNNEL_H +#define _UAPI_LINUX_SEG6_IPTUNNEL_H + +enum { + SEG6_IPTUNNEL_UNSPEC, + SEG6_IPTUNNEL_SRH, + __SEG6_IPTUNNEL_MAX, +}; +#define SEG6_IPTUNNEL_MAX (__SEG6_IPTUNNEL_MAX - 1) + +struct seg6_iptunnel_encap { + int mode; + struct ipv6_sr_hdr srh[0]; +}; + +#define SEG6_IPTUN_ENCAP_SIZE(x) ((sizeof(*x)) + (((x)->srh->hdrlen + 1) << 3)) + +enum { + SEG6_IPTUN_MODE_INLINE, + SEG6_IPTUN_MODE_ENCAP, +}; + +static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) +{ + int encap = (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP); + + return ((tuninfo->srh->hdrlen + 1) << 3) + + (encap * sizeof(struct ipv6hdr)); +} + +#endif diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h index 890106ff16e6..84ea55e1076b 100644 --- a/include/uapi/linux/tc_act/tc_tunnel_key.h +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -33,6 +33,7 @@ enum { TCA_TUNNEL_KEY_ENC_IPV6_DST, /* struct in6_addr */ TCA_TUNNEL_KEY_ENC_KEY_ID, /* be64 */ TCA_TUNNEL_KEY_PAD, + TCA_TUNNEL_KEY_ENC_DST_PORT, /* be16 */ __TCA_TUNNEL_KEY_MAX, }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 228f962447a5..23eb2050f15e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -680,6 +680,17 @@ struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) } EXPORT_SYMBOL_GPL(bpf_prog_add); +void bpf_prog_sub(struct bpf_prog *prog, int i) +{ + /* Only to be used for undoing previous bpf_prog_add() in some + * error path. We still know that another entity in our call + * path holds a reference to the program, thus atomic_sub() can + * be safely used in such cases! + */ + WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0); +} +EXPORT_SYMBOL_GPL(bpf_prog_sub); + struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) { return bpf_prog_add(prog, 1); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 900257578934..89f787ca47ef 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1499,7 +1499,6 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, { struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg; u64 min_val = BPF_REGISTER_MIN_RANGE, max_val = BPF_REGISTER_MAX_RANGE; - bool min_set = false, max_set = false; u8 opcode = BPF_OP(insn->code); dst_reg = ®s[insn->dst_reg]; @@ -1522,7 +1521,6 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, } else if (insn->imm < BPF_REGISTER_MAX_RANGE && (s64)insn->imm > BPF_REGISTER_MIN_RANGE) { min_val = max_val = insn->imm; - min_set = max_set = true; } /* We don't know anything about what was done to this register, mark it diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 0b9be62aee45..bd39247e2f0f 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -698,7 +698,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size); if (!forw_packet_aggr->skb) { - batadv_forw_packet_free(forw_packet_aggr); + batadv_forw_packet_free(forw_packet_aggr, true); return; } @@ -1611,7 +1611,7 @@ out: if (hardif_neigh) batadv_hardif_neigh_put(hardif_neigh); - kfree_skb(skb_priv); + consume_skb(skb_priv); } /** @@ -1783,6 +1783,7 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work) struct delayed_work *delayed_work; struct batadv_forw_packet *forw_packet; struct batadv_priv *bat_priv; + bool dropped = false; delayed_work = to_delayed_work(work); forw_packet = 
container_of(delayed_work, struct batadv_forw_packet, @@ -1792,8 +1793,10 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work) hlist_del(&forw_packet->list); spin_unlock_bh(&bat_priv->forw_bat_list_lock); - if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) + if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) { + dropped = true; goto out; + } batadv_iv_ogm_emit(forw_packet); @@ -1810,7 +1813,7 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work) batadv_iv_ogm_schedule(forw_packet->if_incoming); out: - batadv_forw_packet_free(forw_packet); + batadv_forw_packet_free(forw_packet, dropped); } static int batadv_iv_ogm_receive(struct sk_buff *skb, @@ -1820,17 +1823,18 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, struct batadv_ogm_packet *ogm_packet; u8 *packet_pos; int ogm_offset; - bool ret; + bool res; + int ret = NET_RX_DROP; - ret = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN); - if (!ret) - return NET_RX_DROP; + res = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN); + if (!res) + goto free_skb; /* did we receive a B.A.T.M.A.N. IV OGM packet on an interface * that does not have B.A.T.M.A.N. IV enabled ? */ if (bat_priv->algo_ops->iface.enable != batadv_iv_ogm_iface_enable) - return NET_RX_DROP; + goto free_skb; batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX); batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES, @@ -1851,8 +1855,15 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, ogm_packet = (struct batadv_ogm_packet *)packet_pos; } - kfree_skb(skb); - return NET_RX_SUCCESS; + ret = NET_RX_SUCCESS; + +free_skb: + if (ret == NET_RX_SUCCESS) + consume_skb(skb); + else + kfree_skb(skb); + + return ret; } #ifdef CONFIG_BATMAN_ADV_DEBUGFS diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index ee08540ce503..54bdd415e8df 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -492,20 +492,21 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb, struct batadv_elp_packet *elp_packet; struct batadv_hard_iface *primary_if; struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); - bool ret; + bool res; + int ret = NET_RX_DROP; - ret = batadv_check_management_packet(skb, if_incoming, BATADV_ELP_HLEN); - if (!ret) - return NET_RX_DROP; + res = batadv_check_management_packet(skb, if_incoming, BATADV_ELP_HLEN); + if (!res) + goto free_skb; if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) - return NET_RX_DROP; + goto free_skb; /* did we receive a B.A.T.M.A.N. V ELP packet on an interface * that does not have B.A.T.M.A.N. V ELP enabled ? 
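
The receive-path conversions in this hunk and the following ones share one convention: a handler always ends up owning the skb and frees it at a single exit label, calling consume_skb() on success and kfree_skb() on drop so that drop-monitor tooling only sees genuine losses. The skeleton of the pattern, with demo_recv() and demo_checks_pass() as hypothetical stand-ins for the real handlers:

static int demo_recv(struct sk_buff *skb, struct batadv_hard_iface *if_incoming)
{
	int ret = NET_RX_DROP;

	/* demo_checks_pass() stands in for the real validation helpers */
	if (!demo_checks_pass(skb, if_incoming))
		goto free_skb;

	/* ... actual processing ... */
	ret = NET_RX_SUCCESS;

free_skb:
	if (ret == NET_RX_SUCCESS)
		consume_skb(skb);	/* delivered; not a drop event */
	else
		kfree_skb(skb);		/* real loss; visible to drop monitoring */

	return ret;
}
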
*/ if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0) - return NET_RX_DROP; + goto free_skb; elp_packet = (struct batadv_elp_packet *)skb->data; @@ -516,14 +517,19 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb, primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) - goto out; + goto free_skb; batadv_v_elp_neigh_update(bat_priv, ethhdr->h_source, if_incoming, elp_packet); -out: - if (primary_if) - batadv_hardif_put(primary_if); - consume_skb(skb); - return NET_RX_SUCCESS; + ret = NET_RX_SUCCESS; + batadv_hardif_put(primary_if); + +free_skb: + if (ret == NET_RX_SUCCESS) + consume_skb(skb); + else + kfree_skb(skb); + + return ret; } diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 61ff5f82cfbf..38b9aab83fc0 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -140,6 +140,7 @@ static void batadv_v_ogm_send(struct work_struct *work) unsigned char *ogm_buff, *pkt_buff; int ogm_buff_len; u16 tvlv_len = 0; + int ret; bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); bat_priv = container_of(bat_v, struct batadv_priv, bat_v); @@ -182,6 +183,31 @@ static void batadv_v_ogm_send(struct work_struct *work) if (!kref_get_unless_zero(&hard_iface->refcount)) continue; + ret = batadv_hardif_no_broadcast(hard_iface, NULL, NULL); + if (ret) { + char *type; + + switch (ret) { + case BATADV_HARDIF_BCAST_NORECIPIENT: + type = "no neighbor"; + break; + case BATADV_HARDIF_BCAST_DUPFWD: + type = "single neighbor is source"; + break; + case BATADV_HARDIF_BCAST_DUPORIG: + type = "single neighbor is originator"; + break; + default: + type = "unknown"; + } + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselves on %s suppressed: %s\n", + hard_iface->net_dev->name, type); + + batadv_hardif_put(hard_iface); + continue; + } + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Sending own OGM2 packet (originator %pM, seqno %u, throughput %u, TTL %d) on interface %s [%pM]\n", ogm_packet->orig, ntohl(ogm_packet->seqno), @@ -651,6 +677,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, struct batadv_hard_iface *hard_iface; struct batadv_ogm2_packet *ogm_packet; u32 ogm_throughput, link_throughput, path_throughput; + int ret; ethhdr = eth_hdr(skb); ogm_packet = (struct batadv_ogm2_packet *)(skb->data + ogm_offset); @@ -716,6 +743,35 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, if (!kref_get_unless_zero(&hard_iface->refcount)) continue; + ret = batadv_hardif_no_broadcast(hard_iface, + ogm_packet->orig, + hardif_neigh->orig); + + if (ret) { + char *type; + + switch (ret) { + case BATADV_HARDIF_BCAST_NORECIPIENT: + type = "no neighbor"; + break; + case BATADV_HARDIF_BCAST_DUPFWD: + type = "single neighbor is source"; + break; + case BATADV_HARDIF_BCAST_DUPORIG: + type = "single neighbor is originator"; + break; + default: + type = "unknown"; + } + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s suppressed: %s\n", + ogm_packet->orig, hard_iface->net_dev->name, + type); + + batadv_hardif_put(hard_iface); + continue; + } + batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet, orig_node, neigh_node, if_incoming, hard_iface); @@ -754,18 +810,18 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb, * B.A.T.M.A.N. V enabled ?
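
The suppression logging just above repeats the same switch in batadv_v_ogm_send() and batadv_v_ogm_process(); a small lookup helper would remove the duplication. A possible refactoring sketch (editorial, not part of the patch):

static const char *batadv_bcast_suppress_reason(int ret)
{
	switch (ret) {
	case BATADV_HARDIF_BCAST_NORECIPIENT:
		return "no neighbor";
	case BATADV_HARDIF_BCAST_DUPFWD:
		return "single neighbor is source";
	case BATADV_HARDIF_BCAST_DUPORIG:
		return "single neighbor is originator";
	default:
		return "unknown";
	}
}
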
*/ if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0) - return NET_RX_DROP; + goto free_skb; if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN)) - return NET_RX_DROP; + goto free_skb; if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) - return NET_RX_DROP; + goto free_skb; ogm_packet = (struct batadv_ogm2_packet *)skb->data; if (batadv_is_my_mac(bat_priv, ogm_packet->orig)) - return NET_RX_DROP; + goto free_skb; batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX); batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES, @@ -786,7 +842,12 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb, } ret = NET_RX_SUCCESS; - consume_skb(skb); + +free_skb: + if (ret == NET_RX_SUCCESS) + consume_skb(skb); + else + kfree_skb(skb); return ret; } diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index cbb4f3232a7b..49576c5a3fe3 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -949,6 +949,41 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size) } /** + * batadv_dat_arp_create_reply - create an ARP Reply + * @bat_priv: the bat priv with all the soft interface information + * @ip_src: ARP sender IP + * @ip_dst: ARP target IP + * @hw_src: Ethernet source and ARP sender MAC + * @hw_dst: Ethernet destination and ARP target MAC + * @vid: VLAN identifier (optional, set to zero otherwise) + * + * Creates an ARP Reply from the given values, optionally encapsulated in a + * VLAN header. + * + * Return: An skb containing an ARP Reply. + */ +static struct sk_buff * +batadv_dat_arp_create_reply(struct batadv_priv *bat_priv, __be32 ip_src, + __be32 ip_dst, u8 *hw_src, u8 *hw_dst, + unsigned short vid) +{ + struct sk_buff *skb; + + skb = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_dst, bat_priv->soft_iface, + ip_src, hw_dst, hw_src, hw_dst); + if (!skb) + return NULL; + + skb_reset_mac_header(skb); + + if (vid & BATADV_VLAN_HAS_TAG) + skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), + vid & VLAN_VID_MASK); + + return skb; +} + +/** * batadv_dat_snoop_outgoing_arp_request - snoop the ARP request and try to * answer using DAT * @bat_priv: the bat priv with all the soft interface information @@ -1005,20 +1040,12 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, goto out; } - skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, - bat_priv->soft_iface, ip_dst, hw_src, - dat_entry->mac_addr, hw_src); + skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src, + dat_entry->mac_addr, + hw_src, vid); if (!skb_new) goto out; - if (vid & BATADV_VLAN_HAS_TAG) { - skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q), - vid & VLAN_VID_MASK); - if (!skb_new) - goto out; - } - - skb_reset_mac_header(skb_new); skb_new->protocol = eth_type_trans(skb_new, bat_priv->soft_iface); bat_priv->stats.rx_packets++; @@ -1081,25 +1108,11 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, if (!dat_entry) goto out; - skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, - bat_priv->soft_iface, ip_dst, hw_src, - dat_entry->mac_addr, hw_src); - + skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src, + dat_entry->mac_addr, hw_src, vid); if (!skb_new) goto out; - /* the rest of the TX path assumes that the mac_header offset pointing - * to the inner Ethernet header has been set, therefore reset it now. 
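
One subtlety in the batadv_dat_arp_create_reply() helper added above: its ip_src/hw_src parameters describe the sender of the generated reply, so the call sites pass the DAT entry's data as the source and the original requester as the target, and arp_create() additionally reuses hw_dst as the Ethernet destination. Annotated, the call reads (same identifiers as the surrounding snooping code):

	/* answer on behalf of the cached owner:
	 * the reply travels dat_entry->mac_addr -> hw_src
	 */
	skb_new = batadv_dat_arp_create_reply(bat_priv,
					      ip_dst,	/* ARP sender IP: the queried address */
					      ip_src,	/* ARP target IP: the requester */
					      dat_entry->mac_addr, /* sender MAC */
					      hw_src,	/* target MAC and Ethernet dest */
					      vid);
	if (!skb_new)
		goto out;
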
- */ skb_reset_mac_header(skb_new); - - if (vid & BATADV_VLAN_HAS_TAG) { - skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q), - vid & VLAN_VID_MASK); - if (!skb_new) - goto out; - } - /* To preserve backwards compatibility, the node has to choose the outgoing * format based on the incoming request packet type. The assumption is * that a node not using the 4addr packet format doesn't support it. diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 2b967a34703d..9c561e683f4b 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -20,6 +20,7 @@ #include <linux/atomic.h> #include <linux/byteorder/generic.h> +#include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/fs.h> #include <linux/if_ether.h> @@ -42,17 +43,23 @@ /** * batadv_frag_clear_chain - delete entries in the fragment buffer chain * @head: head of chain with entries. + * @dropped: whether the chain is cleared because all fragments are dropped * * Free fragments in the passed hlist. Should be called with appropriate lock. */ -static void batadv_frag_clear_chain(struct hlist_head *head) +static void batadv_frag_clear_chain(struct hlist_head *head, bool dropped) { struct batadv_frag_list_entry *entry; struct hlist_node *node; hlist_for_each_entry_safe(entry, node, head, list) { hlist_del(&entry->list); - kfree_skb(entry->skb); + + if (dropped) + kfree_skb(entry->skb); + else + consume_skb(entry->skb); + kfree(entry); } } @@ -73,7 +80,7 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node, spin_lock_bh(&chain->lock); if (!check_cb || check_cb(chain)) { - batadv_frag_clear_chain(&chain->fragment_list); + batadv_frag_clear_chain(&chain->fragment_list, true); chain->size = 0; } @@ -118,7 +125,7 @@ static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain, return false; if (!hlist_empty(&chain->fragment_list)) - batadv_frag_clear_chain(&chain->fragment_list); + batadv_frag_clear_chain(&chain->fragment_list, true); chain->size = 0; chain->seqno = seqno; @@ -220,7 +227,7 @@ out: * exceeds the maximum size of one merged packet. Don't allow * packets to have different total_size. */ - batadv_frag_clear_chain(&chain->fragment_list); + batadv_frag_clear_chain(&chain->fragment_list, true); chain->size = 0; } else if (ntohs(frag_packet->total_size) == chain->size) { /* All fragments received. Hand over chain to caller. */ @@ -254,6 +261,7 @@ batadv_frag_merge_packets(struct hlist_head *chain) struct batadv_frag_list_entry *entry; struct sk_buff *skb_out; int size, hdr_size = sizeof(struct batadv_frag_packet); + bool dropped = false; /* Remove first entry, as this is the destination for the rest of the * fragments. @@ -270,6 +278,7 @@ batadv_frag_merge_packets(struct hlist_head *chain) if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) { kfree_skb(skb_out); skb_out = NULL; + dropped = true; goto free; } @@ -291,7 +300,7 @@ batadv_frag_merge_packets(struct hlist_head *chain) free: /* Locking is not needed, because 'chain' is not part of any orig. */ - batadv_frag_clear_chain(chain); + batadv_frag_clear_chain(chain, dropped); return skb_out; } @@ -433,8 +442,7 @@ err: * @orig_node: final destination of the created fragments * @neigh_node: next-hop of the created fragments * - * Return: the netdev tx status or -1 in case of error. - * When -1 is returned the skb is not consumed.
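
With the new convention spelled out in the Return: line just below, batadv_frag_send_packet() consumes the skb on every path and reports either a netdev TX status or a negative errno, so callers stop special-casing -1 and only map the status. A hedged caller sketch (not taken from the patch):

	int ret = batadv_frag_send_packet(skb, orig_node, neigh_node);

	/* skb has been consumed on every path; only interpret the status */
	if (ret < 0)
		pr_debug("fragmentation failed: %d\n", ret);	/* -EAGAIN/-EINVAL/-ENOMEM */
	else if (ret != NET_XMIT_SUCCESS)
		pr_debug("fragments not sent: %d\n", ret);	/* e.g. NET_XMIT_DROP */
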
+ * Return: the netdev tx status or a negative errno code on a failure */ int batadv_frag_send_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node, @@ -447,7 +455,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, unsigned int mtu = neigh_node->if_incoming->net_dev->mtu; unsigned int header_size = sizeof(frag_header); unsigned int max_fragment_size, max_packet_size; - int ret = -1; + int ret; /* To avoid merge and refragmentation at next-hops we never send * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE @@ -457,13 +465,17 @@ int batadv_frag_send_packet(struct sk_buff *skb, max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS; /* Don't even try to fragment if we need more than 16 fragments */ - if (skb->len > max_packet_size) - goto out; + if (skb->len > max_packet_size) { + ret = -EAGAIN; + goto free_skb; + } bat_priv = orig_node->bat_priv; primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; + if (!primary_if) { + ret = -EINVAL; + goto free_skb; + } /* Create one header to be copied to all fragments */ frag_header.packet_type = BATADV_UNICAST_FRAG; @@ -488,34 +500,35 @@ int batadv_frag_send_packet(struct sk_buff *skb, /* Eat and send fragments from the tail of skb */ while (skb->len > max_fragment_size) { skb_fragment = batadv_frag_create(skb, &frag_header, mtu); - if (!skb_fragment) - goto out; + if (!skb_fragment) { + ret = -ENOMEM; + goto put_primary_if; + } batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX); batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, skb_fragment->len + ETH_HLEN); ret = batadv_send_unicast_skb(skb_fragment, neigh_node); if (ret != NET_XMIT_SUCCESS) { - /* return -1 so that the caller can free the original - * skb - */ - ret = -1; - goto out; + ret = NET_XMIT_DROP; + goto put_primary_if; } frag_header.no++; /* The initial check in this function should cover this case */ if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) { - ret = -1; - goto out; + ret = -EINVAL; + goto put_primary_if; } } /* Make room for the fragment header. */ if (batadv_skb_head_push(skb, header_size) < 0 || - pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) - goto out; + pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) { + ret = -ENOMEM; + goto put_primary_if; + } memcpy(skb->data, &frag_header, header_size); @@ -524,10 +537,13 @@ int batadv_frag_send_packet(struct sk_buff *skb, batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, skb->len + ETH_HLEN); ret = batadv_send_unicast_skb(skb, neigh_node); + /* skb was consumed */ + skb = NULL; -out: - if (primary_if) - batadv_hardif_put(primary_if); +put_primary_if: + batadv_hardif_put(primary_if); +free_skb: + kfree_skb(skb); return ret; } diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index e034afbd1bb0..dc1816e9d53b 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -228,6 +228,58 @@ bool batadv_is_wifi_netdev(struct net_device *net_device) return false; } +/** + * batadv_hardif_no_broadcast - check whether (re)broadcast is necessary + * @if_outgoing: the outgoing interface checked and considered for (re)broadcast + * @orig_addr: the originator of this packet + * @orig_neigh: originator address of the forwarder we just got the packet from + * (NULL if we originated) + * + * Checks whether a packet needs to be (re)broadcasted on the given interface.
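
batadv_hardif_no_broadcast(), documented here and implemented below, separates the zero, one, and many neighbor cases without counting: a NULL first pointer means no recipient, a non-NULL next after the first means several, and only the single-neighbor case needs an address comparison. The same 0/1/many idiom on a plain list, as a runnable sketch:

#include <stdio.h>

struct node { struct node *next; int val; };

static const char *classify(const struct node *head)
{
	if (!head)
		return "zero";		/* -> BATADV_HARDIF_BCAST_NORECIPIENT */
	if (head->next)
		return "many";		/* -> must (re)broadcast */
	return "exactly one";		/* -> compare against source/originator */
}

int main(void)
{
	struct node b = { NULL, 2 }, a = { &b, 1 };

	printf("%s / %s / %s\n", classify(NULL), classify(&b), classify(&a));
	return 0;
}
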
+ * + * Return: + * BATADV_HARDIF_BCAST_NORECIPIENT: No neighbor on interface + * BATADV_HARDIF_BCAST_DUPFWD: Just one neighbor, but it is the forwarder + * BATADV_HARDIF_BCAST_DUPORIG: Just one neighbor, but it is the originator + * BATADV_HARDIF_BCAST_OK: Several neighbors, must broadcast + */ +int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing, + u8 *orig_addr, u8 *orig_neigh) +{ + struct batadv_hardif_neigh_node *hardif_neigh; + struct hlist_node *first; + int ret = BATADV_HARDIF_BCAST_OK; + + rcu_read_lock(); + + /* 0 neighbors -> no (re)broadcast */ + first = rcu_dereference(hlist_first_rcu(&if_outgoing->neigh_list)); + if (!first) { + ret = BATADV_HARDIF_BCAST_NORECIPIENT; + goto out; + } + + /* >1 neighbors -> (re)broadcast */ + if (rcu_dereference(hlist_next_rcu(first))) + goto out; + + hardif_neigh = hlist_entry(first, struct batadv_hardif_neigh_node, + list); + + /* 1 neighbor, is the originator -> no rebroadcast */ + if (orig_addr && batadv_compare_eth(hardif_neigh->orig, orig_addr)) { + ret = BATADV_HARDIF_BCAST_DUPORIG; + /* 1 neighbor, is the one we received from -> no rebroadcast */ + } else if (orig_neigh && + batadv_compare_eth(hardif_neigh->orig, orig_neigh)) { + ret = BATADV_HARDIF_BCAST_DUPFWD; + } + +out: + rcu_read_unlock(); + return ret; +} + static struct batadv_hard_iface * batadv_hardif_get_active(const struct net_device *soft_iface) { diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index a76724d369bf..a043182586e9 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -40,6 +40,20 @@ enum batadv_hard_if_state { }; /** + * enum batadv_hard_if_bcast - broadcast avoidance options + * @BATADV_HARDIF_BCAST_OK: Do broadcast on the given hard interface + * @BATADV_HARDIF_BCAST_NORECIPIENT: Broadcast not needed, there is no recipient + * @BATADV_HARDIF_BCAST_DUPFWD: There is just the neighbor we got it from + * @BATADV_HARDIF_BCAST_DUPORIG: There is just the originator + */ +enum batadv_hard_if_bcast { + BATADV_HARDIF_BCAST_OK = 0, + BATADV_HARDIF_BCAST_NORECIPIENT, + BATADV_HARDIF_BCAST_DUPFWD, + BATADV_HARDIF_BCAST_DUPORIG, +}; + +/** * enum batadv_hard_if_cleanup - Cleanup modes for soft_iface after slave removal * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed @@ -63,6 +77,8 @@ void batadv_hardif_remove_interfaces(void); int batadv_hardif_min_mtu(struct net_device *soft_iface); void batadv_update_min_mtu(struct net_device *soft_iface); void batadv_hardif_release(struct kref *ref); +int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing, + u8 *orig_addr, u8 *orig_neigh); /** * batadv_hardif_put - decrement the hard interface refcounter and possibly diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 5e4e818529d3..6b5dae6f0307 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -402,6 +402,8 @@ void batadv_skb_set_priority(struct sk_buff *skb, int offset) static int batadv_recv_unhandled_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { + kfree_skb(skb); + return NET_RX_DROP; } @@ -416,7 +418,6 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, struct batadv_ogm_packet *batadv_ogm_packet; struct batadv_hard_iface *hard_iface; u8 idx; - int ret; hard_iface = container_of(ptype, struct batadv_hard_iface, batman_adv_ptype); @@ -466,14 +467,8 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, /*
reset control block to avoid left overs from previous users */ memset(skb->cb, 0, sizeof(struct batadv_skb_cb)); - /* all receive handlers return whether they received or reused - * the supplied skb. if not, we have to free the skb. - */ idx = batadv_ogm_packet->packet_type; - ret = (*batadv_rx_handler[idx])(skb, hard_iface); - - if (ret == NET_RX_DROP) - kfree_skb(skb); + (*batadv_rx_handler[idx])(skb, hard_iface); batadv_hardif_put(hard_iface); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index daddca94feb8..a6cc8040a21d 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -48,6 +48,7 @@ #define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */ #define BATADV_TT_WORK_PERIOD 5000 /* 5 seconds */ #define BATADV_ORIG_WORK_PERIOD 1000 /* 1 second */ +#define BATADV_MCAST_WORK_PERIOD 500 /* 0.5 seconds */ #define BATADV_DAT_ENTRY_TIMEOUT (5 * 60000) /* 5 mins in milliseconds */ /* sliding packet range of received originator messages in sequence numbers * (should be a multiple of our word size) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 13661f43386f..090a69fc342e 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -33,6 +33,7 @@ #include <linux/in6.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> @@ -48,6 +49,7 @@ #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> +#include <linux/workqueue.h> #include <net/addrconf.h> #include <net/if_inet6.h> #include <net/ip.h> @@ -60,6 +62,18 @@ #include "translation-table.h" #include "tvlv.h" +static void batadv_mcast_mla_update(struct work_struct *work); + +/** + * batadv_mcast_start_timer - schedule the multicast periodic worker + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_mcast_start_timer(struct batadv_priv *bat_priv) +{ + queue_delayed_work(batadv_event_workqueue, &bat_priv->mcast.work, + msecs_to_jiffies(BATADV_MCAST_WORK_PERIOD)); +} + /** * batadv_mcast_get_bridge - get the bridge on top of the softif if it exists * @soft_iface: netdev struct of the mesh interface @@ -231,19 +245,15 @@ out: /** * batadv_mcast_mla_list_free - free a list of multicast addresses - * @bat_priv: the bat priv with all the soft interface information * @mcast_list: the list to free * * Removes and frees all items in the given mcast_list. */ -static void batadv_mcast_mla_list_free(struct batadv_priv *bat_priv, - struct hlist_head *mcast_list) +static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list) { struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; - lockdep_assert_held(&bat_priv->tt.commit_lock); - hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) { hlist_del(&mcast_entry->list); kfree(mcast_entry); @@ -259,6 +269,8 @@ static void batadv_mcast_mla_list_free(struct batadv_priv *bat_priv, * translation table except the ones listed in the given mcast_list. * * If mcast_list is NULL then all are retracted. + * + * Do not call outside of the mcast worker! 
(or cancel mcast worker first) */ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, struct hlist_head *mcast_list) @@ -266,7 +278,7 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; - lockdep_assert_held(&bat_priv->tt.commit_lock); + WARN_ON(delayed_work_pending(&bat_priv->mcast.work)); hlist_for_each_entry_safe(mcast_entry, tmp, &bat_priv->mcast.mla_list, list) { @@ -291,6 +303,8 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, * * Adds multicast listener announcements from the given mcast_list to the * translation table if they have not been added yet. + * + * Do not call outside of the mcast worker! (or cancel mcast worker first) */ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, struct hlist_head *mcast_list) @@ -298,7 +312,7 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; - lockdep_assert_held(&bat_priv->tt.commit_lock); + WARN_ON(delayed_work_pending(&bat_priv->mcast.work)); if (!mcast_list) return; @@ -532,13 +546,18 @@ update: } /** - * batadv_mcast_mla_update - update the own MLAs + * __batadv_mcast_mla_update - update the own MLAs * @bat_priv: the bat priv with all the soft interface information * * Updates the own multicast listener announcements in the translation * table as well as the own, announced multicast tvlv container. + * + * Note that non-conflicting reads and writes to bat_priv->mcast.mla_list + * in batadv_mcast_mla_tt_retract() and batadv_mcast_mla_tt_add() are + * ensured by the non-parallel execution of the worker this function + * belongs to. */ -void batadv_mcast_mla_update(struct batadv_priv *bat_priv) +static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv) { struct net_device *soft_iface = bat_priv->soft_iface; struct hlist_head mcast_list = HLIST_HEAD_INIT; @@ -560,7 +579,30 @@ update: batadv_mcast_mla_tt_add(bat_priv, &mcast_list); out: - batadv_mcast_mla_list_free(bat_priv, &mcast_list); + batadv_mcast_mla_list_free(&mcast_list); +} + +/** + * batadv_mcast_mla_update - update the own MLAs + * @work: kernel work struct + * + * Updates the own multicast listener announcements in the translation + * table as well as the own, announced multicast tvlv container. + * + * In the end, reschedules the work timer. 
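
The multicast rework in this hunk trades the TT commit lock for worker serialization, using the standard self-rescheduling delayed-work shape: INIT_DELAYED_WORK() plus an initial arm, a handler that re-queues itself as its last step, and cancel_delayed_work_sync() before teardown touches shared state. A skeleton of the pattern (the demo_* names are placeholders, not batman-adv symbols):

static void demo_update(struct work_struct *work)
{
	struct demo_priv *priv = container_of(to_delayed_work(work),
					      struct demo_priv, work);

	demo_do_update(priv);		/* runs strictly serialized with itself */
	schedule_delayed_work(&priv->work,
			      msecs_to_jiffies(DEMO_PERIOD_MS));	/* re-arm */
}

static void demo_init(struct demo_priv *priv)
{
	INIT_DELAYED_WORK(&priv->work, demo_update);
	schedule_delayed_work(&priv->work, msecs_to_jiffies(DEMO_PERIOD_MS));
}

static void demo_free(struct demo_priv *priv)
{
	cancel_delayed_work_sync(&priv->work);	/* worker cannot run past here */
	demo_final_cleanup(priv);		/* safe without worker-side locking */
}
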
+ */ +static void batadv_mcast_mla_update(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct batadv_priv_mcast *priv_mcast; + struct batadv_priv *bat_priv; + + delayed_work = to_delayed_work(work); + priv_mcast = container_of(delayed_work, struct batadv_priv_mcast, work); + bat_priv = container_of(priv_mcast, struct batadv_priv, mcast); + + __batadv_mcast_mla_update(bat_priv); + batadv_mcast_start_timer(bat_priv); } /** @@ -1132,6 +1174,9 @@ void batadv_mcast_init(struct batadv_priv *bat_priv) batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler, NULL, BATADV_TVLV_MCAST, 2, BATADV_TVLV_HANDLER_OGM_CIFNOTFND); + + INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update); + batadv_mcast_start_timer(bat_priv); } #ifdef CONFIG_BATMAN_ADV_DEBUGFS @@ -1243,12 +1288,13 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset) */ void batadv_mcast_free(struct batadv_priv *bat_priv) { + cancel_delayed_work_sync(&bat_priv->mcast.work); + batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2); batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2); - spin_lock_bh(&bat_priv->tt.commit_lock); + /* safely calling outside of worker, as worker was canceled above */ batadv_mcast_mla_tt_retract(bat_priv, NULL); - spin_unlock_bh(&bat_priv->tt.commit_lock); } /** diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 1fb00ba84907..2cddaf52a21d 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -39,8 +39,6 @@ enum batadv_forw_mode { #ifdef CONFIG_BATMAN_ADV_MCAST -void batadv_mcast_mla_update(struct batadv_priv *bat_priv); - enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_orig_node **mcast_single_orig); @@ -55,10 +53,6 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); #else -static inline void batadv_mcast_mla_update(struct batadv_priv *bat_priv) -{ -} - static inline enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_orig_node **mcast_single_orig) diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 005012ba9b48..062738163bdc 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -20,11 +20,14 @@ #include <linux/atomic.h> #include <linux/byteorder/generic.h> +#include <linux/cache.h> #include <linux/errno.h> +#include <linux/export.h> #include <linux/fs.h> #include <linux/genetlink.h> #include <linux/if_ether.h> #include <linux/init.h> +#include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/printk.h> @@ -527,7 +530,7 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) return msg->len; } -static struct genl_ops batadv_netlink_ops[] = { +static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_MESH_INFO, .flags = GENL_ADMIN_PERM, diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index c213ddec86ad..ab5a3bf0765f 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -260,10 +260,16 @@ static void batadv_nc_path_put(struct batadv_nc_path *nc_path) /** * batadv_nc_packet_free - frees nc packet * @nc_packet: the nc packet to free + * @dropped: whether the packet is freed because it is dropped */ -static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet) +static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet, + bool dropped) { -
kfree_skb(nc_packet->skb); + if (dropped) + kfree_skb(nc_packet->skb); + else + consume_skb(nc_packet->skb); + batadv_nc_path_put(nc_packet->nc_path); kfree(nc_packet); } @@ -576,7 +582,7 @@ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet) { batadv_send_unicast_skb(nc_packet->skb, nc_packet->neigh_node); nc_packet->skb = NULL; - batadv_nc_packet_free(nc_packet); + batadv_nc_packet_free(nc_packet, false); } /** @@ -610,7 +616,7 @@ static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv, /* purge nc packet */ list_del(&nc_packet->list); - batadv_nc_packet_free(nc_packet); + batadv_nc_packet_free(nc_packet, true); res = true; @@ -1208,11 +1214,11 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, } /* skb_src is now coded into skb_dest, so free it */ - kfree_skb(skb_src); + consume_skb(skb_src); /* avoid duplicate free of skb from nc_packet */ nc_packet->skb = NULL; - batadv_nc_packet_free(nc_packet); + batadv_nc_packet_free(nc_packet, false); /* Send the coded packet and return true */ batadv_send_unicast_skb(skb_dest, first_dest); @@ -1399,7 +1405,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, /* batadv_nc_skb_store_for_decoding() clones the skb, so we must free * our ref */ - kfree_skb(skb); + consume_skb(skb); } /** @@ -1723,7 +1729,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, ether_addr_copy(unicast_packet->dest, orig_dest); unicast_packet->ttvn = ttvn; - batadv_nc_packet_free(nc_packet); + batadv_nc_packet_free(nc_packet, false); return unicast_packet; } @@ -1813,11 +1819,11 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, /* Check if network coding is enabled */ if (!atomic_read(&bat_priv->network_coding)) - return NET_RX_DROP; + goto free_skb; /* Make sure we can access (and remove) header */ if (unlikely(!pskb_may_pull(skb, hdr_size))) - return NET_RX_DROP; + goto free_skb; coded_packet = (struct batadv_coded_packet *)skb->data; ethhdr = eth_hdr(skb); @@ -1825,7 +1831,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, /* Verify frame is destined for us */ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) && !batadv_is_my_mac(bat_priv, coded_packet->second_dest)) - return NET_RX_DROP; + goto free_skb; /* Update stat counter */ if (batadv_is_my_mac(bat_priv, coded_packet->second_dest)) @@ -1835,7 +1841,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, coded_packet); if (!nc_packet) { batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED); - return NET_RX_DROP; + goto free_skb; } /* Make skb's linear, because decoding accesses the entire buffer */ @@ -1860,7 +1866,10 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, return batadv_recv_unicast_packet(skb, recv_if); free_nc_packet: - batadv_nc_packet_free(nc_packet); + batadv_nc_packet_free(nc_packet, true); +free_skb: + kfree_skb(skb); + return NET_RX_DROP; } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index c6e7e1e39b70..8f3b2969cc4e 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -512,12 +512,14 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node, * batadv_hardif_neigh_create - create a hardif neighbour node * @hard_iface: the interface this neighbour is connected to * @neigh_addr: the interface address of the neighbour to retrieve + * @orig_node: originator object representing the neighbour * * Return: the hardif neighbour node if found or created or NULL otherwise. 
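
batadv_nc_packet_free() above applies the consume-versus-drop split to an owning container: the flag records whether the embedded skb leaves as an accounted drop or as a normal hand-off, and only the skb call differs. The general shape of such a free function (demo_* names hypothetical):

static void demo_packet_free(struct demo_packet *pkt, bool dropped)
{
	if (dropped)
		kfree_skb(pkt->skb);	/* accounted as a drop */
	else
		consume_skb(pkt->skb);	/* normal end of life */

	demo_put_refs(pkt);		/* release whatever else pkt pins */
	kfree(pkt);
}
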
*/ static struct batadv_hardif_neigh_node * batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, - const u8 *neigh_addr) + const u8 *neigh_addr, + struct batadv_orig_node *orig_node) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hardif_neigh_node *hardif_neigh; @@ -536,6 +538,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, kref_get(&hard_iface->refcount); INIT_HLIST_NODE(&hardif_neigh->list); ether_addr_copy(hardif_neigh->addr, neigh_addr); + ether_addr_copy(hardif_neigh->orig, orig_node->orig); hardif_neigh->if_incoming = hard_iface; hardif_neigh->last_seen = jiffies; @@ -556,12 +559,14 @@ out: * node * @hard_iface: the interface this neighbour is connected to * @neigh_addr: the interface address of the neighbour to retrieve + * @orig_node: originator object representing the neighbour * * Return: the hardif neighbour node if found or created or NULL otherwise. */ static struct batadv_hardif_neigh_node * batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface, - const u8 *neigh_addr) + const u8 *neigh_addr, + struct batadv_orig_node *orig_node) { struct batadv_hardif_neigh_node *hardif_neigh; @@ -570,7 +575,7 @@ batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface, if (hardif_neigh) return hardif_neigh; - return batadv_hardif_neigh_create(hard_iface, neigh_addr); + return batadv_hardif_neigh_create(hard_iface, neigh_addr, orig_node); } /** @@ -630,7 +635,7 @@ batadv_neigh_node_create(struct batadv_orig_node *orig_node, goto out; hardif_neigh = batadv_hardif_neigh_get_or_create(hard_iface, - neigh_addr); + neigh_addr, orig_node); if (!hardif_neigh) goto out; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 7e8dc648b95a..6713bdf414cd 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -196,8 +196,8 @@ bool batadv_check_management_packet(struct sk_buff *skb, if (!is_broadcast_ether_addr(ethhdr->h_dest)) return false; - /* packet with broadcast sender address */ - if (is_broadcast_ether_addr(ethhdr->h_source)) + /* packet with invalid sender address */ + if (!is_valid_ether_addr(ethhdr->h_source)) return false; /* create a copy of the skb, if needed, to modify it. 
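
The tightened sanity checks in routing.c below build on the Ethernet address taxonomy: the multicast bit is the least-significant bit of the first octet, broadcast is just the all-ones multicast address, and is_valid_ether_addr() additionally rejects the all-zero address. Standalone equivalents of those predicates:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

static bool is_multicast(const uint8_t *a)
{
	return a[0] & 0x01;		/* covers broadcast ff:ff:ff:ff:ff:ff too */
}

static bool is_zero(const uint8_t *a)
{
	static const uint8_t z[6];

	return memcmp(a, z, 6) == 0;
}

static bool is_valid_unicast(const uint8_t *a)
{
	return !is_multicast(a) && !is_zero(a);	/* mirrors is_valid_ether_addr() */
}
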
*/ @@ -262,11 +262,11 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, icmph->ttl = BATADV_TTL; res = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (res == -1) - goto out; - - ret = NET_RX_SUCCESS; + if (res == NET_XMIT_SUCCESS) + ret = NET_RX_SUCCESS; + /* skb was consumed */ + skb = NULL; break; case BATADV_TP: if (!pskb_may_pull(skb, sizeof(struct batadv_icmp_tp_packet))) @@ -274,6 +274,8 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, batadv_tp_meter_recv(bat_priv, skb); ret = NET_RX_SUCCESS; + /* skb was consumed */ + skb = NULL; goto out; default: /* drop unknown type */ @@ -284,6 +286,9 @@ out: batadv_hardif_put(primary_if); if (orig_node) batadv_orig_node_put(orig_node); + + kfree_skb(skb); + return ret; } @@ -325,14 +330,20 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet->ttl = BATADV_TTL; res = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (res != -1) - ret = NET_RX_SUCCESS; + if (res == NET_XMIT_SUCCESS) + ret = NET_RX_SUCCESS; + + /* skb was consumed */ + skb = NULL; out: if (primary_if) batadv_hardif_put(primary_if); if (orig_node) batadv_orig_node_put(orig_node); + + kfree_skb(skb); + return ret; } @@ -349,21 +360,21 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) - goto out; + goto free_skb; ethhdr = eth_hdr(skb); - /* packet with unicast indication but broadcast recipient */ - if (is_broadcast_ether_addr(ethhdr->h_dest)) - goto out; + /* packet with unicast indication but non-unicast recipient */ + if (!is_valid_ether_addr(ethhdr->h_dest)) + goto free_skb; - /* packet with broadcast sender address */ - if (is_broadcast_ether_addr(ethhdr->h_source)) - goto out; + /* packet with broadcast/multicast sender address */ + if (is_multicast_ether_addr(ethhdr->h_source)) + goto free_skb; /* not for me */ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) - goto out; + goto free_skb; icmph = (struct batadv_icmp_header *)skb->data; @@ -372,17 +383,17 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, icmph->msg_type == BATADV_ECHO_REQUEST) && (skb->len >= sizeof(struct batadv_icmp_packet_rr))) { if (skb_linearize(skb) < 0) - goto out; + goto free_skb; /* create a copy of the skb, if needed, to modify it. */ if (skb_cow(skb, ETH_HLEN) < 0) - goto out; + goto free_skb; ethhdr = eth_hdr(skb); icmph = (struct batadv_icmp_header *)skb->data; icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph; if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN) - goto out; + goto free_skb; ether_addr_copy(icmp_packet_rr->rr[icmp_packet_rr->rr_cur], ethhdr->h_dest); @@ -400,11 +411,11 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, /* get routing information */ orig_node = batadv_orig_hash_find(bat_priv, icmph->dst); if (!orig_node) - goto out; + goto free_skb; /* create a copy of the skb, if needed, to modify it.
*/ if (skb_cow(skb, ETH_HLEN) < 0) - goto out; + goto put_orig_node; icmph = (struct batadv_icmp_header *)skb->data; @@ -413,12 +424,18 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, /* route it */ res = batadv_send_skb_to_orig(skb, orig_node, recv_if); - if (res != -1) + if (res == NET_XMIT_SUCCESS) ret = NET_RX_SUCCESS; -out: + /* skb was consumed */ + skb = NULL; + +put_orig_node: if (orig_node) batadv_orig_node_put(orig_node); +free_skb: + kfree_skb(skb); + return ret; } @@ -445,12 +462,12 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, ethhdr = eth_hdr(skb); - /* packet with unicast indication but broadcast recipient */ - if (is_broadcast_ether_addr(ethhdr->h_dest)) + /* packet with unicast indication but non-unicast recipient */ + if (!is_valid_ether_addr(ethhdr->h_dest)) return -EBADR; - /* packet with broadcast sender address */ - if (is_broadcast_ether_addr(ethhdr->h_source)) + /* packet with broadcast/multicast sender address */ + if (is_multicast_ether_addr(ethhdr->h_source)) return -EBADR; /* not for me */ @@ -667,18 +684,18 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, if (unicast_packet->ttl < 2) { pr_debug("Warning - can't forward unicast packet from %pM to %pM: ttl exceeded\n", ethhdr->h_source, unicast_packet->dest); - goto out; + goto free_skb; } /* get routing information */ orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->dest); if (!orig_node) - goto out; + goto free_skb; /* create a copy of the skb, if needed, to modify it. */ if (skb_cow(skb, ETH_HLEN) < 0) - goto out; + goto put_orig_node; /* decrement ttl */ unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -702,8 +719,11 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, len = skb->len; res = batadv_send_skb_to_orig(skb, orig_node, recv_if); - if (res == -1) - goto out; + if (res == NET_XMIT_SUCCESS) + ret = NET_RX_SUCCESS; + + /* skb was consumed */ + skb = NULL; /* translate transmit result into receive result */ if (res == NET_XMIT_SUCCESS) { @@ -713,11 +733,11 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, len + ETH_HLEN); } - ret = NET_RX_SUCCESS; +put_orig_node: + batadv_orig_node_put(orig_node); +free_skb: + kfree_skb(skb); -out: - if (orig_node) - batadv_orig_node_put(orig_node); return ret; } @@ -902,14 +922,18 @@ int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb, check = batadv_check_unicast_packet(bat_priv, skb, hdr_size); if (check < 0) - return NET_RX_DROP; + goto free_skb; /* we don't know about this type, drop it. 
*/ unicast_packet = (struct batadv_unicast_packet *)skb->data; if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) - return NET_RX_DROP; + goto free_skb; return batadv_route_unicast_packet(skb, recv_if); + +free_skb: + kfree_skb(skb); + return NET_RX_DROP; } int batadv_recv_unicast_packet(struct sk_buff *skb, @@ -923,6 +947,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, int check, hdr_size = sizeof(*unicast_packet); enum batadv_subtype subtype; bool is4addr; + int ret = NET_RX_DROP; unicast_packet = (struct batadv_unicast_packet *)skb->data; unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; @@ -942,9 +967,9 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, batadv_nc_skb_store_sniffed_unicast(bat_priv, skb); if (check < 0) - return NET_RX_DROP; + goto free_skb; if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size)) - return NET_RX_DROP; + goto free_skb; /* packet for me */ if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { @@ -982,7 +1007,14 @@ rx_success: return NET_RX_SUCCESS; } - return batadv_route_unicast_packet(skb, recv_if); + ret = batadv_route_unicast_packet(skb, recv_if); + /* skb was consumed */ + skb = NULL; + +free_skb: + kfree_skb(skb); + + return ret; } /** @@ -1004,15 +1036,15 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb, int ret = NET_RX_DROP; if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) - return NET_RX_DROP; + goto free_skb; /* the header is likely to be modified while forwarding */ if (skb_cow(skb, hdr_size) < 0) - return NET_RX_DROP; + goto free_skb; /* packet needs to be linearized to access the tvlv content */ if (skb_linearize(skb) < 0) - return NET_RX_DROP; + goto free_skb; unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)skb->data; @@ -1020,17 +1052,21 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb, tvlv_buff_len = ntohs(unicast_tvlv_packet->tvlv_len); if (tvlv_buff_len > skb->len - hdr_size) - return NET_RX_DROP; + goto free_skb; ret = batadv_tvlv_containers_process(bat_priv, false, NULL, unicast_tvlv_packet->src, unicast_tvlv_packet->dst, tvlv_buff, tvlv_buff_len); - if (ret != NET_RX_SUCCESS) + if (ret != NET_RX_SUCCESS) { ret = batadv_route_unicast_packet(skb, recv_if); - else - consume_skb(skb); + /* skb was consumed */ + skb = NULL; + } + +free_skb: + kfree_skb(skb); return ret; } @@ -1056,20 +1092,22 @@ int batadv_recv_frag_packet(struct sk_buff *skb, if (batadv_check_unicast_packet(bat_priv, skb, sizeof(*frag_packet)) < 0) - goto out; + goto free_skb; frag_packet = (struct batadv_frag_packet *)skb->data; orig_node_src = batadv_orig_hash_find(bat_priv, frag_packet->orig); if (!orig_node_src) - goto out; + goto free_skb; skb->priority = frag_packet->priority + 256; /* Route the fragment if it is not for us and too big to be merged. */ if (!batadv_is_my_mac(bat_priv, frag_packet->dest) && batadv_frag_skb_fwd(skb, recv_if, orig_node_src)) { + /* skb was consumed */ + skb = NULL; ret = NET_RX_SUCCESS; - goto out; + goto put_orig_node; } batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_RX); @@ -1077,20 +1115,24 @@ int batadv_recv_frag_packet(struct sk_buff *skb, /* Add fragment to buffer and merge if possible. 
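 * As used below: a false return from batadv_frag_skb_buffer() skips
 * delivery, while a true return with a non-NULL skb means every
 * fragment has arrived and the merged packet is handed back for normal
 * processing; consumed paths clear the pointer, making the final
 * kfree_skb() a NULL-safe no-op.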
*/ if (!batadv_frag_skb_buffer(&skb, orig_node_src)) - goto out; + goto put_orig_node; /* Deliver merged packet to the appropriate handler, if it was * merged */ - if (skb) + if (skb) { batadv_batman_skb_recv(skb, recv_if->net_dev, &recv_if->batman_adv_ptype, NULL); + /* skb was consumed */ + skb = NULL; + } ret = NET_RX_SUCCESS; -out: - if (orig_node_src) - batadv_orig_node_put(orig_node_src); +put_orig_node: + batadv_orig_node_put(orig_node_src); +free_skb: + kfree_skb(skb); return ret; } @@ -1109,35 +1151,35 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) - goto out; + goto free_skb; ethhdr = eth_hdr(skb); /* packet with broadcast indication but unicast recipient */ if (!is_broadcast_ether_addr(ethhdr->h_dest)) - goto out; + goto free_skb; - /* packet with broadcast sender address */ - if (is_broadcast_ether_addr(ethhdr->h_source)) - goto out; + /* packet with broadcast/multicast sender address */ + if (is_multicast_ether_addr(ethhdr->h_source)) + goto free_skb; /* ignore broadcasts sent by myself */ if (batadv_is_my_mac(bat_priv, ethhdr->h_source)) - goto out; + goto free_skb; bcast_packet = (struct batadv_bcast_packet *)skb->data; /* ignore broadcasts originated by myself */ if (batadv_is_my_mac(bat_priv, bcast_packet->orig)) - goto out; + goto free_skb; if (bcast_packet->ttl < 2) - goto out; + goto free_skb; orig_node = batadv_orig_hash_find(bat_priv, bcast_packet->orig); if (!orig_node) - goto out; + goto free_skb; spin_lock_bh(&orig_node->bcast_seqno_lock); @@ -1165,18 +1207,18 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, /* check whether this has been sent by another originator before */ if (batadv_bla_check_bcast_duplist(bat_priv, skb)) - goto out; + goto free_skb; batadv_skb_set_priority(skb, sizeof(struct batadv_bcast_packet)); /* rebroadcast packet */ - batadv_add_bcast_packet_to_list(bat_priv, skb, 1); + batadv_add_bcast_packet_to_list(bat_priv, skb, 1, false); /* don't hand the broadcast up if it is from an originator * from the same backbone. */ if (batadv_bla_is_backbone_gw(skb, orig_node, hdr_size)) - goto out; + goto free_skb; if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb, hdr_size)) goto rx_success; @@ -1192,6 +1234,8 @@ rx_success: spin_unlock: spin_unlock_bh(&orig_node->bcast_seqno_lock); +free_skb: + kfree_skb(skb); out: if (orig_node) batadv_orig_node_put(orig_node); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index e1e9136db6e8..9ea272ef6612 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -64,8 +64,11 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work); * If neigh_node is NULL, then the packet is broadcasted using hard_iface, * otherwise it is sent as unicast to the given neighbor. * - * Return: NET_TX_DROP in case of error or the result of dev_queue_xmit(skb) - * otherwise + * Regardless of the return value, the skb is consumed. + * + * Return: A negative errno code is returned on a failure. A success does not + * guarantee the frame will be transmitted as it may be dropped due + * to congestion or traffic shaping. 
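 *
 * A usage sketch (variable names here are illustrative, not from the
 * patch): since the skb is consumed on every path, callers clear their
 * pointer immediately after the call,
 *
 *	res = batadv_send_skb_packet(skb, hard_iface, dst_addr);
 *	skb = NULL;	(must not be touched or freed again)
 *
 * and then translate res into their own NET_RX_ or errno convention.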
*/ int batadv_send_skb_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, @@ -73,7 +76,6 @@ int batadv_send_skb_packet(struct sk_buff *skb, { struct batadv_priv *bat_priv; struct ethhdr *ethhdr; - int ret; bat_priv = netdev_priv(hard_iface->soft_iface); @@ -111,15 +113,8 @@ int batadv_send_skb_packet(struct sk_buff *skb, /* dev_queue_xmit() returns a negative result on error. However on * congestion and traffic shaping, it drops and returns NET_XMIT_DROP * (which is > 0). This will not be treated as an error. - * - * a negative value cannot be returned because it could be interepreted - * as not consumed skb by callers of batadv_send_skb_to_orig. */ - ret = dev_queue_xmit(skb); - if (ret < 0) - ret = NET_XMIT_DROP; - - return ret; + return dev_queue_xmit(skb); send_skb_err: kfree_skb(skb); return NET_XMIT_DROP; @@ -165,11 +160,9 @@ int batadv_send_unicast_skb(struct sk_buff *skb, * host, NULL can be passed as recv_if and no interface alternating is * attempted. * - * Return: -1 on failure (and the skb is not consumed), -EINPROGRESS if the - * skb is buffered for later transmit or the NET_XMIT status returned by the + * Return: negative errno code on a failure, -EINPROGRESS if the skb is + * buffered for later transmit or the NET_XMIT status returned by the * lower routine if the packet has been passed down. - * - * If the returning value is not -1 the skb has been consumed. */ int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, @@ -177,12 +170,14 @@ int batadv_send_skb_to_orig(struct sk_buff *skb, { struct batadv_priv *bat_priv = orig_node->bat_priv; struct batadv_neigh_node *neigh_node; - int ret = -1; + int ret; /* batadv_find_router() increases neigh_nodes refcount if found. */ neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); - if (!neigh_node) - goto out; + if (!neigh_node) { + ret = -EINVAL; + goto free_skb; + } /* Check if the skb is too large to send in one piece and fragment * it if needed. @@ -191,8 +186,10 @@ int batadv_send_skb_to_orig(struct sk_buff *skb, skb->len > neigh_node->if_incoming->net_dev->mtu) { /* Fragment and send packet. 
*/ ret = batadv_frag_send_packet(skb, orig_node, neigh_node); + /* skb was consumed */ + skb = NULL; - goto out; + goto put_neigh_node; } /* try to network code the packet, if it is received on an interface @@ -204,9 +201,13 @@ int batadv_send_skb_to_orig(struct sk_buff *skb, else ret = batadv_send_unicast_skb(skb, neigh_node); -out: - if (neigh_node) - batadv_neigh_node_put(neigh_node); + /* skb was consumed */ + skb = NULL; + +put_neigh_node: + batadv_neigh_node_put(neigh_node); +free_skb: + kfree_skb(skb); return ret; } @@ -327,7 +328,7 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, { struct batadv_unicast_packet *unicast_packet; struct ethhdr *ethhdr; - int res, ret = NET_XMIT_DROP; + int ret = NET_XMIT_DROP; if (!orig_node) goto out; @@ -364,13 +365,12 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) unicast_packet->ttvn = unicast_packet->ttvn - 1; - res = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (res != -1) - ret = NET_XMIT_SUCCESS; + ret = batadv_send_skb_to_orig(skb, orig_node, NULL); + /* skb was consumed */ + skb = NULL; out: - if (ret == NET_XMIT_DROP) - kfree_skb(skb); + kfree_skb(skb); return ret; } @@ -451,13 +451,19 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, /** * batadv_forw_packet_free - free a forwarding packet * @forw_packet: The packet to free + * @dropped: whether the packet is freed because it is dropped * * This frees a forwarding packet and releases any resources it might * have claimed. */ -void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) +void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet, + bool dropped) { - kfree_skb(forw_packet->skb); + if (dropped) + kfree_skb(forw_packet->skb); + else + consume_skb(forw_packet->skb); + if (forw_packet->if_incoming) batadv_hardif_put(forw_packet->if_incoming); if (forw_packet->if_outgoing) @@ -549,6 +555,7 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @skb: broadcast packet to add * @delay: number of jiffies to wait before sending + * @own_packet: true if it is a self-generated broadcast packet * * add a broadcast packet to the queue and setup timers. broadcast packets * are sent multiple times to increase probability for being received.
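The dropped/consumed distinction that batadv_forw_packet_free() gains in this hunk is the generic kfree_skb()/consume_skb() split applied to queued packets: kfree_skb() fires the skb:kfree_skb tracepoint that drop monitors such as dropwatch count, while consume_skb() marks a normal end of life. A minimal sketch of the pattern (the helper name is illustrative; only the flag semantics are taken from the patch):

#include <linux/skbuff.h>

/* Sketch: free an skb while telling drop monitors whether this was an
 * error/purge path (dropped == true) or a normally completed packet.
 */
static void example_skb_free(struct sk_buff *skb, bool dropped)
{
	if (dropped)
		kfree_skb(skb);		/* counted as a drop by tracing */
	else
		consume_skb(skb);	/* packet was sent or handed on */
}

Accordingly, the purge and shutdown paths in these hunks pass dropped=true, while paths that handed the skb onwards pass false.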
@@ -560,7 +567,8 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, */ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, const struct sk_buff *skb, - unsigned long delay) + unsigned long delay, + bool own_packet) { struct batadv_hard_iface *primary_if; struct batadv_forw_packet *forw_packet; @@ -586,9 +594,8 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, bcast_packet = (struct batadv_bcast_packet *)newskb->data; bcast_packet->ttl--; - skb_reset_mac_header(newskb); - forw_packet->skb = newskb; + forw_packet->own = own_packet; INIT_DELAYED_WORK(&forw_packet->delayed_work, batadv_send_outstanding_bcast_packet); @@ -597,7 +604,7 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, return NETDEV_TX_OK; err_packet_free: - batadv_forw_packet_free(forw_packet); + batadv_forw_packet_free(forw_packet, true); err: return NETDEV_TX_BUSY; } @@ -605,11 +612,17 @@ err: static void batadv_send_outstanding_bcast_packet(struct work_struct *work) { struct batadv_hard_iface *hard_iface; + struct batadv_hardif_neigh_node *neigh_node; struct delayed_work *delayed_work; struct batadv_forw_packet *forw_packet; + struct batadv_bcast_packet *bcast_packet; struct sk_buff *skb1; struct net_device *soft_iface; struct batadv_priv *bat_priv; + bool dropped = false; + u8 *neigh_addr; + u8 *orig_neigh; + int ret = 0; delayed_work = to_delayed_work(work); forw_packet = container_of(delayed_work, struct batadv_forw_packet, @@ -621,11 +634,17 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) hlist_del(&forw_packet->list); spin_unlock_bh(&bat_priv->forw_bcast_list_lock); - if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) + if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) { + dropped = true; goto out; + } - if (batadv_dat_drop_broadcast_packet(bat_priv, forw_packet)) + if (batadv_dat_drop_broadcast_packet(bat_priv, forw_packet)) { + dropped = true; goto out; + } + + bcast_packet = (struct batadv_bcast_packet *)forw_packet->skb->data; /* rebroadcast packet */ rcu_read_lock(); @@ -636,6 +655,49 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) if (forw_packet->num_packets >= hard_iface->num_bcasts) continue; + if (forw_packet->own) { + neigh_node = NULL; + } else { + neigh_addr = eth_hdr(forw_packet->skb)->h_source; + neigh_node = batadv_hardif_neigh_get(hard_iface, + neigh_addr); + } + + orig_neigh = neigh_node ? 
neigh_node->orig : NULL; + + ret = batadv_hardif_no_broadcast(hard_iface, bcast_packet->orig, + orig_neigh); + + if (ret) { + char *type; + + switch (ret) { + case BATADV_HARDIF_BCAST_NORECIPIENT: + type = "no neighbor"; + break; + case BATADV_HARDIF_BCAST_DUPFWD: + type = "single neighbor is source"; + break; + case BATADV_HARDIF_BCAST_DUPORIG: + type = "single neighbor is originator"; + break; + default: + type = "unknown"; + } + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s suppressed: %s\n", + bcast_packet->orig, + hard_iface->net_dev->name, type); + + if (neigh_node) + batadv_hardif_neigh_put(neigh_node); + + continue; + } + + if (neigh_node) + batadv_hardif_neigh_put(neigh_node); + if (!kref_get_unless_zero(&hard_iface->refcount)) continue; @@ -658,7 +720,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) } out: - batadv_forw_packet_free(forw_packet); + batadv_forw_packet_free(forw_packet, dropped); } void @@ -699,7 +761,7 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); - batadv_forw_packet_free(forw_packet); + batadv_forw_packet_free(forw_packet, true); } } spin_unlock_bh(&bat_priv->forw_bcast_list_lock); @@ -726,7 +788,7 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); - batadv_forw_packet_free(forw_packet); + batadv_forw_packet_free(forw_packet, true); } } spin_unlock_bh(&bat_priv->forw_bat_list_lock); diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 999f78683d9e..c58019475025 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -27,7 +27,8 @@ struct sk_buff; -void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet); +void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet, + bool dropped); struct batadv_forw_packet * batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing, @@ -46,7 +47,8 @@ int batadv_send_unicast_skb(struct sk_buff *skb, struct batadv_neigh_node *neigh_node); int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, const struct sk_buff *skb, - unsigned long delay); + unsigned long delay, + bool own_packet); void batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, const struct batadv_hard_iface *hard_iface); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index f37c1c769da0..7b3494ae6ad9 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -357,12 +357,12 @@ send: seqno = atomic_inc_return(&bat_priv->bcast_seqno); bcast_packet->seqno = htonl(seqno); - batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay); + batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay, true); /* a copy is stored in the bcast list, therefore removing * the original skb.
*/ - kfree_skb(skb); + consume_skb(skb); /* unicast packet */ } else { @@ -386,7 +386,7 @@ send: ret = batadv_send_skb_via_tt(bat_priv, skb, dst_hint, vid); } - if (ret == NET_XMIT_DROP) + if (ret != NET_XMIT_SUCCESS) goto dropped_freed; } diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 2333777f919d..f1564520dfae 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -615,9 +615,6 @@ static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src, batadv_tp_fill_prerandom(tp_vars, data, data_len); r = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (r == -1) - kfree_skb(skb); - if (r == NET_XMIT_SUCCESS) return 0; @@ -1206,9 +1203,6 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst, /* send the ack */ r = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (r == -1) - kfree_skb(skb); - if (unlikely(r < 0) || (r == NET_XMIT_DROP)) { ret = BATADV_TP_REASON_DST_UNREACHABLE; goto out; diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index ad1e3bc0e205..3cae8f4fd717 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -56,7 +56,6 @@ #include "hard-interface.h" #include "hash.h" #include "log.h" -#include "multicast.h" #include "netlink.h" #include "originator.h" #include "packet.h" @@ -3795,9 +3794,6 @@ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv) { lockdep_assert_held(&bat_priv->tt.commit_lock); - /* Update multicast addresses in local translation table */ - batadv_mcast_mla_update(bat_priv); - if (atomic_read(&bat_priv->tt.local_changes) < 1) { if (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt)) batadv_tt_tvlv_container_update(bat_priv); diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 77654f055f24..a783420356ae 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -600,7 +600,6 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, unsigned char *tvlv_buff; unsigned int tvlv_len; ssize_t hdr_len = sizeof(*unicast_tvlv_packet); - int res; orig_node = batadv_orig_hash_find(bat_priv, dst); if (!orig_node) @@ -633,9 +632,7 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, tvlv_buff += sizeof(*tvlv_hdr); memcpy(tvlv_buff, tvlv_value, tvlv_value_len); - res = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (res == -1) - kfree_skb(skb); + batadv_send_skb_to_orig(skb, orig_node, NULL); out: batadv_orig_node_put(orig_node); } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 673a22e3a68a..98ebac05c571 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -123,8 +123,8 @@ struct batadv_hard_iface_bat_v { * @list: list node for batadv_hardif_list * @if_num: identificator of the interface * @if_status: status of the interface for batman-adv - * @net_dev: pointer to the net_device * @num_bcasts: number of payload re-broadcasts on this interface (ARQ) + * @net_dev: pointer to the net_device * @hardif_obj: kobject of the per interface sysfs "mesh" directory * @refcount: number of contexts the object is used * @batman_adv_ptype: packet type describing packets that should be processed by @@ -141,8 +141,8 @@ struct batadv_hard_iface { struct list_head list; s16 if_num; char if_status; - struct net_device *net_dev; u8 num_bcasts; + struct net_device *net_dev; struct kobject *hardif_obj; struct kref refcount; struct packet_type batman_adv_ptype; @@ -408,6 +408,7 @@ struct batadv_hardif_neigh_node_bat_v { * 
struct batadv_hardif_neigh_node - unique neighbor per hard-interface * @list: list node for batadv_hard_iface::neigh_list * @addr: the MAC address of the neighboring interface + * @orig: the address of the originator this neighbor node belongs to * @if_incoming: pointer to incoming hard-interface * @last_seen: when last packet via this neighbor was received * @bat_v: B.A.T.M.A.N. V private data @@ -417,6 +418,7 @@ struct batadv_hardif_neigh_node_bat_v { struct batadv_hardif_neigh_node { struct hlist_node list; u8 addr[ETH_ALEN]; + u8 orig[ETH_ALEN]; struct batadv_hard_iface *if_incoming; unsigned long last_seen; #ifdef CONFIG_BATMAN_ADV_BATMAN_V @@ -785,9 +787,10 @@ struct batadv_mcast_querier_state { * @num_want_all_ipv6: counter for items in want_all_ipv6_list * @want_lists_lock: lock for protecting modifications to mcast want lists * (traversals are rcu-locked) + * @work: work queue callback item for multicast TT and TVLV updates */ struct batadv_priv_mcast { - struct hlist_head mla_list; + struct hlist_head mla_list; /* see __batadv_mcast_mla_update() */ struct hlist_head want_all_unsnoopables_list; struct hlist_head want_all_ipv4_list; struct hlist_head want_all_ipv6_list; @@ -802,6 +805,7 @@ struct batadv_priv_mcast { atomic_t num_want_all_ipv6; /* protects want_all_{unsnoopables,ipv4,ipv6}_list */ spinlock_t want_lists_lock; + struct delayed_work work; }; #endif diff --git a/net/core/datagram.c b/net/core/datagram.c index bfb973aebb5b..49816af8586b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -165,6 +165,7 @@ done: * __skb_try_recv_datagram - Receive a datagram skbuff * @sk: socket * @flags: MSG_ flags + * @destructor: invoked under the receive lock on successful dequeue * @peeked: returns non-zero if this packet has been seen before * @off: an offset in bytes to peek skb from. Returns an offset * within an skb where data actually starts @@ -197,6 +198,8 @@ done: * the standard around please. */ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err, struct sk_buff **last) { @@ -241,9 +244,11 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, } atomic_inc(&skb->users); - } else + } else { __skb_unlink(skb, queue); - + if (destructor) + destructor(sk, skb); + } spin_unlock_irqrestore(&queue->lock, cpu_flags); *off = _off; return skb; @@ -262,6 +267,8 @@ no_packet: EXPORT_SYMBOL(__skb_try_recv_datagram); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err) { struct sk_buff *skb, *last; @@ -270,8 +277,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { - skb = __skb_try_recv_datagram(sk, flags, peeked, off, err, - &last); + skb = __skb_try_recv_datagram(sk, flags, destructor, peeked, + off, err, &last); if (skb) return skb; @@ -290,7 +297,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int peeked, off = 0; return __skb_recv_datagram(sk, flags | (noblock ? 
MSG_DONTWAIT : 0), - &peeked, &off, err); + NULL, &peeked, &off, err); } EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/core/dev.c b/net/core/dev.c index f23e28668f32..7385c1a152fd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4482,7 +4482,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (!(skb->dev->features & NETIF_F_GRO)) goto normal; - if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad) + if (skb->csum_bad) goto normal; gro_list_prepare(napi, skb); @@ -4495,7 +4495,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff skb_set_network_header(skb, skb_gro_offset(skb)); skb_reset_mac_len(skb); NAPI_GRO_CB(skb)->same_flow = 0; - NAPI_GRO_CB(skb)->flush = 0; + NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb); NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->encap_mark = 0; NAPI_GRO_CB(skb)->recursion_counter = 0; @@ -5017,7 +5017,7 @@ EXPORT_SYMBOL(sk_busy_loop); #endif /* CONFIG_NET_RX_BUSY_POLL */ -void napi_hash_add(struct napi_struct *napi) +static void napi_hash_add(struct napi_struct *napi) { if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) || test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) @@ -5037,7 +5037,6 @@ void napi_hash_add(struct napi_struct *napi) spin_unlock(&napi_hash_lock); } -EXPORT_SYMBOL_GPL(napi_hash_add); /* Warning : caller is responsible to make sure rcu grace period * is respected before freeing memory containing @napi @@ -7651,7 +7650,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (!dev->tx_queue_len) { dev->priv_flags |= IFF_NO_QUEUE; - dev->tx_queue_len = 1; + dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; } dev->num_tx_queues = txqs; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index be4629c344a6..b6791d94841d 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -18,6 +18,11 @@ #include <net/fib_rules.h> #include <net/ip_tunnels.h> +static const struct fib_kuid_range fib_kuid_range_unset = { + KUIDT_INIT(0), + KUIDT_INIT(~0), +}; + int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { @@ -33,6 +38,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->table = table; r->flags = flags; r->fr_net = ops->fro_net; + r->uid_range = fib_kuid_range_unset; r->suppress_prefixlen = -1; r->suppress_ifgroup = -1; @@ -172,6 +178,34 @@ void fib_rules_unregister(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_unregister); +static int uid_range_set(struct fib_kuid_range *range) +{ + return uid_valid(range->start) && uid_valid(range->end); +} + +static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb) +{ + struct fib_rule_uid_range *in; + struct fib_kuid_range out; + + in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]); + + out.start = make_kuid(current_user_ns(), in->start); + out.end = make_kuid(current_user_ns(), in->end); + + return out; +} + +static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range) +{ + struct fib_rule_uid_range out = { + from_kuid_munged(current_user_ns(), range->start), + from_kuid_munged(current_user_ns(), range->end) + }; + + return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out); +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) @@ -193,6 +227,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg)) goto out; + if 
(uid_lt(fl->flowi_uid, rule->uid_range.start) || + uid_gt(fl->flowi_uid, rule->uid_range.end)) + goto out; + ret = ops->match(rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; @@ -305,6 +343,10 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, if (r->l3mdev != rule->l3mdev) continue; + if (!uid_eq(r->uid_range.start, rule->uid_range.start) || + !uid_eq(r->uid_range.end, rule->uid_range.end)) + continue; + if (!ops->compare(r, frh, tb)) continue; return 1; @@ -429,6 +471,21 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (rule->l3mdev && rule->table) goto errout_free; + if (tb[FRA_UID_RANGE]) { + if (current_user_ns() != net->user_ns) { + err = -EPERM; + goto errout_free; + } + + rule->uid_range = nla_get_kuid_range(tb); + + if (!uid_range_set(&rule->uid_range) || + !uid_lte(rule->uid_range.start, rule->uid_range.end)) + goto errout_free; + } else { + rule->uid_range = fib_kuid_range_unset; + } + if ((nlh->nlmsg_flags & NLM_F_EXCL) && rule_exists(ops, frh, tb, rule)) { err = -EEXIST; @@ -497,6 +554,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *tmp; struct nlattr *tb[FRA_MAX+1]; + struct fib_kuid_range range; int err = -EINVAL; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) @@ -516,6 +574,14 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (err < 0) goto errout; + if (tb[FRA_UID_RANGE]) { + range = nla_get_kuid_range(tb); + if (!uid_range_set(&range)) + goto errout; + } else { + range = fib_kuid_range_unset; + } + list_for_each_entry(rule, &ops->rules_list, list) { if (frh->action && (frh->action != rule->action)) continue; @@ -552,6 +618,11 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV]))) continue; + if (uid_range_set(&range) && + (!uid_eq(rule->uid_range.start, range.start) || + !uid_eq(rule->uid_range.end, range.end))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -619,7 +690,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */ + nla_total_size(4) /* FRA_FWMARK */ + nla_total_size(4) /* FRA_FWMASK */ - + nla_total_size_64bit(8); /* FRA_TUN_ID */ + + nla_total_size_64bit(8) /* FRA_TUN_ID */ + + nla_total_size(sizeof(struct fib_kuid_range)); if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -679,7 +751,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, (rule->tun_id && nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) || (rule->l3mdev && - nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev))) + nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) || + (uid_range_set(&rule->uid_range) && + nla_put_uid_range(skb, &rule->uid_range))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 88fd64250b02..03976e939818 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -39,6 +39,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "MPLS"; case LWTUNNEL_ENCAP_ILA: return "ILA"; + case LWTUNNEL_ENCAP_SEG6: + return "SEG6"; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: case LWTUNNEL_ENCAP_NONE: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1e3e0087245b..0b2a6e94af2d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3725,7 +3725,6 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) err = SKB_EXT_ERR(skb_next)->ee.ee_errno; 
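Returning to the FRA_UID_RANGE plumbing above: the new test in fib_rule_match() is a closed-interval comparison on the flow's kuid, and rules created without the attribute carry the unset range [0, ~0] so they continue to match every flow. A reduced sketch of that comparison (the helper name is illustrative, not from the patch):

#include <linux/uidgid.h>

/* Sketch: both bounds are inclusive; kuid_t values are compared with
 * the uid_lt()/uid_gt() helpers rather than as raw integers.
 */
static bool example_uid_in_range(kuid_t uid, kuid_t start, kuid_t end)
{
	return !uid_lt(uid, start) && !uid_gt(uid, end);
}

In iproute2 terms this is what a rule such as "ip rule add uidrange 1000-1999 lookup 100" ends up exercising (assuming an iproute2 build with uidrange support).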
spin_unlock_irqrestore(&q->lock, flags); - sk->sk_err = err; if (err) sk->sk_error_report(sk); diff --git a/net/core/sock.c b/net/core/sock.c index d8e4532e89e7..40dbc13453f9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2460,8 +2460,11 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_type = sock->type; sk->sk_wq = sock->wq; sock->sk = sk; - } else + sk->sk_uid = SOCK_INODE(sock)->i_uid; + } else { sk->sk_wq = NULL; + sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0); + } rwlock_init(&sk->sk_callback_lock); lockdep_set_class_and_name(&sk->sk_callback_lock, diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index d9e2fe1da724..8c5a479681ca 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -62,6 +62,7 @@ #include <net/dsa.h> #include <net/flow_dissector.h> #include <linux/uaccess.h> +#include <net/pkt_sched.h> __setup("ether=", netdev_boot_setup); @@ -359,7 +360,7 @@ void ether_setup(struct net_device *dev) dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = ETH_DATA_LEN; dev->addr_len = ETH_ALEN; - dev->tx_queue_len = 1000; /* Ethernet wants good queues */ + dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; dev->flags = IFF_BROADCAST|IFF_MULTICAST; dev->priv_flags |= IFF_TX_SKB_SHARING; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c3b80478226e..d93eea8e2409 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -610,6 +610,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_FLOW] = { .type = NLA_U32 }, [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 38abe70e595f..53a890b605fc 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -425,6 +425,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_mark = mark; + fl4.flowi4_uid = sock_net_uid(net, NULL); fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); @@ -473,6 +474,7 @@ static struct rtable *icmp_route_lookup(struct net *net, param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; fl4->flowi4_mark = mark; + fl4->flowi4_uid = sock_net_uid(net, NULL); fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 61a9deec2993..d5d3ead0a6c3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -415,7 +415,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num)); + htons(ireq->ir_num), sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -452,7 +452,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? 
opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num)); + htons(ireq->ir_num), sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 3b34024202d8..4dea33e5f295 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -861,10 +861,11 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc) { + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct net *net = sock_net(skb->sk); - int i, num, s_i, s_num; u32 idiag_states = r->idiag_states; - bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + int i, num, s_i, s_num; + struct sock *sk; if (idiag_states & TCPF_SYN_RECV) idiag_states |= TCPF_NEW_SYN_RECV; @@ -877,7 +878,6 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct inet_listen_hashbucket *ilb; - struct sock *sk; num = 0; ilb = &hashinfo->listening_hash[i]; @@ -922,13 +922,14 @@ skip_listen_ht: if (!(idiag_states & ~TCPF_LISTEN)) goto out; +#define SKARR_SZ 16 for (i = s_i; i <= hashinfo->ehash_mask; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; spinlock_t *lock = inet_ehash_lockp(hashinfo, i); struct hlist_nulls_node *node; - struct sock *sk; - - num = 0; + struct sock *sk_arr[SKARR_SZ]; + int num_arr[SKARR_SZ]; + int idx, accum, res; if (hlist_nulls_empty(&head->chain)) continue; @@ -936,9 +937,12 @@ skip_listen_ht: if (i > s_i) s_num = 0; +next_chunk: + num = 0; + accum = 0; spin_lock_bh(lock); sk_nulls_for_each(sk, node, &head->chain) { - int state, res; + int state; if (!net_eq(sock_net(sk), net)) continue; @@ -962,21 +966,35 @@ skip_listen_ht: if (!inet_diag_bc_sk(bc, sk)) goto next_normal; - res = sk_diag_fill(sk, skb, r, + sock_hold(sk); + num_arr[accum] = num; + sk_arr[accum] = sk; + if (++accum == SKARR_SZ) + break; +next_normal: + ++num; + } + spin_unlock_bh(lock); + res = 0; + for (idx = 0; idx < accum; idx++) { + if (res >= 0) { + res = sk_diag_fill(sk_arr[idx], skb, r, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, net_admin); - if (res < 0) { - spin_unlock_bh(lock); - goto done; + if (res < 0) + num = num_arr[idx]; } -next_normal: - ++num; + sock_gen_put(sk_arr[idx]); } - - spin_unlock_bh(lock); + if (res < 0) + break; cond_resched(); + if (accum == SKARR_SZ) { + s_num = num + 1; + goto next_chunk; + } } done: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 03e7f7310423..37dfacd340af 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1587,7 +1587,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), daddr, saddr, - tcp_hdr(skb)->source, tcp_hdr(skb)->dest); + tcp_hdr(skb)->source, tcp_hdr(skb)->dest, + arg->uid); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b8a2d63d1fb8..8b13881ed064 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -97,6 +97,17 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); } +static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff 
*skb) +{ + int val; + + if (IPCB(skb)->frag_max_size == 0) + return; + + val = IPCB(skb)->frag_max_size; + put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val); +} + static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, int tlen, int offset) { @@ -153,10 +164,10 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); } -void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, - int tlen, int offset) +void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb, int tlen, int offset) { - struct inet_sock *inet = inet_sk(skb->sk); + struct inet_sock *inet = inet_sk(sk); unsigned int flags = inet->cmsg_flags; /* Ordered by supposed usage frequency */ @@ -218,6 +229,9 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, if (flags & IP_CMSG_CHECKSUM) ip_cmsg_recv_checksum(msg, skb, tlen, offset); + + if (flags & IP_CMSG_RECVFRAGSIZE) + ip_cmsg_recv_fragsize(msg, skb); } EXPORT_SYMBOL(ip_cmsg_recv_offset); @@ -614,6 +628,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_MULTICAST_LOOP: case IP_RECVORIGDSTADDR: case IP_CHECKSUM: + case IP_RECVFRAGSIZE: if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; @@ -726,6 +741,14 @@ static int do_ip_setsockopt(struct sock *sk, int level, } } break; + case IP_RECVFRAGSIZE: + if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM) + goto e_inval; + if (val) + inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE; + else + inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE; + break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; @@ -1357,6 +1380,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_CHECKSUM: val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0; break; + case IP_RECVFRAGSIZE: + val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0; + break; case IP_TOS: val = inet->tos; break; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 205e2000d395..d11129f1178d 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -789,7 +789,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); + inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, + sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 6a0bd68a565b..2300fae11b22 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -606,7 +606,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), - daddr, saddr, 0, 0); + daddr, saddr, 0, 0, sk->sk_uid); if (!inet->hdrincl) { rfv.msg = msg; @@ -695,12 +695,20 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; + u32 tb_id = RT_TABLE_LOCAL; int ret = -EINVAL; int chk_addr_ret; if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) goto out; - chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); + + if (sk->sk_bound_dev_if) + tb_id = l3mdev_fib_table_by_index(sock_net(sk), + sk->sk_bound_dev_if) ? 
: tb_id; + + chk_addr_ret = inet_addr_type_table(sock_net(sk), addr->sin_addr.s_addr, + tb_id); + ret = -EADDRNOTAVAIL; if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index be930908bcf9..e1a51ca68d23 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -79,10 +79,11 @@ static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 * hashinfo->lock here. */ sock_hold(sk); - break; + goto out_unlock; } } } +out_unlock: read_unlock(&hashinfo->lock); return sk ? sk : ERR_PTR(-ENOENT); @@ -205,11 +206,14 @@ static int raw_diag_destroy(struct sk_buff *in_skb, { struct net *net = sock_net(in_skb->sk); struct sock *sk; + int err; sk = raw_sock_get(net, r); if (IS_ERR(sk)) return PTR_ERR(sk); - return sock_diag_destroy(sk, ECONNABORTED); + err = sock_diag_destroy(sk, ECONNABORTED); + sock_put(sk); + return err; } #endif diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4392db83d540..2355883e1025 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -507,7 +507,8 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) } EXPORT_SYMBOL(__ip_select_ident); -static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, +static void __build_flow_key(const struct net *net, struct flowi4 *fl4, + const struct sock *sk, const struct iphdr *iph, int oif, u8 tos, u8 prot, u32 mark, int flow_flags) @@ -523,7 +524,8 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, flowi4_init_output(fl4, oif, mark, tos, RT_SCOPE_UNIVERSE, prot, flow_flags, - iph->daddr, iph->saddr, 0, 0); + iph->daddr, iph->saddr, 0, 0, + sock_net_uid(net, sk)); } static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, @@ -535,7 +537,7 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, u8 prot = iph->protocol; u32 mark = skb->mark; - __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(sock_net(sk), fl4, sk, iph, oif, tos, prot, mark, 0); } static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) @@ -552,7 +554,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, inet->hdrincl ? 
IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), - daddr, inet->inet_saddr, 0, 0); + daddr, inet->inet_saddr, 0, 0, sk->sk_uid); rcu_read_unlock(); } @@ -800,7 +802,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf rt = (struct rtable *) dst; - __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(sock_net(sk), &fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); } @@ -1018,7 +1020,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, if (!mark) mark = IP4_REPLY_MARK(net, skb->mark); - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1034,7 +1036,7 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0); if (!fl4.flowi4_mark) fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); @@ -1053,6 +1055,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct rtable *rt; struct dst_entry *odst = NULL; bool new = false; + struct net *net = sock_net(sk); bh_lock_sock(sk); @@ -1066,7 +1069,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) goto out; } - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); rt = (struct rtable *)odst; if (odst->obsolete && !odst->ops->check(odst, 0)) { @@ -1106,7 +1109,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1121,9 +1124,10 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; + struct net *net = sock_net(sk); - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); - rt = __ip_route_output_key(sock_net(sk), &fl4); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); + rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); @@ -2504,6 +2508,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; + if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && + nla_put_u32(skb, RTA_UID, + from_kuid_munged(current_user_ns(), fl4->flowi4_uid))) + goto nla_put_failure; + error = rt->dst.error; if (rt_is_input_route(rt)) { @@ -2556,6 +2565,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) int mark; struct sk_buff *skb; u32 table_id = RT_TABLE_MAIN; + kuid_t uid; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); if (err < 0) @@ -2583,6 +2593,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; + if (tb[RTA_UID]) + uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); + else + uid = (iif ? 
INVALID_UID : current_uid()); memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; @@ -2590,6 +2604,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) fl4.flowi4_tos = rtm->rtm_tos; fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; + fl4.flowi4_uid = uid; if (iif) { struct net_device *dev; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e3c4043c27de..0dc6286272aa 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -372,7 +372,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), opt->srr ? opt->faddr : ireq->ir_rmt_addr, - ireq->ir_loc_addr, th->source, th->dest); + ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3251fe71f39f..f8f924ca662d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,7 +279,6 @@ #include <asm/uaccess.h> #include <asm/ioctls.h> -#include <asm/unaligned.h> #include <net/busy_poll.h> int sysctl_tcp_min_tso_segs __read_mostly = 2; @@ -405,7 +404,6 @@ void tcp_init_sock(struct sock *sk) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; - u64_stats_init(&tp->syncp); tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; tcp_enable_early_retrans(tp); @@ -2710,9 +2708,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp, intv; - unsigned int start; - int notsent_bytes; u64 rate64; + bool slow; u32 rate; memset(info, 0, sizeof(*info)); @@ -2721,6 +2718,27 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_state = sk_state_load(sk); + /* Report meaningful fields for all TCP states, including listeners */ + rate = READ_ONCE(sk->sk_pacing_rate); + rate64 = rate != ~0U ? rate : ~0ULL; + info->tcpi_pacing_rate = rate64; + + rate = READ_ONCE(sk->sk_max_pacing_rate); + rate64 = rate != ~0U ? 
rate : ~0ULL; + info->tcpi_max_pacing_rate = rate64; + + info->tcpi_reordering = tp->reordering; + info->tcpi_snd_cwnd = tp->snd_cwnd; + + if (info->tcpi_state == TCP_LISTEN) { + /* listeners aliased fields : + * tcpi_unacked -> Number of children ready for accept() + * tcpi_sacked -> max backlog + */ + info->tcpi_unacked = sk->sk_ack_backlog; + info->tcpi_sacked = sk->sk_max_ack_backlog; + return; + } info->tcpi_ca_state = icsk->icsk_ca_state; info->tcpi_retransmits = icsk->icsk_retransmits; info->tcpi_probes = icsk->icsk_probes_out; @@ -2748,13 +2766,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_snd_mss = tp->mss_cache; info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; - if (info->tcpi_state == TCP_LISTEN) { - info->tcpi_unacked = sk->sk_ack_backlog; - info->tcpi_sacked = sk->sk_max_ack_backlog; - } else { - info->tcpi_unacked = tp->packets_out; - info->tcpi_sacked = tp->sacked_out; - } + info->tcpi_unacked = tp->packets_out; + info->tcpi_sacked = tp->sacked_out; + info->tcpi_lost = tp->lost_out; info->tcpi_retrans = tp->retrans_out; info->tcpi_fackets = tp->fackets_out; @@ -2768,34 +2782,24 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rtt = tp->srtt_us >> 3; info->tcpi_rttvar = tp->mdev_us >> 2; info->tcpi_snd_ssthresh = tp->snd_ssthresh; - info->tcpi_snd_cwnd = tp->snd_cwnd; info->tcpi_advmss = tp->advmss; - info->tcpi_reordering = tp->reordering; info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3; info->tcpi_rcv_space = tp->rcvq_space.space; info->tcpi_total_retrans = tp->total_retrans; - rate = READ_ONCE(sk->sk_pacing_rate); - rate64 = rate != ~0U ? rate : ~0ULL; - put_unaligned(rate64, &info->tcpi_pacing_rate); + slow = lock_sock_fast(sk); - rate = READ_ONCE(sk->sk_max_pacing_rate); - rate64 = rate != ~0U ? 
rate : ~0ULL; - put_unaligned(rate64, &info->tcpi_max_pacing_rate); + info->tcpi_bytes_acked = tp->bytes_acked; + info->tcpi_bytes_received = tp->bytes_received; + info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt); + + unlock_sock_fast(sk, slow); - do { - start = u64_stats_fetch_begin_irq(&tp->syncp); - put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); - put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); - } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); info->tcpi_segs_out = tp->segs_out; info->tcpi_segs_in = tp->segs_in; - notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt); - info->tcpi_notsent_bytes = max(0, notsent_bytes); - info->tcpi_min_rtt = tcp_min_rtt(tp); info->tcpi_data_segs_in = tp->data_segs_in; info->tcpi_data_segs_out = tp->data_segs_out; @@ -2806,7 +2810,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) if (rate && intv) { rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; do_div(rate64, intv); - put_unaligned(rate64, &info->tcpi_delivery_rate); + info->tcpi_delivery_rate = rate64; } } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f2c59c8e57ff..a70046fea0e8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3351,9 +3351,7 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) u32 delta = ack - tp->snd_una; sock_owned_by_me((struct sock *)tp); - u64_stats_update_begin_raw(&tp->syncp); tp->bytes_acked += delta; - u64_stats_update_end_raw(&tp->syncp); tp->snd_una = ack; } @@ -3363,9 +3361,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) u32 delta = seq - tp->rcv_nxt; sock_owned_by_me((struct sock *)tp); - u64_stats_update_begin_raw(&tp->syncp); tp->bytes_received += delta; - u64_stats_update_end_raw(&tp->syncp); tp->rcv_nxt = seq; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b9b8282633d4..6491b7c1f975 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -691,6 +691,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) offsetof(struct inet_timewait_sock, tw_bound_dev_if)); arg.tos = ip_hdr(skb)->tos; + arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -711,7 +712,7 @@ out: outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct net *net, +static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, @@ -726,6 +727,7 @@ static void tcp_v4_send_ack(struct net *net, #endif ]; } rep; + struct net *net = sock_net(sk); struct ip_reply_arg arg; memset(&rep.th, 0, sizeof(struct tcphdr)); @@ -775,6 +777,7 @@ static void tcp_v4_send_ack(struct net *net, if (oif) arg.bound_dev_if = oif; arg.tos = tos; + arg.uid = sock_net_uid(net, sk_fullsock(sk) ? 
sk : NULL); local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -790,7 +793,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(sock_net(sk), skb, + tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, @@ -818,7 +821,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, * exception of <SYN> segments, MUST be right-shifted by * Rcv.Wind.Shift bits: */ - tcp_v4_send_ack(sock_net(sk), skb, seq, + tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 395361b1398e..c827e4ea509e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1020,7 +1020,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, flow_flags, - faddr, saddr, dport, inet->inet_sport); + faddr, saddr, dport, inet->inet_sport, + sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); @@ -1173,26 +1174,26 @@ out: return ret; } +/* fully reclaim rmem/fwd memory allocated for skb */ static void udp_rmem_release(struct sock *sk, int size, int partial) { int amt; atomic_sub(size, &sk->sk_rmem_alloc); - - spin_lock_bh(&sk->sk_receive_queue.lock); sk->sk_forward_alloc += size; amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1); sk->sk_forward_alloc -= amt; - spin_unlock_bh(&sk->sk_receive_queue.lock); if (amt) __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT); } -static void udp_rmem_free(struct sk_buff *skb) +/* Note: called with sk_receive_queue.lock held */ +void udp_skb_destructor(struct sock *sk, struct sk_buff *skb) { - udp_rmem_release(skb->sk, skb->truesize, 1); + udp_rmem_release(sk, skb->truesize, 1); } +EXPORT_SYMBOL(udp_skb_destructor); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) { @@ -1229,9 +1230,9 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) sk->sk_forward_alloc -= size; - /* the skb owner in now the udp socket */ - skb->sk = sk; - skb->destructor = udp_rmem_free; + /* no need to setup a destructor, we will explicitly release the + * forward allocated memory on dequeue + */ skb->dev = NULL; sock_skb_set_dropcount(sk, skb); @@ -1255,8 +1256,15 @@ EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); static void udp_destruct_sock(struct sock *sk) { /* reclaim completely the forward allocated memory */ - __skb_queue_purge(&sk->sk_receive_queue); - udp_rmem_release(sk, 0, 0); + unsigned int total = 0; + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + total += skb->truesize; + kfree_skb(skb); + } + udp_rmem_release(sk, total, 0); + inet_sock_destruct(sk); } @@ -1288,12 +1296,11 @@ EXPORT_SYMBOL_GPL(skb_consume_udp); */ static int first_packet_length(struct sock *sk) { - struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; + struct sk_buff_head *rcvq = &sk->sk_receive_queue; struct sk_buff *skb; + int total = 0; int res; - __skb_queue_head_init(&list_kill); - spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL && udp_lib_checksum_complete(skb)) { @@ -1303,12 +1310,13 @@ static int first_packet_length(struct sock *sk) 
IS_UDPLITE(sk)); atomic_inc(&sk->sk_drops); __skb_unlink(skb, rcvq); - __skb_queue_tail(&list_kill, skb); + total += skb->truesize; + kfree_skb(skb); } res = skb ? skb->len : -1; + if (total) + udp_rmem_release(sk, total, 1); spin_unlock_bh(&rcvq->lock); - - __skb_queue_purge(&list_kill); return res; } @@ -1363,8 +1371,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, try_again: peeking = off = sk_peek_offset(sk, flags); - skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &off, &err); + skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; @@ -1421,7 +1428,7 @@ try_again: *addr_len = sizeof(*sin); } if (inet->cmsg_flags) - ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr), off); + ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off); err = copied; if (flags & MSG_TRUNC) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 2343e4f2e0bf..0f00811a785f 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -289,4 +289,28 @@ config IPV6_PIMSM_V2 Support for IPv6 PIM multicast routing protocol PIM-SMv2. If unsure, say N. +config IPV6_SEG6_INLINE + bool "IPv6: direct Segment Routing Header insertion" + depends on IPV6 + ---help--- + Support for direct insertion of the Segment Routing Header, + also known as inline mode. Be aware that direct insertion of + extension headers (as opposed to encapsulation) may break + multiple mechanisms such as PMTUD or IPsec AH. Use this feature + only if you know exactly what you are doing. + + If unsure, say N. + +config IPV6_SEG6_HMAC + bool "IPv6: Segment Routing HMAC support" + depends on IPV6 + select CRYPTO_HMAC + select CRYPTO_SHA1 + select CRYPTO_SHA256 + ---help--- + Support for HMAC signature generation and verification + of SR-enabled packets. + + If unsure, say N.
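For reference, both new options are booleans defaulting to off; a kernel meant to originate SR-enabled packets and to generate/verify their HMACs would enable, on top of a normal IPv6 build, something like the following .config fragment (editor's sketch, not part of the patch):

	CONFIG_IPV6=y
	CONFIG_IPV6_SEG6_INLINE=y
	CONFIG_IPV6_SEG6_HMAC=y

The SEG6 core (seg6.o, seg6_iptunnel.o) is built into ipv6 unconditionally, as the Makefile hunk below shows; only inline mode and the HMAC support are optional.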
+ endif # IPV6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index c174ccb340a1..129cad2ba960 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o + udp_offload.o seg6.o seg6_iptunnel.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o @@ -44,6 +44,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o obj-$(CONFIG_IPV6_FOU) += fou6.o +obj-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 060dd9922018..86219c0a0104 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -238,6 +238,10 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, + .seg6_enabled = 0, +#ifdef CONFIG_IPV6_SEG6_HMAC + .seg6_require_hmac = 0, +#endif }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -284,6 +288,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, + .seg6_enabled = 0, +#ifdef CONFIG_IPV6_SEG6_HMAC + .seg6_require_hmac = 0, +#endif }; /* Check if a valid qdisc is available */ @@ -4944,6 +4952,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; + array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled; +#ifdef CONFIG_IPV6_SEG6_HMAC + array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac; +#endif } static inline size_t inet6_ifla6_size(void) @@ -6036,6 +6048,22 @@ static const struct ctl_table addrconf_sysctl[] = { }, { + .procname = "seg6_enabled", + .data = &ipv6_devconf.seg6_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#ifdef CONFIG_IPV6_SEG6_HMAC + { + .procname = "seg6_require_hmac", + .data = &ipv6_devconf.seg6_require_hmac, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif + { /* sentinel */ } }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 46ad699937fd..d424f3a3737a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -61,6 +61,7 @@ #include <net/ip6_tunnel.h> #endif #include <net/calipso.h> +#include <net/seg6.h> #include <asm/uaccess.h> #include <linux/mroute6.h> @@ -678,6 +679,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); rcu_read_lock(); @@ -990,6 +992,10 @@ static int __init inet6_init(void) if (err) goto calipso_fail; + err = seg6_init(); + if (err) + goto seg6_fail; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -1000,8 +1006,10 @@ out: #ifdef CONFIG_SYSCTL sysctl_fail: - calipso_exit(); + seg6_exit(); #endif +seg6_fail: + calipso_exit(); calipso_fail: pingv6_exit(); pingv6_fail: diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 
0630a4d5daaa..189eb10b742d 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -662,9 +662,10 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 37874e2f30ed..c5d76d2edd26 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -54,6 +54,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) fl6->fl6_dport = inet->inet_dport; fl6->fl6_sport = inet->inet_sport; fl6->flowlabel = np->flow_label; + fl6->flowi6_uid = sk->sk_uid; if (!fl6->flowi6_oif) fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; @@ -715,6 +716,11 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6); } } + if (np->rxopt.bits.recvfragsize && opt->frag_max_size) { + int val = opt->frag_max_size; + + put_cmsg(msg, SOL_IPV6, IPV6_RECVFRAGSIZE, sizeof(val), &val); + } } void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 060a60b2f8a6..218f0cba231c 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -474,9 +474,10 @@ static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 139ceb68bd37..926818c331e5 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -47,6 +47,11 @@ #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/xfrm.h> #endif +#include <linux/seg6.h> +#include <net/seg6.h> +#ifdef CONFIG_IPV6_SEG6_HMAC +#include <net/seg6_hmac.h> +#endif #include <linux/uaccess.h> @@ -286,6 +291,182 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) return -1; } +static void seg6_update_csum(struct sk_buff *skb) +{ + struct ipv6_sr_hdr *hdr; + struct in6_addr *addr; + __be32 from, to; + + /* srh is at transport offset and seg_left is already decremented + * but daddr is not yet updated with next segment + */ + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + addr = hdr->segments + hdr->segments_left; + + hdr->segments_left++; + from = *(__be32 *)hdr; + + hdr->segments_left--; + to = *(__be32 *)hdr; + + /* update skb csum with diff resulting from seg_left decrement */ + + update_csum_diff4(skb, from, to); + + /* compute csum diff between current and next segment and update */ + + update_csum_diff16(skb, (__be32 *)(&ipv6_hdr(skb)->daddr), + (__be32 *)addr); +} + +static int ipv6_srh_rcv(struct sk_buff *skb) +{ + struct inet6_skb_parm *opt = IP6CB(skb); + struct net *net = dev_net(skb->dev); + struct ipv6_sr_hdr *hdr; + struct inet6_dev *idev; + struct in6_addr *addr; + bool cleanup = false; + int accept_seg6; + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + idev = __in6_dev_get(skb->dev); + + accept_seg6 = net->ipv6.devconf_all->seg6_enabled; + if (accept_seg6 > idev->cnf.seg6_enabled) + accept_seg6 = idev->cnf.seg6_enabled; + + if (!accept_seg6) { + kfree_skb(skb); + return -1; + } + +#ifdef 
CONFIG_IPV6_SEG6_HMAC + if (!seg6_hmac_validate_skb(skb)) { + kfree_skb(skb); + return -1; + } +#endif + +looped_back: + if (hdr->segments_left > 0) { + if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 && + sr_has_cleanup(hdr)) + cleanup = true; + } else { + if (hdr->nexthdr == NEXTHDR_IPV6) { + int offset = (hdr->hdrlen + 1) << 3; + + skb_postpull_rcsum(skb, skb_network_header(skb), + skb_network_header_len(skb)); + + if (!pskb_pull(skb, offset)) { + kfree_skb(skb); + return -1; + } + skb_postpull_rcsum(skb, skb_transport_header(skb), + offset); + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->encapsulation = 0; + + __skb_tunnel_rx(skb, skb->dev, net); + + netif_rx(skb); + return -1; + } + + opt->srcrt = skb_network_header_len(skb); + opt->lastopt = opt->srcrt; + skb->transport_header += (hdr->hdrlen + 1) << 3; + opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb); + + return 1; + } + + if (hdr->segments_left >= (hdr->hdrlen >> 1)) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, + ((&hdr->segments_left) - + skb_network_header(skb))); + kfree_skb(skb); + return -1; + } + + if (skb_cloned(skb)) { + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return -1; + } + } + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + hdr->segments_left--; + addr = hdr->segments + hdr->segments_left; + + skb_push(skb, sizeof(struct ipv6hdr)); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + seg6_update_csum(skb); + + ipv6_hdr(skb)->daddr = *addr; + + if (cleanup) { + int srhlen = (hdr->hdrlen + 1) << 3; + int nh = hdr->nexthdr; + + skb_pull_rcsum(skb, sizeof(struct ipv6hdr) + srhlen); + memmove(skb_network_header(skb) + srhlen, + skb_network_header(skb), + (unsigned char *)hdr - skb_network_header(skb)); + skb->network_header += srhlen; + ipv6_hdr(skb)->nexthdr = nh; + ipv6_hdr(skb)->payload_len = htons(skb->len - + sizeof(struct ipv6hdr)); + skb_push_rcsum(skb, sizeof(struct ipv6hdr)); + } + + skb_dst_drop(skb); + + ip6_route_input(skb); + + if (skb_dst(skb)->error) { + dst_input(skb); + return -1; + } + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { + if (ipv6_hdr(skb)->hop_limit <= 1) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); + icmpv6_send(skb, ICMPV6_TIME_EXCEED, + ICMPV6_EXC_HOPLIMIT, 0); + kfree_skb(skb); + return -1; + } + ipv6_hdr(skb)->hop_limit--; + + /* be sure that srh is still present before reinjecting */ + if (!cleanup) { + skb_pull(skb, sizeof(struct ipv6hdr)); + goto looped_back; + } + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + } + + dst_input(skb); + + return -1; +} + /******************************** Routing header. ********************************/ @@ -326,6 +507,10 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) return -1; } + /* segment routing */ + if (hdr->type == IPV6_SRCRT_TYPE_4) + return ipv6_srh_rcv(skb); + looped_back: if (hdr->segments_left == 0) { switch (hdr->type) { @@ -679,9 +864,9 @@ int ipv6_parse_hopopts(struct sk_buff *skb) * for headers. 
*/ -static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, - struct ipv6_rt_hdr *opt, - struct in6_addr **addr_p) +static void ipv6_push_rthdr0(struct sk_buff *skb, u8 *proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p, struct in6_addr *saddr) { struct rt0_hdr *phdr, *ihdr; int hops; @@ -704,6 +889,62 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, *proto = NEXTHDR_ROUTING; } +static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p, struct in6_addr *saddr) +{ + struct ipv6_sr_hdr *sr_phdr, *sr_ihdr; + int plen, hops; + + sr_ihdr = (struct ipv6_sr_hdr *)opt; + plen = (sr_ihdr->hdrlen + 1) << 3; + + sr_phdr = (struct ipv6_sr_hdr *)skb_push(skb, plen); + memcpy(sr_phdr, sr_ihdr, sizeof(struct ipv6_sr_hdr)); + + hops = sr_ihdr->first_segment + 1; + memcpy(sr_phdr->segments + 1, sr_ihdr->segments + 1, + (hops - 1) * sizeof(struct in6_addr)); + + sr_phdr->segments[0] = **addr_p; + *addr_p = &sr_ihdr->segments[hops - 1]; + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (sr_has_hmac(sr_phdr)) { + struct net *net = NULL; + + if (skb->dev) + net = dev_net(skb->dev); + else if (skb->sk) + net = sock_net(skb->sk); + + WARN_ON(!net); + + if (net) + seg6_push_hmac(net, saddr, sr_phdr); + } +#endif + + sr_phdr->nexthdr = *proto; + *proto = NEXTHDR_ROUTING; +} + +static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, + struct ipv6_rt_hdr *opt, + struct in6_addr **addr_p, struct in6_addr *saddr) +{ + switch (opt->type) { + case IPV6_SRCRT_TYPE_0: + ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr); + break; + case IPV6_SRCRT_TYPE_4: + ipv6_push_rthdr4(skb, proto, opt, addr_p, saddr); + break; + default: + break; + } +} + static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt) { struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt)); @@ -715,10 +956,10 @@ static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto, - struct in6_addr **daddr) + struct in6_addr **daddr, struct in6_addr *saddr) { if (opt->srcrt) { - ipv6_push_rthdr(skb, proto, opt->srcrt, daddr); + ipv6_push_rthdr(skb, proto, opt->srcrt, daddr, saddr); /* * IPV6_RTHDRDSTOPTS is ignored * unless IPV6_RTHDR is set (RFC3542). 
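The bookkeeping in ipv6_push_rthdr4() above deserves a restatement: the socket's final destination is pushed down into segments[0] of the on-wire SRH, slot 0 of the sticky header acts as a placeholder that is overwritten, and the packet is first routed to the last entry of the segment list, since the SRH encodes segments in reverse order of traversal. A standalone sketch of that rotation (editor's illustration, not from the patch; plain strings stand in for struct in6_addr values, and NSEGS plays the role of first_segment + 1):

	#include <stdio.h>
	#include <string.h>

	#define NSEGS 3	/* first_segment + 1 */

	int main(void)
	{
		/* sticky SRH installed via setsockopt(); slot 0 is a
		 * placeholder, D is the destination picked at connect()
		 * or sendmsg() time
		 */
		const char *final_dst = "D";
		const char *sticky[NSEGS] = { "-", "S1", "S2" };
		const char *wire[NSEGS];

		wire[0] = final_dst;	/* segments[0] = **addr_p */
		memcpy(&wire[1], &sticky[1],
		       (NSEGS - 1) * sizeof(sticky[0]));

		/* *addr_p = &segments[hops - 1]: the first hop is the
		 * last listed segment
		 */
		printf("daddr=%s srh=[%s,%s,%s] segments_left=%d\n",
		       sticky[NSEGS - 1], wire[0], wire[1], wire[2],
		       NSEGS - 1);
		printf("traversal: %s -> %s -> %s\n",
		       wire[2], wire[1], wire[0]);
		return 0;
	}

Each segment endpoint then decrements segments_left and copies segments[segments_left] into the IPv6 destination, which is exactly what the ipv6_srh_rcv() hunk earlier in this series does.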
@@ -945,7 +1186,22 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, return NULL; *orig = fl6->daddr; - fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; + + switch (opt->srcrt->type) { + case IPV6_SRCRT_TYPE_0: + fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; + break; + case IPV6_SRCRT_TYPE_4: + { + struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt; + + fl6->daddr = srh->segments[srh->first_segment]; + break; + } + default: + return NULL; + } + return orig; } EXPORT_SYMBOL_GPL(fl6_update_dst); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index bd59c343d35f..ab249fee616b 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -92,9 +92,10 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); else if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) @@ -484,6 +485,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); @@ -658,6 +660,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 532c3ef282c5..1c86c478f578 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -88,6 +88,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); + fl6->flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p); @@ -136,6 +137,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; + fl6->flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); rcu_read_lock(); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d7d6d3ae0b3b..710bc79f9113 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -548,6 +548,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); if (err) return -1; @@ -602,6 +604,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) return -1; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6001e781164e..ddc878d2cc6d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -203,7 +203,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff 
*skb, struct flowi6 *fl6, if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt->opt_nflen) - ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); + ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, + &fl6->saddr); } skb_push(skb, sizeof(struct ipv6hdr)); @@ -1672,7 +1673,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt && opt->opt_nflen) - ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); + ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr); skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 03e050d22508..259e8507d2cd 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1157,7 +1157,7 @@ route_lookup: if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); - ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); + ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL, NULL); } /* Calculate max headroom for all the headers and adjust @@ -1240,6 +1240,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1318,6 +1320,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 35c5b2d8c401..af3f0e011265 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -608,9 +608,10 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 1b9316e1386a..54d165b9845a 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -74,9 +74,10 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 636ec56f5f50..3ba530373560 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -52,6 +52,7 @@ #include <net/udplite.h> #include <net/xfrm.h> #include <net/compat.h> +#include <net/seg6.h> #include <asm/uaccess.h> @@ -430,6 +431,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; #endif + case IPV6_SRCRT_TYPE_4: + { + struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *) + opt->srcrt; + + if (!seg6_validate_srh(srh, optlen)) + goto sticky_done; + break; + } default: goto sticky_done; } @@ -868,6 +878,10 @@ pref_skip_coa: np->autoflowlabel = valbool; retv = 0; break; + case IPV6_RECVFRAGSIZE: + np->rxopt.bits.recvfragsize = valbool; + retv = 0; + break; } release_sock(sk); @@ -1310,6 +1324,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->autoflowlabel; break; + case IPV6_RECVFRAGSIZE: + val = 
np->rxopt.bits.recvfragsize; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d11c46833d61..39970e212ad5 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -26,6 +26,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) struct flowi6 fl6 = { .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, .flowi6_mark = skb->mark, + .flowi6_uid = sock_net_uid(net, skb->sk), .daddr = iph->daddr, .saddr = iph->saddr, }; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 66e2d9dfc43a..e1f8b34d7a2e 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -113,6 +113,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.daddr = *daddr; fl6.flowi6_oif = oif; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 610e09354b2e..291ebc260e70 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -776,6 +776,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; ipc6.hlimit = -1; ipc6.tclass = -1; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 3815e8505ed2..e1da5b888cc4 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -211,7 +211,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, { struct sk_buff *prev, *next; struct net_device *dev; - int offset, end; + int offset, end, fragsize; struct net *net = dev_net(skb_dst(skb)->dev); u8 ecn; @@ -336,6 +336,10 @@ found: fq->ecn |= ecn; add_frag_mem_limit(fq->q.net, skb->truesize); + fragsize = -skb_network_offset(skb) + skb->len; + if (fragsize > fq->q.max_size) + fq->q.max_size = fragsize; + /* The first fragment. * nhoffset is obtained from the first fragment, of course. */ @@ -495,6 +499,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; IP6CB(head)->flags |= IP6SKB_FRAGMENTED; + IP6CB(head)->frag_max_size = fq->q.max_size; /* Yes, and fold redundant checksum back. 
8) */ skb_postpush_rcsum(head, skb_network_header(head), diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 947ed1ded026..6aa014eedccd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1405,7 +1405,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, - int oif, u32 mark) + int oif, u32 mark, kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1417,6 +1417,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1430,7 +1431,7 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) struct dst_entry *dst; ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark); + sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid); dst = __sk_dst_get(sk); if (!dst || !dst->obsolete || @@ -1522,7 +1523,8 @@ static struct dst_entry *ip6_route_redirect(struct net *net, flags, __ip6_route_redirect); } -void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1535,6 +1537,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1556,6 +1559,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, fl6.flowi6_mark = mark; fl6.daddr = msg->dest; fl6.saddr = iph->daddr; + fl6.flowi6_uid = sock_net_uid(net, NULL); dst = ip6_route_redirect(net, &fl6, &iph->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1564,7 +1568,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) { - ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); + ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark, + sk->sk_uid); } EXPORT_SYMBOL_GPL(ip6_sk_redirect); @@ -2797,6 +2802,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_EXPIRES] = { .type = NLA_U32 }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -3371,6 +3377,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (tb[RTA_MARK]) fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); + if (tb[RTA_UID]) + fl6.flowi6_uid = make_kuid(current_user_ns(), + nla_get_u32(tb[RTA_UID])); + else + fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); + if (iif) { struct net_device *dev; int flags = 0; diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c new file mode 100644 index 000000000000..50f6e0663d1d --- /dev/null +++ b/net/ipv6/seg6.c @@ -0,0 +1,487 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <[email protected]> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <linux/slab.h> + +#include <net/ipv6.h> +#include <net/protocol.h> + +#include <net/seg6.h> +#include <net/genetlink.h> +#include <linux/seg6.h> +#include <linux/seg6_genl.h> +#ifdef CONFIG_IPV6_SEG6_HMAC +#include <net/seg6_hmac.h> +#endif + +bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len) +{ + int trailing; + unsigned int tlv_offset; + + if (srh->type != IPV6_SRCRT_TYPE_4) + return false; + + if (((srh->hdrlen + 1) << 3) != len) + return false; + + if (srh->segments_left != srh->first_segment) + return false; + + tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4); + + trailing = len - tlv_offset; + if (trailing < 0) + return false; + + while (trailing) { + struct sr6_tlv *tlv; + unsigned int tlv_len; + + tlv = (struct sr6_tlv *)((unsigned char *)srh + tlv_offset); + tlv_len = sizeof(*tlv) + tlv->len; + + trailing -= tlv_len; + if (trailing < 0) + return false; + + tlv_offset += tlv_len; + } + + return true; +} + +static struct genl_family seg6_genl_family; + +static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = { + [SEG6_ATTR_DST] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [SEG6_ATTR_DSTLEN] = { .type = NLA_S32, }, + [SEG6_ATTR_HMACKEYID] = { .type = NLA_U32, }, + [SEG6_ATTR_SECRET] = { .type = NLA_BINARY, }, + [SEG6_ATTR_SECRETLEN] = { .type = NLA_U8, }, + [SEG6_ATTR_ALGID] = { .type = NLA_U8, }, + [SEG6_ATTR_HMACINFO] = { .type = NLA_NESTED, }, +}; + +#ifdef CONFIG_IPV6_SEG6_HMAC + +static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct seg6_pernet_data *sdata; + struct seg6_hmac_info *hinfo; + u32 hmackeyid; + char *secret; + int err = 0; + u8 algid; + u8 slen; + + sdata = seg6_pernet(net); + + if (!info->attrs[SEG6_ATTR_HMACKEYID] || + !info->attrs[SEG6_ATTR_SECRETLEN] || + !info->attrs[SEG6_ATTR_ALGID]) + return -EINVAL; + + hmackeyid = nla_get_u32(info->attrs[SEG6_ATTR_HMACKEYID]); + slen = nla_get_u8(info->attrs[SEG6_ATTR_SECRETLEN]); + algid = nla_get_u8(info->attrs[SEG6_ATTR_ALGID]); + + if (hmackeyid == 0) + return -EINVAL; + + if (slen > SEG6_HMAC_SECRET_LEN) + return -EINVAL; + + mutex_lock(&sdata->lock); + hinfo = seg6_hmac_info_lookup(net, hmackeyid); + + if (!slen) { + err = seg6_hmac_info_del(net, hmackeyid); + + goto out_unlock; + } + + if (!info->attrs[SEG6_ATTR_SECRET]) { + err = -EINVAL; + goto out_unlock; + } + + if (hinfo) { + err = seg6_hmac_info_del(net, hmackeyid); + if (err) + goto out_unlock; + } + + secret = (char *)nla_data(info->attrs[SEG6_ATTR_SECRET]); + + hinfo = kzalloc(sizeof(*hinfo), GFP_KERNEL); + if (!hinfo) { + err = -ENOMEM; + goto out_unlock; + } + + memcpy(hinfo->secret, secret, slen); + hinfo->slen = slen; + hinfo->alg_id = algid; + hinfo->hmackeyid = hmackeyid; + + err = seg6_hmac_info_add(net, hmackeyid, hinfo); + if (err) + kfree(hinfo); + +out_unlock: + mutex_unlock(&sdata->lock); + return err; +} + +#else + +static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info) +{ + return -ENOTSUPP; +} + +#endif + +static int seg6_genl_set_tunsrc(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct in6_addr *val, *t_old, *t_new; + struct seg6_pernet_data *sdata; + + sdata = seg6_pernet(net); + + if (!info->attrs[SEG6_ATTR_DST]) + return -EINVAL; + + val = nla_data(info->attrs[SEG6_ATTR_DST]); +
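/* editor's annotation, not part of the patch: what follows is a classic RCU publish -- the new per-netns tunnel source is installed with rcu_assign_pointer() under sdata->lock, and the old pointer is kfree()d only after synchronize_net(), so lockless readers such as set_tun_src() in seg6_iptunnel.c below never dereference freed memory. */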
t_new = kmemdup(val, sizeof(*val), GFP_KERNEL); + if (!t_new) + return -ENOMEM; + + mutex_lock(&sdata->lock); + + t_old = sdata->tun_src; + rcu_assign_pointer(sdata->tun_src, t_new); + + mutex_unlock(&sdata->lock); + + synchronize_net(); + kfree(t_old); + + return 0; +} + +static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct in6_addr *tun_src; + struct sk_buff *msg; + void *hdr; + + msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &seg6_genl_family, 0, SEG6_CMD_GET_TUNSRC); + if (!hdr) + goto free_msg; + + rcu_read_lock(); + tun_src = rcu_dereference(seg6_pernet(net)->tun_src); + + if (nla_put(msg, SEG6_ATTR_DST, sizeof(struct in6_addr), tun_src)) + goto nla_put_failure; + + rcu_read_unlock(); + + genlmsg_end(msg, hdr); + genlmsg_reply(msg, info); + + return 0; + +nla_put_failure: + rcu_read_unlock(); + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -ENOMEM; +} + +#ifdef CONFIG_IPV6_SEG6_HMAC + +static int __seg6_hmac_fill_info(struct seg6_hmac_info *hinfo, + struct sk_buff *msg) +{ + if (nla_put_u32(msg, SEG6_ATTR_HMACKEYID, hinfo->hmackeyid) || + nla_put_u8(msg, SEG6_ATTR_SECRETLEN, hinfo->slen) || + nla_put(msg, SEG6_ATTR_SECRET, hinfo->slen, hinfo->secret) || + nla_put_u8(msg, SEG6_ATTR_ALGID, hinfo->alg_id)) + return -1; + + return 0; +} + +static int __seg6_genl_dumphmac_element(struct seg6_hmac_info *hinfo, + u32 portid, u32 seq, u32 flags, + struct sk_buff *skb, u8 cmd) +{ + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &seg6_genl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + if (__seg6_hmac_fill_info(hinfo, skb) < 0) + goto nla_put_failure; + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int seg6_genl_dumphmac_start(struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct seg6_pernet_data *sdata; + struct rhashtable_iter *iter; + + sdata = seg6_pernet(net); + iter = (struct rhashtable_iter *)cb->args[0]; + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[0] = (long)iter; + } + + rhashtable_walk_enter(&sdata->hmac_infos, iter); + + return 0; +} + +static int seg6_genl_dumphmac_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + rhashtable_walk_exit(iter); + + kfree(iter); + + return 0; +} + +static int
seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb) +{ + return -ENOTSUPP; +} + +#endif + +static int __net_init seg6_net_init(struct net *net) +{ + struct seg6_pernet_data *sdata; + + sdata = kzalloc(sizeof(*sdata), GFP_KERNEL); + if (!sdata) + return -ENOMEM; + + mutex_init(&sdata->lock); + + sdata->tun_src = kzalloc(sizeof(*sdata->tun_src), GFP_KERNEL); + if (!sdata->tun_src) { + kfree(sdata); + return -ENOMEM; + } + + net->ipv6.seg6_data = sdata; + +#ifdef CONFIG_IPV6_SEG6_HMAC + seg6_hmac_net_init(net); +#endif + + return 0; +} + +static void __net_exit seg6_net_exit(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + +#ifdef CONFIG_IPV6_SEG6_HMAC + seg6_hmac_net_exit(net); +#endif + + kfree(sdata->tun_src); + kfree(sdata); +} + +static struct pernet_operations ip6_segments_ops = { + .init = seg6_net_init, + .exit = seg6_net_exit, +}; + +static const struct genl_ops seg6_genl_ops[] = { + { + .cmd = SEG6_CMD_SETHMAC, + .doit = seg6_genl_sethmac, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = SEG6_CMD_DUMPHMAC, + .start = seg6_genl_dumphmac_start, + .dumpit = seg6_genl_dumphmac, + .done = seg6_genl_dumphmac_done, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = SEG6_CMD_SET_TUNSRC, + .doit = seg6_genl_set_tunsrc, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = SEG6_CMD_GET_TUNSRC, + .doit = seg6_genl_get_tunsrc, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, +}; + +static struct genl_family seg6_genl_family __ro_after_init = { + .hdrsize = 0, + .name = SEG6_GENL_NAME, + .version = SEG6_GENL_VERSION, + .maxattr = SEG6_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = seg6_genl_ops, + .n_ops = ARRAY_SIZE(seg6_genl_ops), + .module = THIS_MODULE, +}; + +int __init seg6_init(void) +{ + int err = -ENOMEM; + + err = genl_register_family(&seg6_genl_family); + if (err) + goto out; + + err = register_pernet_subsys(&ip6_segments_ops); + if (err) + goto out_unregister_genl; + + err = seg6_iptunnel_init(); + if (err) + goto out_unregister_pernet; + +#ifdef CONFIG_IPV6_SEG6_HMAC + err = seg6_hmac_init(); + if (err) + goto out_unregister_iptun; +#endif + + pr_info("Segment Routing with IPv6\n"); + +out: + return err; +#ifdef CONFIG_IPV6_SEG6_HMAC +out_unregister_iptun: + seg6_iptunnel_exit(); +#endif +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); +out_unregister_genl: + genl_unregister_family(&seg6_genl_family); + goto out; +} + +void seg6_exit(void) +{ +#ifdef CONFIG_IPV6_SEG6_HMAC + seg6_hmac_exit(); +#endif + seg6_iptunnel_exit(); + unregister_pernet_subsys(&ip6_segments_ops); + genl_unregister_family(&seg6_genl_family); +} diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c new file mode 100644 index 000000000000..ef1c8a46e7ac --- /dev/null +++ b/net/ipv6/seg6_hmac.c @@ -0,0 +1,484 @@ +/* + * SR-IPv6 implementation -- HMAC functions + * + * Author: + * David Lebrun <[email protected]> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/in6.h> +#include <linux/icmpv6.h> +#include <linux/mroute6.h> +#include <linux/slab.h> + +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> + +#include <net/sock.h> +#include <net/snmp.h> + +#include <net/ipv6.h> +#include <net/protocol.h> +#include <net/transp_v6.h> +#include <net/rawv6.h> +#include <net/ndisc.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/xfrm.h> + +#include <linux/cryptohash.h> +#include <crypto/hash.h> +#include <crypto/sha.h> +#include <net/seg6.h> +#include <net/genetlink.h> +#include <net/seg6_hmac.h> +#include <linux/random.h> + +static char * __percpu *hmac_ring; + +static int seg6_hmac_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct seg6_hmac_info *hinfo = obj; + + return (hinfo->hmackeyid != *(__u32 *)arg->key); +} + +static inline void seg6_hinfo_release(struct seg6_hmac_info *hinfo) +{ + kfree_rcu(hinfo, rcu); +} + +static void seg6_free_hi(void *ptr, void *arg) +{ + struct seg6_hmac_info *hinfo = (struct seg6_hmac_info *)ptr; + + if (hinfo) + seg6_hinfo_release(hinfo); +} + +static const struct rhashtable_params rht_params = { + .head_offset = offsetof(struct seg6_hmac_info, node), + .key_offset = offsetof(struct seg6_hmac_info, hmackeyid), + .key_len = sizeof(u32), + .automatic_shrinking = true, + .obj_cmpfn = seg6_hmac_cmpfn, +}; + +static struct seg6_hmac_algo hmac_algos[] = { + { + .alg_id = SEG6_HMAC_ALGO_SHA1, + .name = "hmac(sha1)", + }, + { + .alg_id = SEG6_HMAC_ALGO_SHA256, + .name = "hmac(sha256)", + }, +}; + +static struct sr6_tlv_hmac *seg6_get_tlv_hmac(struct ipv6_sr_hdr *srh) +{ + struct sr6_tlv_hmac *tlv; + + if (srh->hdrlen < (srh->first_segment + 1) * 2 + 5) + return NULL; + + if (!sr_has_hmac(srh)) + return NULL; + + tlv = (struct sr6_tlv_hmac *) + ((char *)srh + ((srh->hdrlen + 1) << 3) - 40); + + if (tlv->tlvhdr.type != SR6_TLV_HMAC || tlv->tlvhdr.len != 38) + return NULL; + + return tlv; +} + +static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id) +{ + struct seg6_hmac_algo *algo; + int i, alg_count; + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + for (i = 0; i < alg_count; i++) { + algo = &hmac_algos[i]; + if (algo->alg_id == alg_id) + return algo; + } + + return NULL; +} + +static int __do_hmac(struct seg6_hmac_info *hinfo, const char *text, u8 psize, + u8 *output, int outlen) +{ + struct seg6_hmac_algo *algo; + struct crypto_shash *tfm; + struct shash_desc *shash; + int ret, dgsize; + + algo = __hmac_get_algo(hinfo->alg_id); + if (!algo) + return -ENOENT; + + tfm = *this_cpu_ptr(algo->tfms); + + dgsize = crypto_shash_digestsize(tfm); + if (dgsize > outlen) { + pr_debug("sr-ipv6: __do_hmac: digest size too big (%d / %d)\n", + dgsize, outlen); + return -ENOMEM; + } + + ret = crypto_shash_setkey(tfm, hinfo->secret, hinfo->slen); + if (ret < 0) { + pr_debug("sr-ipv6: crypto_shash_setkey failed: err %d\n", ret); + goto failed; + } + + shash = *this_cpu_ptr(algo->shashs); + shash->tfm = tfm; + + ret = crypto_shash_digest(shash, text, psize, output); + if (ret < 0) { + pr_debug("sr-ipv6: crypto_shash_digest failed: err %d\n", ret); + goto failed; + } + + return dgsize; + +failed: + return ret; +} + +int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, + struct in6_addr *saddr, u8 *output) +{ + __be32 hmackeyid = cpu_to_be32(hinfo->hmackeyid); + 
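/* editor's annotation, not part of the patch: the two algorithms registered above digest to 20 (SHA-1) and 32 (SHA-256) bytes, well within tmp_out below; and since plen evaluates to 16 + 1 + 1 + 4 + 16 * (first_segment + 1) bytes, a 256-byte ring (the apparent value of SEG6_HMAC_RING_SIZE, defined in a header outside this excerpt) caps the segment list at 14 entries -- 22 + 16 * 14 = 246 fits, 22 + 16 * 15 = 262 does not -- matching the "14 segments" comment below. */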
u8 tmp_out[SEG6_HMAC_MAX_DIGESTSIZE]; + int plen, i, dgsize, wrsize; + char *ring, *off; + + /* a 160-byte buffer for digest output allows to store highest known + * hash function (RadioGatun) with up to 1216 bits + */ + + /* saddr(16) + first_seg(1) + cleanup(1) + keyid(4) + seglist(16n) */ + plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16; + + /* this limit allows for 14 segments */ + if (plen >= SEG6_HMAC_RING_SIZE) + return -EMSGSIZE; + + /* Let's build the HMAC text on the ring buffer. The text is composed + * as follows, in order: + * + * 1. Source IPv6 address (128 bits) + * 2. first_segment value (8 bits) + * 3. cleanup flag (8 bits: highest bit is cleanup value, others are 0) + * 4. HMAC Key ID (32 bits) + * 5. All segments in the segments list (n * 128 bits) + */ + + local_bh_disable(); + ring = *this_cpu_ptr(hmac_ring); + off = ring; + + /* source address */ + memcpy(off, saddr, 16); + off += 16; + + /* first_segment value */ + *off++ = hdr->first_segment; + + /* cleanup flag */ + *off++ = !!(sr_has_cleanup(hdr)) << 7; + + /* HMAC Key ID */ + memcpy(off, &hmackeyid, 4); + off += 4; + + /* all segments in the list */ + for (i = 0; i < hdr->first_segment + 1; i++) { + memcpy(off, hdr->segments + i, 16); + off += 16; + } + + dgsize = __do_hmac(hinfo, ring, plen, tmp_out, + SEG6_HMAC_MAX_DIGESTSIZE); + local_bh_enable(); + + if (dgsize < 0) + return dgsize; + + wrsize = SEG6_HMAC_FIELD_LEN; + if (wrsize > dgsize) + wrsize = dgsize; + + memset(output, 0, SEG6_HMAC_FIELD_LEN); + memcpy(output, tmp_out, wrsize); + + return 0; +} +EXPORT_SYMBOL(seg6_hmac_compute); + +/* checks if an incoming SR-enabled packet's HMAC status matches + * the incoming policy. + * + * called with rcu_read_lock() + */ +bool seg6_hmac_validate_skb(struct sk_buff *skb) +{ + u8 hmac_output[SEG6_HMAC_FIELD_LEN]; + struct net *net = dev_net(skb->dev); + struct seg6_hmac_info *hinfo; + struct sr6_tlv_hmac *tlv; + struct ipv6_sr_hdr *srh; + struct inet6_dev *idev; + + idev = __in6_dev_get(skb->dev); + + srh = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + tlv = seg6_get_tlv_hmac(srh); + + /* mandatory check but no tlv */ + if (idev->cnf.seg6_require_hmac > 0 && !tlv) + return false; + + /* no check */ + if (idev->cnf.seg6_require_hmac < 0) + return true; + + /* check only if present */ + if (idev->cnf.seg6_require_hmac == 0 && !tlv) + return true; + + /* now, seg6_require_hmac >= 0 && tlv */ + + hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid)); + if (!hinfo) + return false; + + if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output)) + return false; + + if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0) + return false; + + return true; +} +EXPORT_SYMBOL(seg6_hmac_validate_skb); + +/* called with rcu_read_lock() */ +struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct seg6_hmac_info *hinfo; + + hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params); + + return hinfo; +} +EXPORT_SYMBOL(seg6_hmac_info_lookup); + +int seg6_hmac_info_add(struct net *net, u32 key, struct seg6_hmac_info *hinfo) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + int err; + + err = rhashtable_lookup_insert_fast(&sdata->hmac_infos, &hinfo->node, + rht_params); + + return err; +} +EXPORT_SYMBOL(seg6_hmac_info_add); + +int seg6_hmac_info_del(struct net *net, u32 key) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct seg6_hmac_info *hinfo; + int err = -ENOENT; + + hinfo = 
rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params); + if (!hinfo) + goto out; + + err = rhashtable_remove_fast(&sdata->hmac_infos, &hinfo->node, + rht_params); + if (err) + goto out; + + seg6_hinfo_release(hinfo); + +out: + return err; +} +EXPORT_SYMBOL(seg6_hmac_info_del); + +int seg6_push_hmac(struct net *net, struct in6_addr *saddr, + struct ipv6_sr_hdr *srh) +{ + struct seg6_hmac_info *hinfo; + struct sr6_tlv_hmac *tlv; + int err = -ENOENT; + + tlv = seg6_get_tlv_hmac(srh); + if (!tlv) + return -EINVAL; + + rcu_read_lock(); + + hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid)); + if (!hinfo) + goto out; + + memset(tlv->hmac, 0, SEG6_HMAC_FIELD_LEN); + err = seg6_hmac_compute(hinfo, srh, saddr, tlv->hmac); + +out: + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(seg6_push_hmac); + +static int seg6_hmac_init_ring(void) +{ + int i; + + hmac_ring = alloc_percpu(char *); + + if (!hmac_ring) + return -ENOMEM; + + for_each_possible_cpu(i) { + char *ring = kzalloc(SEG6_HMAC_RING_SIZE, GFP_KERNEL); + + if (!ring) + return -ENOMEM; + + *per_cpu_ptr(hmac_ring, i) = ring; + } + + return 0; +} + +static int seg6_hmac_init_algo(void) +{ + struct seg6_hmac_algo *algo; + struct crypto_shash *tfm; + struct shash_desc *shash; + int i, alg_count, cpu; + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + + for (i = 0; i < alg_count; i++) { + struct crypto_shash **p_tfm; + int shsize; + + algo = &hmac_algos[i]; + algo->tfms = alloc_percpu(struct crypto_shash *); + if (!algo->tfms) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + p_tfm = per_cpu_ptr(algo->tfms, cpu); + *p_tfm = tfm; + } + + p_tfm = this_cpu_ptr(algo->tfms); + tfm = *p_tfm; + + shsize = sizeof(*shash) + crypto_shash_descsize(tfm); + + algo->shashs = alloc_percpu(struct shash_desc *); + if (!algo->shashs) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + shash = kzalloc(shsize, GFP_KERNEL); + if (!shash) + return -ENOMEM; + *per_cpu_ptr(algo->shashs, cpu) = shash; + } + } + + return 0; +} + +int __init seg6_hmac_init(void) +{ + int ret; + + ret = seg6_hmac_init_ring(); + if (ret < 0) + goto out; + + ret = seg6_hmac_init_algo(); + +out: + return ret; +} +EXPORT_SYMBOL(seg6_hmac_init); + +int __net_init seg6_hmac_net_init(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + + rhashtable_init(&sdata->hmac_infos, &rht_params); + + return 0; +} +EXPORT_SYMBOL(seg6_hmac_net_init); + +void seg6_hmac_exit(void) +{ + struct seg6_hmac_algo *algo = NULL; + int i, alg_count, cpu; + + for_each_possible_cpu(i) { + char *ring = *per_cpu_ptr(hmac_ring, i); + + kfree(ring); + } + free_percpu(hmac_ring); + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + for (i = 0; i < alg_count; i++) { + algo = &hmac_algos[i]; + for_each_possible_cpu(cpu) { + struct crypto_shash *tfm; + struct shash_desc *shash; + + shash = *per_cpu_ptr(algo->shashs, cpu); + kfree(shash); + tfm = *per_cpu_ptr(algo->tfms, cpu); + crypto_free_shash(tfm); + } + free_percpu(algo->tfms); + free_percpu(algo->shashs); + } +} +EXPORT_SYMBOL(seg6_hmac_exit); + +void __net_exit seg6_hmac_net_exit(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + + rhashtable_free_and_destroy(&sdata->hmac_infos, seg6_free_hi, NULL); +} +EXPORT_SYMBOL(seg6_hmac_net_exit); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c new file mode 100644 index 000000000000..bbfca22c34ae --- /dev/null +++ 
b/net/ipv6/seg6_iptunnel.c @@ -0,0 +1,431 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <[email protected]> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/net.h> +#include <linux/module.h> +#include <net/ip.h> +#include <net/lwtunnel.h> +#include <net/netevent.h> +#include <net/netns/generic.h> +#include <net/ip6_fib.h> +#include <net/route.h> +#include <net/seg6.h> +#include <linux/seg6.h> +#include <linux/seg6_iptunnel.h> +#include <net/addrconf.h> +#include <net/ip6_route.h> +#ifdef CONFIG_DST_CACHE +#include <net/dst_cache.h> +#endif +#ifdef CONFIG_IPV6_SEG6_HMAC +#include <net/seg6_hmac.h> +#endif + +struct seg6_lwt { +#ifdef CONFIG_DST_CACHE + struct dst_cache cache; +#endif + struct seg6_iptunnel_encap tuninfo[0]; +}; + +static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt) +{ + return (struct seg6_lwt *)lwt->data; +} + +static inline struct seg6_iptunnel_encap * +seg6_encap_lwtunnel(struct lwtunnel_state *lwt) +{ + return seg6_lwt_lwtunnel(lwt)->tuninfo; +} + +static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { + [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, +}; + +int nla_put_srh(struct sk_buff *skb, int attrtype, + struct seg6_iptunnel_encap *tuninfo) +{ + struct seg6_iptunnel_encap *data; + struct nlattr *nla; + int len; + + len = SEG6_IPTUN_ENCAP_SIZE(tuninfo); + + nla = nla_reserve(skb, attrtype, len); + if (!nla) + return -EMSGSIZE; + + data = nla_data(nla); + memcpy(data, tuninfo, len); + + return 0; +} + +static void set_tun_src(struct net *net, struct net_device *dev, + struct in6_addr *daddr, struct in6_addr *saddr) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct in6_addr *tun_src; + + rcu_read_lock(); + + tun_src = rcu_dereference(sdata->tun_src); + + if (!ipv6_addr_any(tun_src)) { + memcpy(saddr, tun_src, sizeof(struct in6_addr)); + } else { + ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC, + saddr); + } + + rcu_read_unlock(); +} + +/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ +static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +{ + struct net *net = dev_net(skb_dst(skb)->dev); + struct ipv6hdr *hdr, *inner_hdr; + struct ipv6_sr_hdr *isrh; + int hdrlen, tot_len, err; + + hdrlen = (osrh->hdrlen + 1) << 3; + tot_len = hdrlen + sizeof(*hdr); + + err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC); + if (unlikely(err)) + return err; + + inner_hdr = ipv6_hdr(skb); + + skb_push(skb, tot_len); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + hdr = ipv6_hdr(skb); + + /* inherit tc, flowlabel and hlim + * hlim will be decremented in ip6_forward() afterwards and + * decapsulation will overwrite inner hlim with outer hlim + */ + ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), + ip6_flowlabel(inner_hdr)); + hdr->hop_limit = inner_hdr->hop_limit; + hdr->nexthdr = NEXTHDR_ROUTING; + + isrh = (void *)hdr + sizeof(*hdr); + memcpy(isrh, osrh, hdrlen); + + isrh->nexthdr = NEXTHDR_IPV6; + + hdr->daddr = isrh->segments[isrh->first_segment]; + set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr); + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (sr_has_hmac(isrh)) { + err = seg6_push_hmac(net, &hdr->saddr, isrh); + if (unlikely(err)) + 
return err; + } +#endif + + skb_postpush_rcsum(skb, hdr, tot_len); + + return 0; +} + +/* insert an SRH within an IPv6 packet, just after the IPv6 header */ +#ifdef CONFIG_IPV6_SEG6_INLINE +static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +{ + struct ipv6hdr *hdr, *oldhdr; + struct ipv6_sr_hdr *isrh; + int hdrlen, err; + + hdrlen = (osrh->hdrlen + 1) << 3; + + err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC); + if (unlikely(err)) + return err; + + oldhdr = ipv6_hdr(skb); + + skb_pull(skb, sizeof(struct ipv6hdr)); + skb_postpull_rcsum(skb, skb_network_header(skb), + sizeof(struct ipv6hdr)); + + skb_push(skb, sizeof(struct ipv6hdr) + hdrlen); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + hdr = ipv6_hdr(skb); + + memmove(hdr, oldhdr, sizeof(*hdr)); + + isrh = (void *)hdr + sizeof(*hdr); + memcpy(isrh, osrh, hdrlen); + + isrh->nexthdr = hdr->nexthdr; + hdr->nexthdr = NEXTHDR_ROUTING; + + isrh->segments[0] = hdr->daddr; + hdr->daddr = isrh->segments[isrh->first_segment]; + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (sr_has_hmac(isrh)) { + struct net *net = dev_net(skb_dst(skb)->dev); + + err = seg6_push_hmac(net, &hdr->saddr, isrh); + if (unlikely(err)) + return err; + } +#endif + + skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); + + return 0; +} +#endif + +static int seg6_do_srh(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct seg6_iptunnel_encap *tinfo; + int err = 0; + + tinfo = seg6_encap_lwtunnel(dst->lwtstate); + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + switch (tinfo->mode) { +#ifdef CONFIG_IPV6_SEG6_INLINE + case SEG6_IPTUN_MODE_INLINE: + err = seg6_do_srh_inline(skb, tinfo->srh); + skb_reset_inner_headers(skb); + break; +#endif + case SEG6_IPTUN_MODE_ENCAP: + err = seg6_do_srh_encap(skb, tinfo->srh); + break; + } + + if (err) + return err; + + ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + + skb_set_inner_protocol(skb, skb->protocol); + + return 0; +} + +int seg6_input(struct sk_buff *skb) +{ + int err; + + err = seg6_do_srh(skb); + if (unlikely(err)) { + kfree_skb(skb); + return err; + } + + skb_dst_drop(skb); + ip6_route_input(skb); + + return dst_input(skb); +} + +int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct dst_entry *orig_dst = skb_dst(skb); + struct dst_entry *dst = NULL; + struct seg6_lwt *slwt; + int err = -EINVAL; + + err = seg6_do_srh(skb); + if (unlikely(err)) + goto drop; + + slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + +#ifdef CONFIG_DST_CACHE + dst = dst_cache_get(&slwt->cache); +#endif + + if (unlikely(!dst)) { + struct ipv6hdr *hdr = ipv6_hdr(skb); + struct flowi6 fl6; + + fl6.daddr = hdr->daddr; + fl6.saddr = hdr->saddr; + fl6.flowlabel = ip6_flowinfo(hdr); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = hdr->nexthdr; + + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + err = dst->error; + dst_release(dst); + goto drop; + } + +#ifdef CONFIG_DST_CACHE + dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); +#endif + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return dst_output(net, sk, skb); +drop: + kfree_skb(skb); + return err; +} + +static int seg6_build_state(struct net_device *dev, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts) +{ + struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1]; + struct seg6_iptunnel_encap *tuninfo; + struct lwtunnel_state 
*newts; + int tuninfo_len, min_size; + struct seg6_lwt *slwt; + int err; + + err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla, + seg6_iptunnel_policy); + + if (err < 0) + return err; + + if (!tb[SEG6_IPTUNNEL_SRH]) + return -EINVAL; + + tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]); + tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]); + + /* tuninfo must contain at least the iptunnel encap structure, + * the SRH and one segment + */ + min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) + + sizeof(struct in6_addr); + if (tuninfo_len < min_size) + return -EINVAL; + + switch (tuninfo->mode) { +#ifdef CONFIG_IPV6_SEG6_INLINE + case SEG6_IPTUN_MODE_INLINE: + break; +#endif + case SEG6_IPTUN_MODE_ENCAP: + break; + default: + return -EINVAL; + } + + /* verify that SRH is consistent */ + if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo))) + return -EINVAL; + + newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt)); + if (!newts) + return -ENOMEM; + + slwt = seg6_lwt_lwtunnel(newts); + +#ifdef CONFIG_DST_CACHE + err = dst_cache_init(&slwt->cache, GFP_KERNEL); + if (err) { + kfree(newts); + return err; + } +#endif + + memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); + + newts->type = LWTUNNEL_ENCAP_SEG6; + newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | + LWTUNNEL_STATE_INPUT_REDIRECT; + newts->headroom = seg6_lwt_headroom(tuninfo); + + *ts = newts; + + return 0; +} + +#ifdef CONFIG_DST_CACHE +static void seg6_destroy_state(struct lwtunnel_state *lwt) +{ + dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache); +} +#endif + +static int seg6_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + + if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo)) + return -EMSGSIZE; + + return 0; +} + +static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + + return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); +} + +static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a); + struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b); + int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr); + + if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr)) + return 1; + + return memcmp(a_hdr, b_hdr, len); +} + +static const struct lwtunnel_encap_ops seg6_iptun_ops = { + .build_state = seg6_build_state, +#ifdef CONFIG_DST_CACHE + .destroy_state = seg6_destroy_state, +#endif + .output = seg6_output, + .input = seg6_input, + .fill_encap = seg6_fill_encap_info, + .get_encap_size = seg6_encap_nlsize, + .cmp_encap = seg6_encap_cmp, +}; + +int __init seg6_iptunnel_init(void) +{ + return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); +} + +void seg6_iptunnel_exit(void) +{ + lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); +} diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 59c483937aec..97830a6a9cbb 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -227,6 +227,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5a27ab4eab39..aece1b15e744 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -233,6 +233,7 @@ 
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); @@ -824,6 +825,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; + fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3e232585b0ff..86a8cacd333b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -344,8 +344,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, try_again: peeking = off = sk_peek_offset(sk, flags); - skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &off, &err); + skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; @@ -426,7 +425,7 @@ try_again: if (is_udp4) { if (inet->cmsg_flags) - ip_cmsg_recv_offset(msg, skb, + ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off); } else { if (np->rxopt.all) @@ -1139,6 +1138,7 @@ do_udp_sendmsg: fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index a2ed3bda4ddc..85948c69b236 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -715,7 +715,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_info(session, L2TP_MSG_SEQ, "%s: requested to enable seq numbers by LNS\n", session->name); - session->send_seq = -1; + session->send_seq = 1; l2tp_session_set_header_len(session, tunnel->version); } } else { diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ad3468c32b53..1cea54feab27 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -519,6 +519,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; ipc6.hlimit = -1; ipc6.tclass = -1; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 59aa2d204e4a..3620fba31786 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -220,14 +220,14 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info cfg.local_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_SPORT]); if (info->attrs[L2TP_ATTR_UDP_DPORT]) cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]); - if (info->attrs[L2TP_ATTR_UDP_CSUM]) - cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]); + cfg.use_udp_checksums = nla_get_flag( + info->attrs[L2TP_ATTR_UDP_CSUM]); #if IS_ENABLED(CONFIG_IPV6) - if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]) - cfg.udp6_zero_tx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]); - if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]) - cfg.udp6_zero_rx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]); + cfg.udp6_zero_tx_checksums = nla_get_flag( + info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]); + cfg.udp6_zero_rx_checksums = nla_get_flag( + info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]); #endif } @@ -379,9 +379,24 @@ static int l2tp_nl_tunnel_send(struct 
sk_buff *skb, u32 portid, u32 seq, int fla switch (tunnel->encap) { case L2TP_ENCAPTYPE_UDP: + switch (sk->sk_family) { + case AF_INET: + if (nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx)) + goto nla_put_failure; + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + if (udp_get_no_check6_tx(sk) && + nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_TX)) + goto nla_put_failure; + if (udp_get_no_check6_rx(sk) && + nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_RX)) + goto nla_put_failure; + break; +#endif + } if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) || - nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)) || - nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx)) + nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport))) goto nla_put_failure; /* NOBREAK */ case L2TP_ENCAPTYPE_IP: diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 41d47bfda15c..2ddfec1e4acf 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1272,7 +1272,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, err = -EINVAL; break; } - session->recv_seq = val ? -1 : 0; + session->recv_seq = !!val; l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set recv_seq=%d\n", session->name, session->recv_seq); @@ -1283,7 +1283,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, err = -EINVAL; break; } - session->send_seq = val ? -1 : 0; + session->send_seq = !!val; { struct sock *ssk = ps->sock; struct pppox_sock *po = pppox_sk(ssk); @@ -1301,7 +1301,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, err = -EINVAL; break; } - session->lns_mode = val ? -1 : 0; + session->lns_mode = !!val; l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set lns_mode=%d\n", session->name, session->lns_mode); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 1105c4e29c62..514f7bcf7c63 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -66,6 +66,7 @@ struct ovs_frag_data { u16 vlan_tci; __be16 vlan_proto; unsigned int l2_len; + u8 mac_proto; u8 l2_data[MAX_L2_LEN]; }; @@ -137,12 +138,12 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, static void invalidate_flow_key(struct sw_flow_key *key) { - key->eth.type = htons(0); + key->mac_proto |= SW_FLOW_KEY_INVALID; } static bool is_flow_key_valid(const struct sw_flow_key *key) { - return !!key->eth.type; + return !(key->mac_proto & SW_FLOW_KEY_INVALID); } static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, @@ -186,7 +187,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); - update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype); + if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) + update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype); skb->protocol = mpls->mpls_ethertype; invalidate_flow_key(key); @@ -196,7 +198,6 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, const __be16 ethertype) { - struct ethhdr *hdr; int err; err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); @@ -212,11 +213,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_reset_mac_header(skb); skb_set_network_header(skb, skb->mac_len); - /* mpls_hdr() is used to locate the ethertype field correctly in the - * presence of VLAN tags. 
- */ - hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); - update_ethertype(skb, hdr, ethertype); + if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) { + struct ethhdr *hdr; + + /* mpls_hdr() is used to locate the ethertype field correctly in the + * presence of VLAN tags. + */ + hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); + update_ethertype(skb, hdr, ethertype); + } if (eth_p_mpls(skb->protocol)) skb->protocol = ethertype; @@ -312,6 +317,47 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, return 0; } +/* pop_eth does not support VLAN packets as this action is never called + * for them. + */ +static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key) +{ + skb_pull_rcsum(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + /* safe right before invalidate_flow_key */ + key->mac_proto = MAC_PROTO_NONE; + invalidate_flow_key(key); + return 0; +} + +static int push_eth(struct sk_buff *skb, struct sw_flow_key *key, + const struct ovs_action_push_eth *ethh) +{ + struct ethhdr *hdr; + + /* Add the new Ethernet header */ + if (skb_cow_head(skb, ETH_HLEN) < 0) + return -ENOMEM; + + skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + hdr = eth_hdr(skb); + ether_addr_copy(hdr->h_source, ethh->addresses.eth_src); + ether_addr_copy(hdr->h_dest, ethh->addresses.eth_dst); + hdr->h_proto = skb->protocol; + + skb_postpush_rcsum(skb, hdr, ETH_HLEN); + + /* safe right before invalidate_flow_key */ + key->mac_proto = MAC_PROTO_ETHERNET; + invalidate_flow_key(key); + return 0; +} + static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, __be32 addr, __be32 new_addr) { @@ -673,7 +719,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk skb_reset_mac_len(skb); } - ovs_vport_send(vport, skb); + ovs_vport_send(vport, skb, data->mac_proto); return 0; } @@ -692,7 +738,7 @@ static struct dst_ops ovs_dst_ops = { * ovs_vport_output(), which is called once per fragmented packet. 
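* (The mac_proto saved in ovs_frag_data by prepare_frag() below lets ovs_vport_output() hand each fragment back to ovs_vport_send() with the original L2 type.)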
*/ static void prepare_frag(struct vport *vport, struct sk_buff *skb, - u16 orig_network_offset) + u16 orig_network_offset, u8 mac_proto) { unsigned int hlen = skb_network_offset(skb); struct ovs_frag_data *data; @@ -705,6 +751,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb, data->network_offset = orig_network_offset; data->vlan_tci = skb->vlan_tci; data->vlan_proto = skb->vlan_proto; + data->mac_proto = mac_proto; data->l2_len = hlen; memcpy(&data->l2_data, skb->data, hlen); @@ -713,7 +760,8 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb, } static void ovs_fragment(struct net *net, struct vport *vport, - struct sk_buff *skb, u16 mru, __be16 ethertype) + struct sk_buff *skb, u16 mru, + struct sw_flow_key *key) { u16 orig_network_offset = 0; @@ -727,11 +775,12 @@ static void ovs_fragment(struct net *net, struct vport *vport, goto err; } - if (ethertype == htons(ETH_P_IP)) { + if (key->eth.type == htons(ETH_P_IP)) { struct dst_entry ovs_dst; unsigned long orig_dst; - prepare_frag(vport, skb, orig_network_offset); + prepare_frag(vport, skb, orig_network_offset, + ovs_key_mac_proto(key)); dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1, DST_OBSOLETE_NONE, DST_NOCOUNT); ovs_dst.dev = vport->dev; @@ -742,7 +791,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, ip_do_fragment(net, skb->sk, skb, ovs_vport_output); refdst_drop(orig_dst); - } else if (ethertype == htons(ETH_P_IPV6)) { + } else if (key->eth.type == htons(ETH_P_IPV6)) { const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); unsigned long orig_dst; struct rt6_info ovs_rt; @@ -751,7 +800,8 @@ static void ovs_fragment(struct net *net, struct vport *vport, goto err; } - prepare_frag(vport, skb, orig_network_offset); + prepare_frag(vport, skb, orig_network_offset, + ovs_key_mac_proto(key)); memset(&ovs_rt, 0, sizeof(ovs_rt)); dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, DST_OBSOLETE_NONE, DST_NOCOUNT); @@ -765,7 +815,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, refdst_drop(orig_dst); } else { WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", - ovs_vport_name(vport), ntohs(ethertype), mru, + ovs_vport_name(vport), ntohs(key->eth.type), mru, vport->dev->mtu); goto err; } @@ -785,26 +835,19 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, u32 cutlen = OVS_CB(skb)->cutlen; if (unlikely(cutlen > 0)) { - if (skb->len - cutlen > ETH_HLEN) + if (skb->len - cutlen > ovs_mac_header_len(key)) pskb_trim(skb, skb->len - cutlen); else - pskb_trim(skb, ETH_HLEN); + pskb_trim(skb, ovs_mac_header_len(key)); } - if (likely(!mru || (skb->len <= mru + ETH_HLEN))) { - ovs_vport_send(vport, skb); + if (likely(!mru || + (skb->len <= mru + vport->dev->hard_header_len))) { + ovs_vport_send(vport, skb, ovs_key_mac_proto(key)); } else if (mru <= vport->dev->mtu) { struct net *net = read_pnet(&dp->net); - __be16 ethertype = key->eth.type; - - if (!is_flow_key_valid(key)) { - if (eth_p_mpls(skb->protocol)) - ethertype = skb->inner_protocol; - else - ethertype = vlan_get_protocol(skb); - } - ovs_fragment(net, vport, skb, mru, ethertype); + ovs_fragment(net, vport, skb, mru, key); } else { kfree_skb(skb); } @@ -1198,6 +1241,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, if (err) return err == -EINPROGRESS ? 
0 : err; break; + + case OVS_ACTION_ATTR_PUSH_ETH: + err = push_eth(skb, key, nla_data(a)); + break; + + case OVS_ACTION_ATTR_POP_ETH: + err = pop_eth(skb, key); + break; } if (unlikely(err)) { diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index fa8760176b7d..1402f1be642d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -560,7 +560,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct sw_flow *flow; struct sw_flow_actions *sf_acts; struct datapath *dp; - struct ethhdr *eth; struct vport *input_vport; u16 mru = 0; int len; @@ -581,17 +580,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len); - skb_reset_mac_header(packet); - eth = eth_hdr(packet); - - /* Normally, setting the skb 'protocol' field would be handled by a - * call to eth_type_trans(), but it assumes there's a sending - * device, which we may not have. */ - if (eth_proto_is_802_3(eth->h_proto)) - packet->protocol = eth->h_proto; - else - packet->protocol = htons(ETH_P_802_2); - /* Set packet's mru */ if (a[OVS_PACKET_ATTR_MRU]) { mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]); @@ -618,6 +606,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); packet->priority = flow->key.phy.priority; packet->mark = flow->key.phy.skb_mark; + packet->protocol = flow->key.eth.type; rcu_read_lock(); dp = get_dp_rcu(net, ovs_header->dp_ifindex); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 22087062bd10..08aa926cd5cf 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -334,14 +334,17 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) return 1; } -static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +static void clear_vlan(struct sw_flow_key *key) { - int res; - key->eth.vlan.tci = 0; key->eth.vlan.tpid = 0; key->eth.cvlan.tci = 0; key->eth.cvlan.tpid = 0; +} + +static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +{ + int res; if (skb_vlan_tag_present(skb)) { key->eth.vlan.tci = htons(skb->vlan_tci); @@ -483,17 +486,20 @@ invalid: * * Returns 0 if successful, otherwise a negative errno value. * - * Initializes @skb header pointers as follows: + * Initializes @skb header fields as follows: * - * - skb->mac_header: the Ethernet header. + * - skb->mac_header: the L2 header. * - * - skb->network_header: just past the Ethernet header, or just past the - * VLAN header, to the first byte of the Ethernet payload. + * - skb->network_header: just past the L2 header, or just past the + * VLAN header, to the first byte of the L2 payload. * * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 * on output, then just past the IP header, if one is present and * of a correct length, otherwise the same as skb->network_header. * For other key->eth.type values it is left untouched. + * + * - skb->protocol: the type of the data starting at skb->network_header. + * Equals to key->eth.type. */ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) { @@ -505,28 +511,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) skb_reset_mac_header(skb); - /* Link layer. We are guaranteed to have at least the 14 byte Ethernet - * header in the linear data area. 
- */ - eth = eth_hdr(skb); - ether_addr_copy(key->eth.src, eth->h_source); - ether_addr_copy(key->eth.dst, eth->h_dest); + /* Link layer. */ + clear_vlan(key); + if (key->mac_proto == MAC_PROTO_NONE) { + if (unlikely(eth_type_vlan(skb->protocol))) + return -EINVAL; - __skb_pull(skb, 2 * ETH_ALEN); - /* We are going to push all headers that we pull, so no need to - * update skb->csum here. - */ + skb_reset_network_header(skb); + } else { + eth = eth_hdr(skb); + ether_addr_copy(key->eth.src, eth->h_source); + ether_addr_copy(key->eth.dst, eth->h_dest); - if (unlikely(parse_vlan(skb, key))) - return -ENOMEM; + __skb_pull(skb, 2 * ETH_ALEN); + /* We are going to push all headers that we pull, so no need to + * update skb->csum here. + */ - key->eth.type = parse_ethertype(skb); - if (unlikely(key->eth.type == htons(0))) - return -ENOMEM; + if (unlikely(parse_vlan(skb, key))) + return -ENOMEM; + + skb->protocol = parse_ethertype(skb); + if (unlikely(skb->protocol == htons(0))) + return -ENOMEM; - skb_reset_network_header(skb); + skb_reset_network_header(skb); + __skb_push(skb, skb->data - skb_mac_header(skb)); + } skb_reset_mac_len(skb); - __skb_push(skb, skb->data - skb_mac_header(skb)); + key->eth.type = skb->protocol; /* Network layer. */ if (key->eth.type == htons(ETH_P_IP)) { @@ -721,9 +734,25 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) return key_extract(skb, key); } +static int key_extract_mac_proto(struct sk_buff *skb) +{ + switch (skb->dev->type) { + case ARPHRD_ETHER: + return MAC_PROTO_ETHERNET; + case ARPHRD_NONE: + if (skb->protocol == htons(ETH_P_TEB)) + return MAC_PROTO_ETHERNET; + return MAC_PROTO_NONE; + } + WARN_ON_ONCE(1); + return -EINVAL; +} + int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key) { + int res; + /* Extract metadata from packet. */ if (tun_info) { key->tun_proto = ip_tunnel_info_af(tun_info); @@ -751,6 +780,10 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->phy.skb_mark = skb->mark; ovs_ct_fill_key(skb, key); key->ovs_flow_hash = 0; + res = key_extract_mac_proto(skb); + if (res < 0) + return res; + key->mac_proto = res; key->recirc_id = 0; return key_extract(skb, key); @@ -767,5 +800,29 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, if (err) return err; + if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { + /* key_extract assumes that skb->protocol is set up for + * layer 3 packets, which is the case for other callers, + * in particular packets received from the network stack. + * Here the correct value can be set from the metadata + * extracted above. + */ + skb->protocol = key->eth.type; + } else { + struct ethhdr *eth; + + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + /* Normally, setting the skb 'protocol' field would be + * handled by a call to eth_type_trans(), but it assumes + * there's a sending device, which we may not have. + */ + if (eth_proto_is_802_3(eth->h_proto)) + skb->protocol = eth->h_proto; + else + skb->protocol = htons(ETH_P_802_2); + } + return key_extract(skb, key); } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index ae783f5c6695..f61cae7f9030 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -37,6 +37,12 @@ struct sk_buff; +enum sw_flow_mac_proto { + MAC_PROTO_NONE = 0, + MAC_PROTO_ETHERNET, +}; +#define SW_FLOW_KEY_INVALID 0x80 + /* Store options at the end of the array if they are less than the * maximum size.
This allows us to get the benefits of variable length * matching for small options. @@ -68,6 +74,7 @@ struct sw_flow_key { u32 skb_mark; /* SKB mark. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ } __packed phy; /* Safe when right after 'tun_key'. */ + u8 mac_proto; /* MAC layer protocol (e.g. Ethernet). */ u8 tun_proto; /* Protocol of encapsulating tunnel. */ u32 ovs_flow_hash; /* Datapath computed hash value. */ u32 recirc_id; /* Recirculation ID. */ @@ -206,6 +213,21 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; +static inline u8 ovs_key_mac_proto(const struct sw_flow_key *key) +{ + return key->mac_proto & ~SW_FLOW_KEY_INVALID; +} + +static inline u16 __ovs_mac_header_len(u8 mac_proto) +{ + return mac_proto == MAC_PROTO_ETHERNET ? ETH_HLEN : 0; +} + +static inline u16 ovs_mac_header_len(const struct sw_flow_key *key) +{ + return __ovs_mac_header_len(ovs_key_mac_proto(key)); +} + static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid) { return sfid->ufid_len; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ae25ded82b3b..d19044f2b1f4 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -123,7 +123,7 @@ static void update_range(struct sw_flow_match *match, static bool match_validate(const struct sw_flow_match *match, u64 key_attrs, u64 mask_attrs, bool log) { - u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; + u64 key_expected = 0; u64 mask_allowed = key_attrs; /* At most allow all key attributes */ /* The following mask attributes allowed only if they @@ -969,10 +969,33 @@ static int parse_vlan_from_nlattrs(struct sw_flow_match *match, return 0; } +static int parse_eth_type_from_nlattrs(struct sw_flow_match *match, + u64 *attrs, const struct nlattr **a, + bool is_mask, bool log) +{ + __be16 eth_type; + + eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + if (is_mask) { + /* Always exact match EtherType. */ + eth_type = htons(0xffff); + } else if (!eth_proto_is_802_3(eth_type)) { + OVS_NLERR(log, "EtherType %x is less than min %x", + ntohs(eth_type), ETH_P_802_3_MIN); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); + *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); + return 0; +} + static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, u64 *attrs, const struct nlattr **a, bool is_mask, bool log) { + u8 mac_proto = MAC_PROTO_ETHERNET; + if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); @@ -1059,6 +1082,21 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, sizeof(*cl), is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); } + + /* For layer 3 packets the Ethernet type is provided + * and treated as metadata but no MAC addresses are provided. + */ + if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) && + (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) + mac_proto = MAC_PROTO_NONE; + + /* Always exact match mac_proto */ + SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 
0xff : mac_proto, is_mask); + + if (mac_proto == MAC_PROTO_NONE) + return parse_eth_type_from_nlattrs(match, attrs, a, is_mask, + log); + return 0; } @@ -1081,33 +1119,26 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, SW_FLOW_KEY_MEMCPY(match, eth.dst, eth_key->eth_dst, ETH_ALEN, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); - } - if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { - /* VLAN attribute is always parsed before getting here since it - * may occur multiple times. - */ - OVS_NLERR(log, "VLAN attribute unexpected."); - return -EINVAL; - } - - if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { - __be16 eth_type; - - eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - if (is_mask) { - /* Always exact match EtherType. */ - eth_type = htons(0xffff); - } else if (!eth_proto_is_802_3(eth_type)) { - OVS_NLERR(log, "EtherType %x is less than min %x", - ntohs(eth_type), ETH_P_802_3_MIN); + if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { + /* VLAN attribute is always parsed before getting here since it + * may occur multiple times. + */ + OVS_NLERR(log, "VLAN attribute unexpected."); return -EINVAL; } - SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); - } else if (!is_mask) { - SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); + if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { + err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask, + log); + if (err) + return err; + } else if (!is_mask) { + SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); + } + } else if (!match->key->eth.type) { + OVS_NLERR(log, "Either Ethernet header or EtherType is required."); + return -EINVAL; } if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { @@ -1556,42 +1587,44 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if (ovs_ct_put_key(output, skb)) goto nla_put_failure; - nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); - if (!nla) - goto nla_put_failure; - - eth_key = nla_data(nla); - ether_addr_copy(eth_key->eth_src, output->eth.src); - ether_addr_copy(eth_key->eth_dst, output->eth.dst); - - if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { - if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask)) + if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) { + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); + if (!nla) goto nla_put_failure; - encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); - if (!swkey->eth.vlan.tci) - goto unencap; - if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { - if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) + eth_key = nla_data(nla); + ether_addr_copy(eth_key->eth_src, output->eth.src); + ether_addr_copy(eth_key->eth_dst, output->eth.dst); + + if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { + if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask)) goto nla_put_failure; - in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); - if (!swkey->eth.cvlan.tci) + encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + if (!swkey->eth.vlan.tci) goto unencap; + + if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { + if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) + goto nla_put_failure; + in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + if (!swkey->eth.cvlan.tci) + goto unencap; + } } - } - if (swkey->eth.type == htons(ETH_P_802_2)) { - /* - * Ethertype 802.2 is represented in the netlink with omitted - * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and - * 0xffff in the mask attribute. 
Ethertype can also - * be wildcarded. - */ - if (is_mask && output->eth.type) - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, - output->eth.type)) - goto nla_put_failure; - goto unencap; + if (swkey->eth.type == htons(ETH_P_802_2)) { + /* + * Ethertype 802.2 is represented in the netlink with omitted + * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and + * 0xffff in the mask attribute. Ethertype can also + * be wildcarded. + */ + if (is_mask && output->eth.type) + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, + output->eth.type)) + goto nla_put_failure; + goto unencap; + } } if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) @@ -2126,8 +2159,8 @@ static bool validate_masked(u8 *data, int len) static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, - struct sw_flow_actions **sfa, - bool *skip_copy, __be16 eth_type, bool masked, bool log) + struct sw_flow_actions **sfa, bool *skip_copy, + u8 mac_proto, __be16 eth_type, bool masked, bool log) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); @@ -2157,9 +2190,13 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_CT_MARK: case OVS_KEY_ATTR_CT_LABELS: - case OVS_KEY_ATTR_ETHERNET: break; + case OVS_KEY_ATTR_ETHERNET: + if (mac_proto != MAC_PROTO_ETHERNET) + return -EINVAL; + break; + case OVS_KEY_ATTR_TUNNEL: if (masked) return -EINVAL; /* Masked tunnel set not supported. */ @@ -2324,6 +2360,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, int depth, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log) { + u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; int rem, err; @@ -2346,6 +2383,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), [OVS_ACTION_ATTR_CT] = (u32)-1, [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), + [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth), + [OVS_ACTION_ATTR_POP_ETH] = 0, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -2394,10 +2433,14 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, } case OVS_ACTION_ATTR_POP_VLAN: + if (mac_proto != MAC_PROTO_ETHERNET) + return -EINVAL; vlan_tci = htons(0); break; case OVS_ACTION_ATTR_PUSH_VLAN: + if (mac_proto != MAC_PROTO_ETHERNET) + return -EINVAL; vlan = nla_data(a); if (!eth_type_vlan(vlan->vlan_tpid)) return -EINVAL; @@ -2447,14 +2490,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, case OVS_ACTION_ATTR_SET: err = validate_set(a, key, sfa, - &skip_copy, eth_type, false, log); + &skip_copy, mac_proto, eth_type, + false, log); if (err) return err; break; case OVS_ACTION_ATTR_SET_MASKED: err = validate_set(a, key, sfa, - &skip_copy, eth_type, true, log); + &skip_copy, mac_proto, eth_type, + true, log); if (err) return err; break; @@ -2474,6 +2519,22 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, skip_copy = true; break; + case OVS_ACTION_ATTR_PUSH_ETH: + /* Disallow pushing an Ethernet header if one + * is already present */ + if (mac_proto != MAC_PROTO_NONE) + return -EINVAL; + mac_proto = MAC_PROTO_ETHERNET; + break; + + case OVS_ACTION_ATTR_POP_ETH: + if (mac_proto != MAC_PROTO_ETHERNET) + return -EINVAL; + if (vlan_tci & htons(VLAN_TAG_PRESENT)) + return -EINVAL; + mac_proto = MAC_PROTO_NONE; + break; + default: OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL; diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index e825753de1e0..0389398fa4ab 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -57,8 +57,10 @@ static void netdev_port_receive(struct sk_buff *skb) if (unlikely(!skb)) return; - skb_push(skb, ETH_HLEN); - skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + if (skb->dev->type == ARPHRD_ETHER) { + skb_push(skb, ETH_HLEN); + skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + } ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); return; error: @@ -97,7 +99,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) } if (vport->dev->flags & IFF_LOOPBACK || - vport->dev->type != ARPHRD_ETHER || + (vport->dev->type != ARPHRD_ETHER && + vport->dev->type != ARPHRD_NONE) || ovs_is_internal_dev(vport->dev)) { err = -EINVAL; goto error_put; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 9bb85b35a1fb..b6c8524032a0 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -464,9 +464,10 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, return 0; } -static unsigned int packet_length(const struct sk_buff *skb) +static unsigned int packet_length(const struct sk_buff *skb, + struct net_device *dev) { - unsigned int length = skb->len - ETH_HLEN; + unsigned int length = skb->len - dev->hard_header_len; if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol)) @@ -480,14 +481,34 @@ static unsigned int packet_length(const struct sk_buff *skb) return length; } -void ovs_vport_send(struct vport *vport, struct sk_buff *skb) +void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto) { int mtu = vport->dev->mtu; - if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { + switch (vport->dev->type) { + case ARPHRD_NONE: + if (mac_proto == MAC_PROTO_ETHERNET) { + skb_reset_network_header(skb); + skb_reset_mac_len(skb); + skb->protocol = htons(ETH_P_TEB); + } else if (mac_proto != MAC_PROTO_NONE) { + WARN_ON_ONCE(1); + goto drop; + } + break; + case ARPHRD_ETHER: + if (mac_proto != MAC_PROTO_ETHERNET) + goto drop; + break; + default: + goto drop; + } + + if (unlikely(packet_length(skb, vport->dev) > mtu && + !skb_is_gso(skb))) { net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", vport->dev->name, - packet_length(skb), mtu); + packet_length(skb, vport->dev), mtu); vport->dev->stats.tx_errors++; goto drop; } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 46e5b69927c7..cda66c26ad08 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -197,6 +197,6 @@ int __ovs_vport_ops_register(struct vport_ops *ops); }) void ovs_vport_ops_unregister(struct vport_ops *ops); -void ovs_vport_send(struct vport *vport, struct sk_buff *skb); +void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto); #endif /* vport.h */ diff --git a/net/rds/tcp.c b/net/rds/tcp.c index fcddacc92e01..3296a6ac583a 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -220,7 +220,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) write_unlock_bh(&sock->sk->sk_callback_lock); } -static void rds_tcp_tc_info(struct socket *sock, unsigned int len, +static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { @@ -229,6 +229,7 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len, unsigned long flags; struct sockaddr_in sin; int sinlen; + struct socket *sock; 
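+ /* The rds_sock argument is the socket requesting the info dump, not a transport socket; each rds_tcp_connection carries its own t_sock, which is read (and NULL-checked) under the list lock below. */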
spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); @@ -237,12 +238,17 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len, list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { - sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 0); - tsinfo.local_addr = sin.sin_addr.s_addr; - tsinfo.local_port = sin.sin_port; - sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 1); - tsinfo.peer_addr = sin.sin_addr.s_addr; - tsinfo.peer_port = sin.sin_port; + sock = tc->t_sock; + if (sock) { + sock->ops->getname(sock, (struct sockaddr *)&sin, + &sinlen, 0); + tsinfo.local_addr = sin.sin_addr.s_addr; + tsinfo.local_port = sin.sin_port; + sock->ops->getname(sock, (struct sockaddr *)&sin, + &sinlen, 1); + tsinfo.peer_addr = sin.sin_addr.s_addr; + tsinfo.peer_port = sin.sin_port; + } tsinfo.hdr_rem = tc->t_tinc_hdr_rem; tsinfo.data_rem = tc->t_tinc_data_rem; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index e0b23fb5b8d5..c9c496844cd7 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -103,7 +103,7 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) if (!peer_is_smaller) return NULL; - for (i = 1; i < npaths; i++) { + for (i = 0; i < npaths; i++) { struct rds_conn_path *cp = &conn->c_path[i]; if (rds_conn_path_transition(cp, RDS_CONN_DOWN, diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 44fb8d893c7d..1d87b5453ef7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1053,7 +1053,7 @@ void rxrpc_data_ready(struct sock *udp_sk) ASSERT(!irqs_disabled()); - skb = skb_recv_datagram(udp_sk, 0, 1, &ret); + skb = skb_recv_udp(udp_sk, 0, 1, &ret); if (!skb) { if (ret == -EAGAIN) return; @@ -1075,10 +1075,9 @@ void rxrpc_data_ready(struct sock *udp_sk) __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0); - /* The socket buffer we have is owned by UDP, with UDP's data all over - * it, but we really want our own data there. + /* The UDP protocol already released all skb resources; + * we are free to add our own data there. 
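+ * (This is why the skb_orphan() call below is dropped by this patch.)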
*/ - skb_orphan(skb); sp = rxrpc_skb(skb); /* dig out the RxRPC connection details */ diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index af47bdf2f483..edc720f11687 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -16,7 +16,6 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/dst.h> -#include <net/dst_metadata.h> #include <linux/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_tunnel_key.h> @@ -67,6 +66,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16}, }; static int tunnel_key_init(struct net *net, struct nlattr *nla, @@ -81,6 +81,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, struct tc_tunnel_key *parm; struct tcf_tunnel_key *t; bool exists = false; + __be16 dst_port = 0; __be64 key_id; int ret = 0; int err; @@ -111,6 +112,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID])); + if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT]) + dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]); + if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] && tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) { __be32 saddr; @@ -120,7 +124,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, - TUNNEL_KEY, key_id, 0); + dst_port, TUNNEL_KEY, + key_id, 0); } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { struct in6_addr saddr; @@ -130,7 +135,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0, - TUNNEL_KEY, key_id, 0); + dst_port, TUNNEL_KEY, + key_id, 0); } if (!metadata) { @@ -258,7 +264,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) || tunnel_key_dump_addresses(skb, - &params->tcft_enc_metadata->u.tun_info)) + tunnel_key_dump_addresses(skb, + &params->tcft_enc_metadata->u.tun_info) || + nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst)) goto nla_put_failure; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index a8fb1ca03b3e..e8dd09af0d0c 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -43,6 +43,7 @@ struct fl_flow_key { struct flow_dissector_key_ipv4_addrs enc_ipv4; struct flow_dissector_key_ipv6_addrs enc_ipv6; }; + struct flow_dissector_key_ports enc_tp; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs.
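* (The __aligned() pads struct fl_flow_key to a multiple of sizeof(long), so the new enc_tp member is covered by the same word-by-word masked compare.)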
*/ struct fl_flow_mask_range { @@ -155,6 +156,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, } skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id); + skb_key.enc_tp.src = key->tp_src; + skb_key.enc_tp.dst = key->tp_dst; } skb_key.indev_ifindex = skb->skb_iif; @@ -344,6 +347,14 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_SCTP_SRC_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_SCTP_DST_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_SCTP_SRC] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_SCTP_DST] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK] = { .type = NLA_U16 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -453,6 +464,13 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)); + } else if (key->basic.ip_proto == IPPROTO_SCTP) { + fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC, + &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK, + sizeof(key->tp.src)); + fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST, + &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK, + sizeof(key->tp.dst)); } if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] || @@ -489,6 +507,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb, &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC, sizeof(key->enc_key_id.keyid)); + fl_set_key_val(tb, &key->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, + &mask->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, + sizeof(key->enc_tp.src)); + + fl_set_key_val(tb, &key->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT, + &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, + sizeof(key->enc_tp.dst)); + return 0; } @@ -556,6 +582,18 @@ static void fl_init_dissector(struct cls_fl_head *head, FLOW_DISSECTOR_KEY_PORTS, tp); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_VLAN, vlan); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6); + if (FL_KEY_IS_MASKED(&mask->key, enc_ipv4) || + FL_KEY_IS_MASKED(&mask->key, enc_ipv6)) + FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL, + enc_control); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp); skb_flow_dissector_init(&head->dissector, keys, cnt); } @@ -897,6 +935,14 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)))) goto nla_put_failure; + else if (key->basic.ip_proto == IPPROTO_SCTP && + (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC, + &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK, + sizeof(key->tp.src)) || + fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST, + &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK, + sizeof(key->tp.dst)))) + goto nla_put_failure; if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && (fl_dump_key_val(skb, &key->enc_ipv4.src, @@ -922,7 +968,17 @@ static int fl_dump(struct 
net *net, struct tcf_proto *tp, unsigned long fh, if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, &mask->enc_key_id, TCA_FLOWER_UNSPEC, - sizeof(key->enc_key_id))) + sizeof(key->enc_key_id)) || + fl_dump_key_val(skb, &key->enc_tp.src, + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, + &mask->enc_tp.src, + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, + sizeof(key->enc_tp.src)) || + fl_dump_key_val(skb, &key->enc_tp.dst, + TCA_FLOWER_KEY_ENC_UDP_DST_PORT, + &mask->enc_tp.dst, + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, + sizeof(key->enc_tp.dst))) goto nla_put_failure; nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 206dc24add3a..f337f1bdd1d4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -960,6 +960,17 @@ static struct Qdisc *qdisc_create(struct net_device *dev, sch->handle = handle; + /* This exists to keep backward compatibility with a userspace + * loophole, which allowed userspace to get the IFF_NO_QUEUE + * facility on older kernels by setting tx_queue_len=0 (prior + * to qdisc init) and then forgetting to reinit tx_queue_len + * before attaching another qdisc. + */ + if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) { + dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; + netdev_info(dev, "Caught tx_queue_len zero misconfig\n"); + } + if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { if (qdisc_is_percpu_stats(sch)) { sch->cpu_bstats = diff --git a/net/socket.c b/net/socket.c index 970a7ea3fc4a..4ce33c35e606 100644 --- a/net/socket.c +++ b/net/socket.c @@ -518,8 +518,22 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, return used; } +static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) +{ + int err = simple_setattr(dentry, iattr); + + if (!err && (iattr->ia_valid & ATTR_UID)) { + struct socket *sock = SOCKET_I(d_inode(dentry)); + + sock->sk->sk_uid = iattr->ia_uid; + } + + return err; +} + static const struct inode_operations sockfs_inode_ops = { .listxattr = sockfs_listxattr, + .setattr = sockfs_setattr, }; /** diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index e2a55dc787e6..78da4aee3543 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -547,7 +547,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, 0, 0, MSG_PEEK | MSG_DONTWAIT); if (err >= 0) - skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err); + skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); if (skb == NULL) { if (err != -EAGAIN) { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1758665d609c..7178d0aa7861 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1080,7 +1080,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) if (sk == NULL) goto out; for (;;) { - skb = skb_recv_datagram(sk, 0, 1, &err); + skb = skb_recv_udp(sk, 0, 1, &err); if (skb != NULL) { xs_udp_data_read_skb(&transport->xprt, sk, skb); consume_skb(skb); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 145082e2ba36..87620183910e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2113,8 +2113,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, mutex_lock(&u->iolock); skip = sk_peek_offset(sk, flags); - skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err, - &last); + skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip, + &err, &last); if (skb) break;
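A minimal sketch of the receive loop that the svcsock, xprtsock and rxrpc hunks above are converted to (illustrative only, not part of the patch set; the function name and the assumption that the caller owns 'sk' are placeholders):

#include <linux/skbuff.h>
#include <net/udp.h>

/* Drain a UDP socket with the new helper; mirrors xs_udp_data_receive()
 * above. With noblock == 1, skb_recv_udp() returns NULL and sets err to
 * -EAGAIN once the receive queue is empty.
 */
static void example_udp_drain(struct sock *sk)
{
	struct sk_buff *skb;
	int err;

	for (;;) {
		skb = skb_recv_udp(sk, 0, 1, &err);
		if (!skb)
			break;
		/* ... process the datagram; no skb_orphan() needed ... */
		consume_skb(skb);
	}
}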