diff options
119 files changed, 1943 insertions, 894 deletions
diff --git a/Documentation/devicetree/bindings/leds/leds-pwm-multicolor.yaml b/Documentation/devicetree/bindings/leds/leds-pwm-multicolor.yaml index 5edfbe347341..a31a202afe5c 100644 --- a/Documentation/devicetree/bindings/leds/leds-pwm-multicolor.yaml +++ b/Documentation/devicetree/bindings/leds/leds-pwm-multicolor.yaml @@ -41,6 +41,8 @@ properties: pwm-names: true + active-low: true + color: true required: diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml index 49f90cfb4698..af525ed29792 100644 --- a/Documentation/netlink/specs/mptcp_pm.yaml +++ b/Documentation/netlink/specs/mptcp_pm.yaml @@ -292,13 +292,14 @@ operations: - name: get-addr doc: Get endpoint information - attribute-set: endpoint + attribute-set: attr dont-validate: [ strict ] flags: [ uns-admin-perm ] do: &get-addr-attrs request: attributes: - addr + - token reply: attributes: - addr diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 4db6122c9b43..339db6e4a1d5 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -194,15 +194,10 @@ static int bareudp_init(struct net_device *dev) struct bareudp_dev *bareudp = netdev_priv(dev); int err; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - err = gro_cells_init(&bareudp->gro_cells, dev); - if (err) { - free_percpu(dev->tstats); + if (err) return err; - } + return 0; } @@ -211,7 +206,6 @@ static void bareudp_uninit(struct net_device *dev) struct bareudp_dev *bareudp = netdev_priv(dev); gro_cells_destroy(&bareudp->gro_cells); - free_percpu(dev->tstats); } static struct socket *bareudp_create_sock(struct net *net, __be16 port) @@ -529,7 +523,6 @@ static const struct net_device_ops bareudp_netdev_ops = { .ndo_open = bareudp_open, .ndo_stop = bareudp_stop, .ndo_start_xmit = bareudp_xmit, - .ndo_get_stats64 = dev_get_tstats64, .ndo_fill_metadata_dst = bareudp_fill_metadata_dst, }; @@ -567,6 +560,7 @@ static void bareudp_setup(struct net_device *dev) netif_keep_dst(dev); dev->priv_flags |= IFF_NO_QUEUE; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; } static int bareudp_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 03d966fa67b2..1ae11b180d54 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -966,18 +966,10 @@ mt753x_trap_frames(struct mt7530_priv *priv) MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY)); } -static int +static void mt753x_cpu_port_enable(struct dsa_switch *ds, int port) { struct mt7530_priv *priv = ds->priv; - int ret; - - /* Setup max capability of CPU port at first */ - if (priv->info->cpu_port_config) { - ret = priv->info->cpu_port_config(ds, port); - if (ret) - return ret; - } /* Enable Mediatek header mode on the cpu port */ mt7530_write(priv, MT7530_PVC_P(port), @@ -1003,8 +995,6 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port) /* Set to fallback mode for independent VLAN learning */ mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, MT7530_PORT_FALLBACK_MODE); - - return 0; } static int @@ -1028,7 +1018,6 @@ mt7530_port_enable(struct dsa_switch *ds, int port, priv->ports[port].enable = true; mt7530_rmw(priv, MT7530_PCR_P(port), PCR_MATRIX_MASK, priv->ports[port].pm); - mt7530_clear(priv, MT7530_PMCR_P(port), PMCR_LINK_SETTINGS_MASK); mutex_unlock(&priv->reg_mutex); @@ -1048,7 +1037,6 @@ mt7530_port_disable(struct dsa_switch *ds, int port) priv->ports[port].enable = false; mt7530_rmw(priv, MT7530_PCR_P(port), PCR_MATRIX_MASK, PCR_MATRIX_CLR); - mt7530_clear(priv, MT7530_PMCR_P(port), PMCR_LINK_SETTINGS_MASK); mutex_unlock(&priv->reg_mutex); } @@ -2055,7 +2043,7 @@ mt7530_setup_irq(struct mt7530_priv *priv) } /* This register must be set for MT7530 to properly fire interrupts */ - if (priv->id != ID_MT7531) + if (priv->id == ID_MT7530 || priv->id == ID_MT7621) mt7530_set(priv, MT7530_TOP_SIG_CTRL, TOP_SIG_CTRL_NORMAL); ret = request_threaded_irq(priv->irq, NULL, mt7530_irq_thread_fn, @@ -2261,14 +2249,18 @@ mt7530_setup(struct dsa_switch *ds) val |= MHWTRAP_MANUAL; mt7530_write(priv, MT7530_MHWTRAP, val); - priv->p6_interface = PHY_INTERFACE_MODE_NA; - mt753x_trap_frames(priv); /* Enable and reset MIB counters */ mt7530_mib_reset(ds); for (i = 0; i < MT7530_NUM_PORTS; i++) { + /* Clear link settings and enable force mode to force link down + * on all ports until they're enabled later. + */ + mt7530_rmw(priv, MT7530_PMCR_P(i), PMCR_LINK_SETTINGS_MASK | + PMCR_FORCE_MODE, PMCR_FORCE_MODE); + /* Disable forwarding by default on all ports */ mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, PCR_MATRIX_CLR); @@ -2277,9 +2269,7 @@ mt7530_setup(struct dsa_switch *ds) mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); if (dsa_is_cpu_port(ds, i)) { - ret = mt753x_cpu_port_enable(ds, i); - if (ret) - return ret; + mt753x_cpu_port_enable(ds, i); } else { mt7530_port_disable(ds, i); @@ -2373,6 +2363,12 @@ mt7531_setup_common(struct dsa_switch *ds) UNU_FFP_MASK); for (i = 0; i < MT7530_NUM_PORTS; i++) { + /* Clear link settings and enable force mode to force link down + * on all ports until they're enabled later. + */ + mt7530_rmw(priv, MT7530_PMCR_P(i), PMCR_LINK_SETTINGS_MASK | + MT7531_FORCE_MODE, MT7531_FORCE_MODE); + /* Disable forwarding by default on all ports */ mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, PCR_MATRIX_CLR); @@ -2383,9 +2379,7 @@ mt7531_setup_common(struct dsa_switch *ds) mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR); if (dsa_is_cpu_port(ds, i)) { - ret = mt753x_cpu_port_enable(ds, i); - if (ret) - return ret; + mt753x_cpu_port_enable(ds, i); } else { mt7530_port_disable(ds, i); @@ -2451,14 +2445,12 @@ mt7531_setup(struct dsa_switch *ds) val = mt7530_read(priv, MT7531_TOP_SIG_SR); priv->p5_sgmii = !!(val & PAD_DUAL_SGMII_EN); - /* all MACs must be forced link-down before sw reset */ + /* Force link down on all ports before internal reset */ for (i = 0; i < MT7530_NUM_PORTS; i++) mt7530_write(priv, MT7530_PMCR_P(i), MT7531_FORCE_LNK); /* Reset the switch through internal reset */ - mt7530_write(priv, MT7530_SYS_CTRL, - SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST | - SYS_CTRL_REG_RST); + mt7530_write(priv, MT7530_SYS_CTRL, SYS_CTRL_SW_RST | SYS_CTRL_REG_RST); if (!priv->p5_sgmii) { mt7531_pll_setup(priv); @@ -2476,10 +2468,6 @@ mt7531_setup(struct dsa_switch *ds) mt7530_rmw(priv, MT7531_GPIO_MODE0, MT7531_GPIO0_MASK, MT7531_GPIO0_INTERRUPT); - /* Let phylink decide the interface later. */ - priv->p5_interface = PHY_INTERFACE_MODE_NA; - priv->p6_interface = PHY_INTERFACE_MODE_NA; - /* Enable PHY core PLL, since phy_device has not yet been created * provided for phy_[read,write]_mmd_indirect is called, we provide * our own mt7531_ind_mmd_phy_[read,write] to complete this @@ -2589,7 +2577,7 @@ static void mt7988_mac_port_get_caps(struct dsa_switch *ds, int port, } } -static int +static void mt7530_mac_config(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface) { @@ -2599,22 +2587,14 @@ mt7530_mac_config(struct dsa_switch *ds, int port, unsigned int mode, mt7530_setup_port5(priv->ds, interface); else if (port == 6) mt7530_setup_port6(priv->ds, interface); - - return 0; } -static int mt7531_rgmii_setup(struct mt7530_priv *priv, u32 port, - phy_interface_t interface, - struct phy_device *phydev) +static void mt7531_rgmii_setup(struct mt7530_priv *priv, u32 port, + phy_interface_t interface, + struct phy_device *phydev) { u32 val; - if (priv->p5_sgmii) { - dev_err(priv->dev, "RGMII mode is not available for port %d\n", - port); - return -EINVAL; - } - val = mt7530_read(priv, MT7531_CLKGEN_CTRL); val |= GP_CLK_EN; val &= ~GP_MODE_MASK; @@ -2642,31 +2622,14 @@ static int mt7531_rgmii_setup(struct mt7530_priv *priv, u32 port, case PHY_INTERFACE_MODE_RGMII_ID: break; default: - return -EINVAL; + break; } } - mt7530_write(priv, MT7531_CLKGEN_CTRL, val); - - return 0; -} -static bool mt753x_is_mac_port(u32 port) -{ - return (port == 5 || port == 6); -} - -static int -mt7988_mac_config(struct dsa_switch *ds, int port, unsigned int mode, - phy_interface_t interface) -{ - if (dsa_is_cpu_port(ds, port) && - interface == PHY_INTERFACE_MODE_INTERNAL) - return 0; - - return -EINVAL; + mt7530_write(priv, MT7531_CLKGEN_CTRL, val); } -static int +static void mt7531_mac_config(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface) { @@ -2674,39 +2637,11 @@ mt7531_mac_config(struct dsa_switch *ds, int port, unsigned int mode, struct phy_device *phydev; struct dsa_port *dp; - if (!mt753x_is_mac_port(port)) { - dev_err(priv->dev, "port %d is not a MAC port\n", port); - return -EINVAL; - } - - switch (interface) { - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_TXID: + if (phy_interface_mode_is_rgmii(interface)) { dp = dsa_to_port(ds, port); phydev = dp->user->phydev; - return mt7531_rgmii_setup(priv, port, interface, phydev); - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_NA: - case PHY_INTERFACE_MODE_1000BASEX: - case PHY_INTERFACE_MODE_2500BASEX: - /* handled in SGMII PCS driver */ - return 0; - default: - return -EINVAL; + mt7531_rgmii_setup(priv, port, interface, phydev); } - - return -EINVAL; -} - -static int -mt753x_mac_config(struct dsa_switch *ds, int port, unsigned int mode, - const struct phylink_link_state *state) -{ - struct mt7530_priv *priv = ds->priv; - - return priv->info->mac_port_config(ds, port, mode, state->interface); } static struct phylink_pcs * @@ -2732,52 +2667,13 @@ mt753x_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state) { struct mt7530_priv *priv = ds->priv; - u32 mcr_cur, mcr_new; - - switch (port) { - case 0 ... 4: - if (state->interface != PHY_INTERFACE_MODE_GMII && - state->interface != PHY_INTERFACE_MODE_INTERNAL) - goto unsupported; - break; - case 5: - if (priv->p5_interface == state->interface) - break; - if (mt753x_mac_config(ds, port, mode, state) < 0) - goto unsupported; - - if (priv->p5_intf_sel != P5_DISABLED) - priv->p5_interface = state->interface; - break; - case 6: - if (priv->p6_interface == state->interface) - break; - - if (mt753x_mac_config(ds, port, mode, state) < 0) - goto unsupported; - - priv->p6_interface = state->interface; - break; - default: -unsupported: - dev_err(ds->dev, "%s: unsupported %s port: %i\n", - __func__, phy_modes(state->interface), port); - return; - } - - mcr_cur = mt7530_read(priv, MT7530_PMCR_P(port)); - mcr_new = mcr_cur; - mcr_new &= ~PMCR_LINK_SETTINGS_MASK; - mcr_new |= PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | PMCR_BACKOFF_EN | - PMCR_BACKPR_EN | PMCR_FORCE_MODE_ID(priv->id); + if ((port == 5 || port == 6) && priv->info->mac_port_config) + priv->info->mac_port_config(ds, port, mode, state->interface); /* Are we connected to external phy */ if (port == 5 && dsa_is_user_port(ds, 5)) - mcr_new |= PMCR_EXT_PHY; - - if (mcr_new != mcr_cur) - mt7530_write(priv, MT7530_PMCR_P(port), mcr_new); + mt7530_set(priv, MT7530_PMCR_P(port), PMCR_EXT_PHY); } static void mt753x_phylink_mac_link_down(struct dsa_switch *ds, int port, @@ -2801,17 +2697,10 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port, mcr = PMCR_RX_EN | PMCR_TX_EN | PMCR_FORCE_LNK; - /* MT753x MAC works in 1G full duplex mode for all up-clocked - * variants. - */ - if (interface == PHY_INTERFACE_MODE_TRGMII || - (phy_interface_mode_is_8023z(interface))) { - speed = SPEED_1000; - duplex = DUPLEX_FULL; - } - switch (speed) { case SPEED_1000: + case SPEED_2500: + case SPEED_10000: mcr |= PMCR_FORCE_SPEED_1000; break; case SPEED_100: @@ -2829,6 +2718,7 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port, if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, false) >= 0) { switch (speed) { case SPEED_1000: + case SPEED_2500: mcr |= PMCR_FORCE_EEE1G; break; case SPEED_100: @@ -2840,63 +2730,6 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port, mt7530_set(priv, MT7530_PMCR_P(port), mcr); } -static int -mt7531_cpu_port_config(struct dsa_switch *ds, int port) -{ - struct mt7530_priv *priv = ds->priv; - phy_interface_t interface; - int speed; - int ret; - - switch (port) { - case 5: - if (!priv->p5_sgmii) - interface = PHY_INTERFACE_MODE_RGMII; - else - interface = PHY_INTERFACE_MODE_2500BASEX; - - priv->p5_interface = interface; - break; - case 6: - interface = PHY_INTERFACE_MODE_2500BASEX; - - priv->p6_interface = interface; - break; - default: - return -EINVAL; - } - - if (interface == PHY_INTERFACE_MODE_2500BASEX) - speed = SPEED_2500; - else - speed = SPEED_1000; - - ret = mt7531_mac_config(ds, port, MLO_AN_FIXED, interface); - if (ret) - return ret; - mt7530_write(priv, MT7530_PMCR_P(port), - PMCR_CPU_PORT_SETTING(priv->id)); - mt753x_phylink_mac_link_up(ds, port, MLO_AN_FIXED, interface, NULL, - speed, DUPLEX_FULL, true, true); - - return 0; -} - -static int -mt7988_cpu_port_config(struct dsa_switch *ds, int port) -{ - struct mt7530_priv *priv = ds->priv; - - mt7530_write(priv, MT7530_PMCR_P(port), - PMCR_CPU_PORT_SETTING(priv->id)); - - mt753x_phylink_mac_link_up(ds, port, MLO_AN_FIXED, - PHY_INTERFACE_MODE_INTERNAL, NULL, - SPEED_10000, DUPLEX_FULL, true, true); - - return 0; -} - static void mt753x_phylink_get_caps(struct dsa_switch *ds, int port, struct phylink_config *config) { @@ -2979,17 +2812,9 @@ static int mt753x_setup(struct dsa_switch *ds) { struct mt7530_priv *priv = ds->priv; - int i, ret; - - /* Initialise the PCS devices */ - for (i = 0; i < priv->ds->num_ports; i++) { - priv->pcs[i].pcs.ops = priv->info->pcs_ops; - priv->pcs[i].pcs.neg_mode = true; - priv->pcs[i].priv = priv; - priv->pcs[i].port = i; - } + int ret = priv->info->sw_setup(ds); + int i; - ret = priv->info->sw_setup(ds); if (ret) return ret; @@ -3001,6 +2826,14 @@ mt753x_setup(struct dsa_switch *ds) if (ret && priv->irq) mt7530_free_irq_common(priv); + /* Initialise the PCS devices */ + for (i = 0; i < priv->ds->num_ports; i++) { + priv->pcs[i].pcs.ops = priv->info->pcs_ops; + priv->pcs[i].pcs.neg_mode = true; + priv->pcs[i].priv = priv; + priv->pcs[i].port = i; + } + if (priv->create_sgmii) { ret = priv->create_sgmii(priv); if (ret && priv->irq) @@ -3155,7 +2988,6 @@ const struct mt753x_info mt753x_table[] = { .phy_write_c22 = mt7531_ind_c22_phy_write, .phy_read_c45 = mt7531_ind_c45_phy_read, .phy_write_c45 = mt7531_ind_c45_phy_write, - .cpu_port_config = mt7531_cpu_port_config, .mac_port_get_caps = mt7531_mac_port_get_caps, .mac_port_config = mt7531_mac_config, }, @@ -3167,9 +2999,7 @@ const struct mt753x_info mt753x_table[] = { .phy_write_c22 = mt7531_ind_c22_phy_write, .phy_read_c45 = mt7531_ind_c45_phy_read, .phy_write_c45 = mt7531_ind_c45_phy_write, - .cpu_port_config = mt7988_cpu_port_config, .mac_port_get_caps = mt7988_mac_port_get_caps, - .mac_port_config = mt7988_mac_config, }, }; EXPORT_SYMBOL_GPL(mt753x_table); @@ -3197,8 +3027,7 @@ mt7530_probe_common(struct mt7530_priv *priv) * properly. */ if (!priv->info->sw_setup || !priv->info->phy_read_c22 || - !priv->info->phy_write_c22 || !priv->info->mac_port_get_caps || - !priv->info->mac_port_config) + !priv->info->phy_write_c22 || !priv->info->mac_port_get_caps) return -EINVAL; priv->id = priv->info->id; diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 26a6d2160c08..a71166e0a7fc 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -304,20 +304,11 @@ enum mt7530_vlan_port_acc_frm { MT7531_FORCE_DPX | \ MT7531_FORCE_RX_FC | \ MT7531_FORCE_TX_FC) -#define PMCR_FORCE_MODE_ID(id) ((((id) == ID_MT7531) || ((id) == ID_MT7988)) ? \ - MT7531_FORCE_MODE : PMCR_FORCE_MODE) #define PMCR_LINK_SETTINGS_MASK (PMCR_TX_EN | PMCR_FORCE_SPEED_1000 | \ PMCR_RX_EN | PMCR_FORCE_SPEED_100 | \ PMCR_TX_FC_EN | PMCR_RX_FC_EN | \ PMCR_FORCE_FDX | PMCR_FORCE_LNK | \ PMCR_FORCE_EEE1G | PMCR_FORCE_EEE100) -#define PMCR_CPU_PORT_SETTING(id) (PMCR_FORCE_MODE_ID((id)) | \ - PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | \ - PMCR_BACKOFF_EN | PMCR_BACKPR_EN | \ - PMCR_TX_EN | PMCR_RX_EN | \ - PMCR_TX_FC_EN | PMCR_RX_FC_EN | \ - PMCR_FORCE_SPEED_1000 | \ - PMCR_FORCE_FDX | PMCR_FORCE_LNK) #define MT7530_PMEEECR_P(x) (0x3004 + (x) * 0x100) #define WAKEUP_TIME_1000(x) (((x) & 0xFF) << 24) @@ -724,15 +715,14 @@ struct mt753x_info { int regnum); int (*phy_write_c45)(struct mt7530_priv *priv, int port, int devad, int regnum, u16 val); - int (*cpu_port_config)(struct dsa_switch *ds, int port); void (*mac_port_get_caps)(struct dsa_switch *ds, int port, struct phylink_config *config); void (*mac_port_validate)(struct dsa_switch *ds, int port, phy_interface_t interface, unsigned long *supported); - int (*mac_port_config)(struct dsa_switch *ds, int port, - unsigned int mode, - phy_interface_t interface); + void (*mac_port_config)(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface); }; /* struct mt7530_priv - This is the main data structure for holding the state @@ -750,7 +740,6 @@ struct mt753x_info { * @ports: Holding the state among ports * @reg_mutex: The lock for protecting among process accessing * registers - * @p6_interface Holding the current port 6 interface * @p5_intf_sel: Holding the current port 5 interface select * @p5_sgmii: Flag for distinguishing if port 5 of the MT7531 switch * has got SGMII @@ -772,8 +761,6 @@ struct mt7530_priv { const struct mt753x_info *info; unsigned int id; bool mcm; - phy_interface_t p6_interface; - phy_interface_t p5_interface; enum p5_interface_select p5_intf_sel; bool p5_sgmii; u8 mirror_rx; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 528441b28c4e..c9b6acd8c892 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3538,7 +3538,7 @@ static u8 bnx2x_set_pbd_csum_enc(struct bnx2x *bp, struct sk_buff *skb, u32 *parsing_data, u32 xmit_type) { *parsing_data |= - ((((u8 *)skb_inner_transport_header(skb) - skb->data) >> 1) << + ((skb_inner_transport_offset(skb) >> 1) << ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) & ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W; @@ -3570,7 +3570,7 @@ static u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb, u32 *parsing_data, u32 xmit_type) { *parsing_data |= - ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) << + ((skb_transport_offset(skb) >> 1) << ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) & ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W; @@ -3613,7 +3613,7 @@ static u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb, struct eth_tx_parse_bd_e1x *pbd, u32 xmit_type) { - u8 hlen = (skb_network_header(skb) - skb->data) >> 1; + u8 hlen = skb_network_offset(skb) >> 1; /* for now NS flag is not used in Linux */ pbd->global_data = @@ -3621,8 +3621,7 @@ static u8 bnx2x_set_pbd_csum(struct bnx2x *bp, struct sk_buff *skb, ((skb->protocol == cpu_to_be16(ETH_P_8021Q)) << ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT)); - pbd->ip_hlen_w = (skb_transport_header(skb) - - skb_network_header(skb)) >> 1; + pbd->ip_hlen_w = skb_network_header_len(skb) >> 1; hlen += pbd->ip_hlen_w; @@ -3667,8 +3666,7 @@ static void bnx2x_update_pbds_gso_enc(struct sk_buff *skb, u8 outerip_off, outerip_len = 0; /* from outer IP to transport */ - hlen_w = (skb_inner_transport_header(skb) - - skb_network_header(skb)) >> 1; + hlen_w = skb_inner_transport_offset(skb) >> 1; /* transport len */ hlen_w += inner_tcp_hdrlen(skb) >> 1; @@ -3714,7 +3712,7 @@ static void bnx2x_update_pbds_gso_enc(struct sk_buff *skb, 0, IPPROTO_TCP, 0)); } - outerip_off = (skb_network_header(skb) - skb->data) >> 1; + outerip_off = (skb_network_offset(skb)) >> 1; *global_data |= outerip_off | diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index fd290f3ad6ec..4814c96d5fe7 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -9,6 +9,7 @@ #include <linux/dma-mapping.h> #include <linux/dmapool.h> +#include <linux/ethtool_netlink.h> #include <linux/netdevice.h> #include <linux/pci.h> #include <linux/u64_stats_sync.h> @@ -51,12 +52,16 @@ #define GVE_DEFAULT_RX_BUFFER_SIZE 2048 +#define GVE_MAX_RX_BUFFER_SIZE 4096 + #define GVE_DEFAULT_RX_BUFFER_OFFSET 2048 #define GVE_XDP_ACTIONS 5 #define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182 +#define GVE_DEFAULT_HEADER_BUFFER_SIZE 128 + #define DQO_QPL_DEFAULT_TX_PAGES 512 #define DQO_QPL_DEFAULT_RX_PAGES 2048 @@ -150,6 +155,11 @@ struct gve_rx_compl_queue_dqo { u32 mask; /* Mask for indices to the size of the ring */ }; +struct gve_header_buf { + u8 *data; + dma_addr_t addr; +}; + /* Stores state for tracking buffers posted to HW */ struct gve_rx_buf_state_dqo { /* The page posted to HW. */ @@ -252,19 +262,26 @@ struct gve_rx_ring { /* track number of used buffers */ u16 used_buf_states_cnt; + + /* Address info of the buffers for header-split */ + struct gve_header_buf hdr_bufs; } dqo; }; u64 rbytes; /* free-running bytes received */ + u64 rx_hsplit_bytes; /* free-running header bytes received */ u64 rpackets; /* free-running packets received */ u32 cnt; /* free-running total number of completed packets */ u32 fill_cnt; /* free-running total number of descs and buffs posted */ u32 mask; /* masks the cnt and fill_cnt to the size of the ring */ + u64 rx_hsplit_pkt; /* free-running packets with headers split */ u64 rx_copybreak_pkt; /* free-running count of copybreak packets */ u64 rx_copied_pkt; /* free-running total number of copied packets */ u64 rx_skb_alloc_fail; /* free-running count of skb alloc fails */ u64 rx_buf_alloc_fail; /* free-running count of buffer alloc fails */ u64 rx_desc_err_dropped_pkt; /* free-running count of packets dropped by descriptor error */ + /* free-running count of unsplit packets due to header buffer overflow or hdr_len is 0 */ + u64 rx_hsplit_unsplit_pkt; u64 rx_cont_packet_cnt; /* free-running multi-fragment packets received */ u64 rx_frag_flip_cnt; /* free-running count of rx segments where page_flip was used */ u64 rx_frag_copy_cnt; /* free-running count of rx segments copied */ @@ -664,6 +681,7 @@ struct gve_rx_alloc_rings_cfg { struct gve_qpl_config *qpl_cfg; u16 ring_size; + u16 packet_buffer_size; bool raw_addressing; bool enable_header_split; @@ -778,13 +796,17 @@ struct gve_priv { struct gve_ptype_lut *ptype_lut_dqo; /* Must be a power of two. */ - int data_buffer_size_dqo; + u16 data_buffer_size_dqo; + u16 max_rx_buffer_size; /* device limit */ enum gve_queue_format queue_format; /* Interrupt coalescing settings */ u32 tx_coalesce_usecs; u32 rx_coalesce_usecs; + + u16 header_buf_size; /* device configured, header-split supported if non-zero */ + bool header_split_enabled; /* True if the header split is enabled by the user */ }; enum gve_service_task_flags_bit { @@ -1122,6 +1144,9 @@ void gve_rx_free_rings_gqi(struct gve_priv *priv, struct gve_rx_alloc_rings_cfg *cfg); void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx); void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx); +u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hplit); +bool gve_header_split_supported(const struct gve_priv *priv); +int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split); /* Reset */ void gve_schedule_reset(struct gve_priv *priv); int gve_reset(struct gve_priv *priv, bool attempt_teardown); diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 12fbd723ecc6..ae12ac38e18b 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -40,7 +40,8 @@ void gve_parse_device_option(struct gve_priv *priv, struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames, - struct gve_device_option_dqo_qpl **dev_op_dqo_qpl) + struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, + struct gve_device_option_buffer_sizes **dev_op_buffer_sizes) { u32 req_feat_mask = be32_to_cpu(option->required_features_mask); u16 option_length = be16_to_cpu(option->option_length); @@ -147,6 +148,23 @@ void gve_parse_device_option(struct gve_priv *priv, } *dev_op_jumbo_frames = (void *)(option + 1); break; + case GVE_DEV_OPT_ID_BUFFER_SIZES: + if (option_length < sizeof(**dev_op_buffer_sizes) || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "Buffer Sizes", + (int)sizeof(**dev_op_buffer_sizes), + GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_buffer_sizes)) + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, + "Buffer Sizes"); + *dev_op_buffer_sizes = (void *)(option + 1); + break; default: /* If we don't recognize the option just continue * without doing anything. @@ -164,7 +182,8 @@ gve_process_device_options(struct gve_priv *priv, struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames, - struct gve_device_option_dqo_qpl **dev_op_dqo_qpl) + struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, + struct gve_device_option_buffer_sizes **dev_op_buffer_sizes) { const int num_options = be16_to_cpu(descriptor->num_device_options); struct gve_device_option *dev_opt; @@ -185,7 +204,7 @@ gve_process_device_options(struct gve_priv *priv, gve_parse_device_option(priv, descriptor, dev_opt, dev_op_gqi_rda, dev_op_gqi_qpl, dev_op_dqo_rda, dev_op_jumbo_frames, - dev_op_dqo_qpl); + dev_op_dqo_qpl, dev_op_buffer_sizes); dev_opt = next_opt; } @@ -640,6 +659,9 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) cpu_to_be16(rx_buff_ring_entries); cmd.create_rx_queue.enable_rsc = !!(priv->dev->features & NETIF_F_LRO); + if (priv->header_split_enabled) + cmd.create_rx_queue.header_buffer_size = + cpu_to_be16(priv->header_buf_size); } return gve_adminq_issue_cmd(priv, &cmd); @@ -755,7 +777,9 @@ static void gve_enable_supported_features(struct gve_priv *priv, const struct gve_device_option_jumbo_frames *dev_op_jumbo_frames, const struct gve_device_option_dqo_qpl - *dev_op_dqo_qpl) + *dev_op_dqo_qpl, + const struct gve_device_option_buffer_sizes + *dev_op_buffer_sizes) { /* Before control reaches this point, the page-size-capped max MTU from * the gve_device_descriptor field has already been stored in @@ -779,10 +803,22 @@ static void gve_enable_supported_features(struct gve_priv *priv, if (priv->rx_pages_per_qpl == 0) priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES; } + + if (dev_op_buffer_sizes && + (supported_features_mask & GVE_SUP_BUFFER_SIZES_MASK)) { + priv->max_rx_buffer_size = + be16_to_cpu(dev_op_buffer_sizes->packet_buffer_size); + priv->header_buf_size = + be16_to_cpu(dev_op_buffer_sizes->header_buffer_size); + dev_info(&priv->pdev->dev, + "BUFFER SIZES device option enabled with max_rx_buffer_size of %u, header_buf_size of %u.\n", + priv->max_rx_buffer_size, priv->header_buf_size); + } } int gve_adminq_describe_device(struct gve_priv *priv) { + struct gve_device_option_buffer_sizes *dev_op_buffer_sizes = NULL; struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL; struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL; struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL; @@ -816,7 +852,8 @@ int gve_adminq_describe_device(struct gve_priv *priv) err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda, &dev_op_gqi_qpl, &dev_op_dqo_rda, &dev_op_jumbo_frames, - &dev_op_dqo_qpl); + &dev_op_dqo_qpl, + &dev_op_buffer_sizes); if (err) goto free_device_descriptor; @@ -885,7 +922,8 @@ int gve_adminq_describe_device(struct gve_priv *priv) priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues); gve_enable_supported_features(priv, supported_features_mask, - dev_op_jumbo_frames, dev_op_dqo_qpl); + dev_op_jumbo_frames, dev_op_dqo_qpl, + dev_op_buffer_sizes); free_device_descriptor: dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 5865ccdccbd0..5ac972e45ff8 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -125,6 +125,15 @@ struct gve_device_option_jumbo_frames { static_assert(sizeof(struct gve_device_option_jumbo_frames) == 8); +struct gve_device_option_buffer_sizes { + /* GVE_SUP_BUFFER_SIZES_MASK bit should be set */ + __be32 supported_features_mask; + __be16 packet_buffer_size; + __be16 header_buffer_size; +}; + +static_assert(sizeof(struct gve_device_option_buffer_sizes) == 8); + /* Terminology: * * RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA @@ -140,6 +149,7 @@ enum gve_dev_opt_id { GVE_DEV_OPT_ID_DQO_RDA = 0x4, GVE_DEV_OPT_ID_DQO_QPL = 0x7, GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, + GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa, }; enum gve_dev_opt_req_feat_mask { @@ -149,10 +159,12 @@ enum gve_dev_opt_req_feat_mask { GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES = 0x0, }; enum gve_sup_feature_mask { GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2, + GVE_SUP_BUFFER_SIZES_MASK = 1 << 4, }; #define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0 @@ -165,6 +177,7 @@ enum gve_driver_capbility { gve_driver_capability_dqo_qpl = 2, /* reserved for future use */ gve_driver_capability_dqo_rda = 3, gve_driver_capability_alt_miss_compl = 4, + gve_driver_capability_flexible_buffer_size = 5, }; #define GVE_CAP1(a) BIT((int)a) @@ -176,7 +189,8 @@ enum gve_driver_capbility { (GVE_CAP1(gve_driver_capability_gqi_qpl) | \ GVE_CAP1(gve_driver_capability_gqi_rda) | \ GVE_CAP1(gve_driver_capability_dqo_rda) | \ - GVE_CAP1(gve_driver_capability_alt_miss_compl)) + GVE_CAP1(gve_driver_capability_alt_miss_compl) | \ + GVE_CAP1(gve_driver_capability_flexible_buffer_size)) #define GVE_DRIVER_CAPABILITY_FLAGS2 0x0 #define GVE_DRIVER_CAPABILITY_FLAGS3 0x0 @@ -260,7 +274,9 @@ struct gve_adminq_create_rx_queue { __be16 packet_buffer_size; __be16 rx_buff_ring_size; u8 enable_rsc; - u8 padding[5]; + u8 padding1; + __be16 header_buffer_size; + u8 padding2[2]; }; static_assert(sizeof(struct gve_adminq_create_rx_queue) == 56); diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index e5397aa1e48f..9aebfb843d9d 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -4,7 +4,6 @@ * Copyright (C) 2015-2021 Google, Inc. */ -#include <linux/ethtool.h> #include <linux/rtnetlink.h> #include "gve.h" #include "gve_adminq.h" @@ -40,17 +39,18 @@ static u32 gve_get_msglevel(struct net_device *netdev) * as declared in enum xdp_action inside file uapi/linux/bpf.h . */ static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = { - "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", - "rx_dropped", "tx_dropped", "tx_timeouts", + "rx_packets", "rx_hsplit_pkt", "tx_packets", "rx_bytes", + "tx_bytes", "rx_dropped", "tx_dropped", "tx_timeouts", "rx_skb_alloc_fail", "rx_buf_alloc_fail", "rx_desc_err_dropped_pkt", + "rx_hsplit_unsplit_pkt", "interface_up_cnt", "interface_down_cnt", "reset_cnt", "page_alloc_fail", "dma_mapping_error", "stats_report_trigger_cnt", }; static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = { - "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]", "rx_bytes[%u]", - "rx_cont_packet_cnt[%u]", "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]", - "rx_frag_alloc_cnt[%u]", + "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]", + "rx_bytes[%u]", "rx_hsplit_bytes[%u]", "rx_cont_packet_cnt[%u]", + "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]", "rx_frag_alloc_cnt[%u]", "rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]", "rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]", "rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]", @@ -154,11 +154,13 @@ static void gve_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { - u64 tmp_rx_pkts, tmp_rx_bytes, tmp_rx_skb_alloc_fail, - tmp_rx_buf_alloc_fail, tmp_rx_desc_err_dropped_pkt, + u64 tmp_rx_pkts, tmp_rx_hsplit_pkt, tmp_rx_bytes, tmp_rx_hsplit_bytes, + tmp_rx_skb_alloc_fail, tmp_rx_buf_alloc_fail, + tmp_rx_desc_err_dropped_pkt, tmp_rx_hsplit_unsplit_pkt, tmp_tx_pkts, tmp_tx_bytes; - u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_pkts, - rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, tx_dropped; + u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_hsplit_unsplit_pkt, + rx_pkts, rx_hsplit_pkt, rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, + tx_dropped; int stats_idx, base_stats_idx, max_stats_idx; struct stats *report_stats; int *rx_qid_to_stats_idx; @@ -185,8 +187,10 @@ gve_get_ethtool_stats(struct net_device *netdev, kfree(rx_qid_to_stats_idx); return; } - for (rx_pkts = 0, rx_bytes = 0, rx_skb_alloc_fail = 0, - rx_buf_alloc_fail = 0, rx_desc_err_dropped_pkt = 0, ring = 0; + for (rx_pkts = 0, rx_bytes = 0, rx_hsplit_pkt = 0, + rx_skb_alloc_fail = 0, rx_buf_alloc_fail = 0, + rx_desc_err_dropped_pkt = 0, rx_hsplit_unsplit_pkt = 0, + ring = 0; ring < priv->rx_cfg.num_queues; ring++) { if (priv->rx) { do { @@ -195,18 +199,23 @@ gve_get_ethtool_stats(struct net_device *netdev, start = u64_stats_fetch_begin(&priv->rx[ring].statss); tmp_rx_pkts = rx->rpackets; + tmp_rx_hsplit_pkt = rx->rx_hsplit_pkt; tmp_rx_bytes = rx->rbytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = rx->rx_desc_err_dropped_pkt; + tmp_rx_hsplit_unsplit_pkt = + rx->rx_hsplit_unsplit_pkt; } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); rx_pkts += tmp_rx_pkts; + rx_hsplit_pkt += tmp_rx_hsplit_pkt; rx_bytes += tmp_rx_bytes; rx_skb_alloc_fail += tmp_rx_skb_alloc_fail; rx_buf_alloc_fail += tmp_rx_buf_alloc_fail; rx_desc_err_dropped_pkt += tmp_rx_desc_err_dropped_pkt; + rx_hsplit_unsplit_pkt += tmp_rx_hsplit_unsplit_pkt; } } for (tx_pkts = 0, tx_bytes = 0, tx_dropped = 0, ring = 0; @@ -227,6 +236,7 @@ gve_get_ethtool_stats(struct net_device *netdev, i = 0; data[i++] = rx_pkts; + data[i++] = rx_hsplit_pkt; data[i++] = tx_pkts; data[i++] = rx_bytes; data[i++] = tx_bytes; @@ -238,6 +248,7 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = rx_skb_alloc_fail; data[i++] = rx_buf_alloc_fail; data[i++] = rx_desc_err_dropped_pkt; + data[i++] = rx_hsplit_unsplit_pkt; data[i++] = priv->interface_up_cnt; data[i++] = priv->interface_down_cnt; data[i++] = priv->reset_cnt; @@ -277,6 +288,7 @@ gve_get_ethtool_stats(struct net_device *netdev, start = u64_stats_fetch_begin(&priv->rx[ring].statss); tmp_rx_bytes = rx->rbytes; + tmp_rx_hsplit_bytes = rx->rx_hsplit_bytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = @@ -284,6 +296,7 @@ gve_get_ethtool_stats(struct net_device *netdev, } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); data[i++] = tmp_rx_bytes; + data[i++] = tmp_rx_hsplit_bytes; data[i++] = rx->rx_cont_packet_cnt; data[i++] = rx->rx_frag_flip_cnt; data[i++] = rx->rx_frag_copy_cnt; @@ -480,6 +493,29 @@ static void gve_get_ringparam(struct net_device *netdev, cmd->tx_max_pending = priv->tx_desc_cnt; cmd->rx_pending = priv->rx_desc_cnt; cmd->tx_pending = priv->tx_desc_cnt; + + if (!gve_header_split_supported(priv)) + kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; + else if (priv->header_split_enabled) + kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED; + else + kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_DISABLED; +} + +static int gve_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *cmd, + struct kernel_ethtool_ringparam *kernel_cmd, + struct netlink_ext_ack *extack) +{ + struct gve_priv *priv = netdev_priv(netdev); + + if (priv->tx_desc_cnt != cmd->tx_pending || + priv->rx_desc_cnt != cmd->rx_pending) { + dev_info(&priv->pdev->dev, "Modify ring size is not supported.\n"); + return -EOPNOTSUPP; + } + + return gve_set_hsplit_config(priv, kernel_cmd->tcp_data_split); } static int gve_user_reset(struct net_device *netdev, u32 *flags) @@ -655,6 +691,7 @@ static int gve_set_coalesce(struct net_device *netdev, const struct ethtool_ops gve_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, .get_drvinfo = gve_get_drvinfo, .get_strings = gve_get_strings, .get_sset_count = gve_get_sset_count, @@ -667,6 +704,7 @@ const struct ethtool_ops gve_ethtool_ops = { .get_coalesce = gve_get_coalesce, .set_coalesce = gve_set_coalesce, .get_ringparam = gve_get_ringparam, + .set_ringparam = gve_set_ringparam, .reset = gve_user_reset, .get_tunable = gve_get_tunable, .set_tunable = gve_set_tunable, diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index db6d9ae7cd78..166bd827a6d7 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1276,17 +1276,10 @@ static void gve_unreg_xdp_info(struct gve_priv *priv) static void gve_drain_page_cache(struct gve_priv *priv) { - struct page_frag_cache *nc; int i; - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - nc = &priv->rx[i].page_cache; - if (nc->va) { - __page_frag_cache_drain(virt_to_page(nc->va), - nc->pagecnt_bias); - nc->va = NULL; - } - } + for (i = 0; i < priv->rx_cfg.num_queues; i++) + page_frag_cache_drain(&priv->rx[i].page_cache); } static void gve_qpls_get_curr_alloc_cfg(struct gve_priv *priv, @@ -1307,9 +1300,13 @@ static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, cfg->qcfg = &priv->rx_cfg; cfg->qcfg_tx = &priv->tx_cfg; cfg->raw_addressing = !gve_is_qpl(priv); + cfg->enable_header_split = priv->header_split_enabled; cfg->qpls = priv->qpls; cfg->qpl_cfg = &priv->qpl_cfg; cfg->ring_size = priv->rx_desc_cnt; + cfg->packet_buffer_size = gve_is_gqi(priv) ? + GVE_DEFAULT_RX_BUFFER_SIZE : + priv->data_buffer_size_dqo; cfg->rx = priv->rx; } @@ -1448,12 +1445,9 @@ static int gve_queues_start(struct gve_priv *priv, if (err) goto reset; - if (!gve_is_gqi(priv)) { - /* Hard code this for now. This may be tuned in the future for - * performance. - */ - priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; - } + priv->header_split_enabled = rx_alloc_cfg->enable_header_split; + priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size; + err = gve_create_rings(priv); if (err) goto reset; @@ -2065,6 +2059,56 @@ out: priv->tx_timeo_cnt++; } +u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) +{ + if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) + return GVE_MAX_RX_BUFFER_SIZE; + else + return GVE_DEFAULT_RX_BUFFER_SIZE; +} + +/* header-split is not supported on non-DQO_RDA yet even if device advertises it */ +bool gve_header_split_supported(const struct gve_priv *priv) +{ + return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT; +} + +int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; + bool enable_hdr_split; + int err = 0; + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) + return 0; + + if (!gve_header_split_supported(priv)) { + dev_err(&priv->pdev->dev, "Header-split not supported\n"); + return -EOPNOTSUPP; + } + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) + enable_hdr_split = true; + else + enable_hdr_split = false; + + if (enable_hdr_split == priv->header_split_enabled) + return 0; + + gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, + &tx_alloc_cfg, &rx_alloc_cfg); + + rx_alloc_cfg.enable_header_split = enable_hdr_split; + rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); + + if (netif_running(priv->dev)) + err = gve_adjust_config(priv, &qpls_alloc_cfg, + &tx_alloc_cfg, &rx_alloc_cfg); + return err; +} + static int gve_set_features(struct net_device *netdev, netdev_features_t features) { @@ -2511,6 +2555,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) priv->service_task_flags = 0x0; priv->state_flags = 0x0; priv->ethtool_flags = 0x0; + priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; + priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; gve_set_probe_in_progress(priv); priv->gve_wq = alloc_ordered_workqueue("gve", 0); diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 8e6aeb5b3ed4..8e8071308aeb 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -199,6 +199,18 @@ static int gve_alloc_page_dqo(struct gve_rx_ring *rx, return 0; } +static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) +{ + struct device *hdev = &priv->pdev->dev; + int buf_count = rx->dqo.bufq.mask + 1; + + if (rx->dqo.hdr_bufs.data) { + dma_free_coherent(hdev, priv->header_buf_size * buf_count, + rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr); + rx->dqo.hdr_bufs.data = NULL; + } +} + void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); @@ -258,9 +270,24 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, kvfree(rx->dqo.buf_states); rx->dqo.buf_states = NULL; + gve_rx_free_hdr_bufs(priv, rx); + netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); } +static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) +{ + struct device *hdev = &priv->pdev->dev; + int buf_count = rx->dqo.bufq.mask + 1; + + rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count, + &rx->dqo.hdr_bufs.addr, GFP_KERNEL); + if (!rx->dqo.hdr_bufs.data) + return -ENOMEM; + + return 0; +} + void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); @@ -302,6 +329,11 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, if (!rx->dqo.buf_states) return -ENOMEM; + /* Allocate header buffers for header-split */ + if (cfg->enable_header_split) + if (gve_rx_alloc_hdr_bufs(priv, rx)) + goto err; + /* Set up linked list of buffer IDs */ for (i = 0; i < rx->dqo.num_buf_states - 1; i++) rx->dqo.buf_states[i].next = i + 1; @@ -443,6 +475,10 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); desc->buf_addr = cpu_to_le64(buf_state->addr + buf_state->page_info.page_offset); + if (rx->dqo.hdr_bufs.data) + desc->header_buf_addr = + cpu_to_le64(rx->dqo.hdr_bufs.addr + + priv->header_buf_size * bufq->tail); bufq->tail = (bufq->tail + 1) & bufq->mask; complq->num_free_slots--; @@ -458,7 +494,7 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state) { - const int data_buffer_size = priv->data_buffer_size_dqo; + const u16 data_buffer_size = priv->data_buffer_size_dqo; int pagecount; /* Can't reuse if we only fit one buffer per page */ @@ -645,13 +681,16 @@ static int gve_rx_append_frags(struct napi_struct *napi, */ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, const struct gve_rx_compl_desc_dqo *compl_desc, - int queue_idx) + u32 desc_idx, int queue_idx) { const u16 buffer_id = le16_to_cpu(compl_desc->buf_id); + const bool hbo = compl_desc->header_buffer_overflow; const bool eop = compl_desc->end_of_packet != 0; + const bool hsplit = compl_desc->split_header; struct gve_rx_buf_state_dqo *buf_state; struct gve_priv *priv = rx->gve; u16 buf_len; + u16 hdr_len; if (unlikely(buffer_id >= rx->dqo.num_buf_states)) { net_err_ratelimited("%s: Invalid RX buffer_id=%u\n", @@ -672,12 +711,35 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, } buf_len = compl_desc->packet_len; + hdr_len = compl_desc->header_len; /* Page might have not been used for awhile and was likely last written * by a different thread. */ prefetch(buf_state->page_info.page); + /* Copy the header into the skb in the case of header split */ + if (hsplit) { + int unsplit = 0; + + if (hdr_len && !hbo) { + rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi, + rx->dqo.hdr_bufs.data + + desc_idx * priv->header_buf_size, + hdr_len); + if (unlikely(!rx->ctx.skb_head)) + goto error; + rx->ctx.skb_tail = rx->ctx.skb_head; + } else { + unsplit = 1; + } + u64_stats_update_begin(&rx->statss); + rx->rx_hsplit_pkt++; + rx->rx_hsplit_unsplit_pkt += unsplit; + rx->rx_hsplit_bytes += hdr_len; + u64_stats_update_end(&rx->statss); + } + /* Sync the portion of dma buffer for CPU to read. */ dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr, buf_state->page_info.page_offset, @@ -820,7 +882,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) /* Do not read data until we own the descriptor */ dma_rmb(); - err = gve_rx_dqo(napi, rx, compl_desc, rx->q_num); + err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num); if (err < 0) { gve_rx_free_skb(rx); u64_stats_update_begin(&rx->statss); diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c index 535b1796b91d..2349750075a5 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.c +++ b/drivers/net/ethernet/google/gve/gve_utils.c @@ -64,11 +64,9 @@ void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx) rx->ntfy_id = ntfy_idx; } -struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, - struct gve_rx_slot_page_info *page_info, u16 len) +struct sk_buff *gve_rx_copy_data(struct net_device *dev, struct napi_struct *napi, + u8 *data, u16 len) { - void *va = page_info->page_address + page_info->page_offset + - page_info->pad; struct sk_buff *skb; skb = napi_alloc_skb(napi, len); @@ -76,12 +74,21 @@ struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, return NULL; __skb_put(skb, len); - skb_copy_to_linear_data_offset(skb, 0, va, len); + skb_copy_to_linear_data_offset(skb, 0, data, len); skb->protocol = eth_type_trans(skb, dev); return skb; } +struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, + struct gve_rx_slot_page_info *page_info, u16 len) +{ + void *va = page_info->page_address + page_info->page_offset + + page_info->pad; + + return gve_rx_copy_data(dev, napi, va, len); +} + void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info) { page_info->pagecnt_bias--; diff --git a/drivers/net/ethernet/google/gve/gve_utils.h b/drivers/net/ethernet/google/gve/gve_utils.h index 277921a629f7..bf2e9a0adb36 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.h +++ b/drivers/net/ethernet/google/gve/gve_utils.h @@ -19,6 +19,9 @@ bool gve_rx_was_added_to_block(struct gve_priv *priv, int queue_idx); void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx); void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx); +struct sk_buff *gve_rx_copy_data(struct net_device *dev, struct napi_struct *napi, + u8 *data, u16 len); + struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, struct gve_rx_slot_page_info *page_info, u16 len); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index f1695c889d3a..19668a8d22f7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2473,9 +2473,9 @@ static netdev_features_t hns3_features_check(struct sk_buff *skb, return features; if (skb->encapsulation) - len = skb_inner_transport_header(skb) - skb->data; + len = skb_inner_transport_offset(skb); else - len = skb_transport_header(skb) - skb->data; + len = skb_transport_offset(skb); /* Assume L4 is 60 byte as TCP is the only protocol with a * a flexible value, and it's max len is 60 bytes. diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 767358b60507..639fbb12bd35 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -372,6 +372,7 @@ config IGC config IGC_LEDS def_bool LEDS_TRIGGER_NETDEV depends on IGC && LEDS_CLASS + depends on LEDS_CLASS=y || IGC=m help Optional support for controlling the NIC LED's with the netdev LED trigger. diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 01f0f12035ca..3fcb8daaa243 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -171,8 +171,8 @@ static int debug = 3; static int eeprom_bad_csum_allow = 0; static int use_io = 0; module_param(debug, int, 0); -module_param(eeprom_bad_csum_allow, int, 0); -module_param(use_io, int, 0); +module_param(eeprom_bad_csum_allow, int, 0444); +module_param(use_io, int, 0444); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); MODULE_PARM_DESC(eeprom_bad_csum_allow, "Allow bad eeprom checksums"); MODULE_PARM_DESC(use_io, "Force use of i/o access mode"); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index af5d9d97a0d6..cc8c531ec3df 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6688,14 +6688,14 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) if (adapter->hw.phy.type == e1000_phy_igp_3) { e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); } else if (hw->mac.type >= e1000_pch_lpt) { - if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC))) + if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC))) { /* ULP does not support wake from unicast, multicast * or broadcast. */ retval = e1000_enable_ulp_lpt_lp(hw, !runtime); - - if (retval) - return retval; + if (retval) + return retval; + } } /* Ensure that the appropriate bits are set in LPI_CTRL diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f12092cdb1f0..3fada49b8ae2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13208,12 +13208,12 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb, features &= ~NETIF_F_GSO_MASK; /* MACLEN can support at most 63 words */ - len = skb_network_header(skb) - skb->data; + len = skb_network_offset(skb); if (len & ~(63 * 2)) goto out_err; /* IPLEN and EIPLEN can support at most 127 dwords */ - len = skb_transport_header(skb) - skb_network_header(skb); + len = skb_network_header_len(skb); if (len & ~(127 * 4)) goto out_err; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 335fd13e86f7..aefec6bd3b67 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -4423,12 +4423,12 @@ static netdev_features_t iavf_features_check(struct sk_buff *skb, features &= ~NETIF_F_GSO_MASK; /* MACLEN can support at most 63 words */ - len = skb_network_header(skb) - skb->data; + len = skb_network_offset(skb); if (len & ~(63 * 2)) goto out_err; /* IPLEN and EIPLEN can support at most 127 dwords */ - len = skb_transport_header(skb) - skb_network_header(skb); + len = skb_network_header_len(skb); if (len & ~(127 * 4)) goto out_err; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index cebb44f51d5f..518298bbdadc 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -202,7 +202,7 @@ static struct notifier_block dca_notifier = { #endif #ifdef CONFIG_PCI_IOV static unsigned int max_vfs; -module_param(max_vfs, uint, 0); +module_param(max_vfs, uint, 0444); MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate per physical function"); #endif /* CONFIG_PCI_IOV */ @@ -2538,7 +2538,7 @@ igb_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IGB_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index a4d4f00e6a87..b0cf310e6f7b 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2655,7 +2655,7 @@ igbvf_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IGBVF_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index cfa6baccec55..90316dc58630 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -570,7 +570,6 @@ struct igc_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[IFNAMSIZ + 9]; - struct net_device poll_dev; /* for dynamic allocation of rings associated with this q_vector */ struct igc_ring ring[] ____cacheline_internodealigned_in_smp; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 3af52d238f3b..34820f6a78b9 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5277,7 +5277,7 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index e0c300fe5cee..cdaf087b4e85 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -334,7 +334,9 @@ static int ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw, hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 || hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) { + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core1) { *speed = IXGBE_LINK_SPEED_1GB_FULL; *autoneg = true; return 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 633bac1543dd..6e6e6f1847b6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -349,6 +349,8 @@ static int ixgbe_get_link_ksettings(struct net_device *netdev, case ixgbe_sfp_type_1g_sx_core1: case ixgbe_sfp_type_1g_lx_core0: case ixgbe_sfp_type_1g_lx_core1: + case ixgbe_sfp_type_1g_bx_core0: + case ixgbe_sfp_type_1g_bx_core1: ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); ethtool_link_ksettings_add_link_mode(cmd, advertising, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e23c3614fb10..595098a4c488 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -153,7 +153,7 @@ MODULE_PARM_DESC(max_vfs, #endif /* CONFIG_PCI_IOV */ static bool allow_unsupported_sfp; -module_param(allow_unsupported_sfp, bool, 0); +module_param(allow_unsupported_sfp, bool, 0444); MODULE_PARM_DESC(allow_unsupported_sfp, "Allow unsupported and untested SFP+ modules on 82599-based adapters"); @@ -10205,7 +10205,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IXGBE_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 75e9453331ed..07eaa3c3f4d3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -1532,6 +1532,7 @@ int ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) enum ixgbe_sfp_type stored_sfp_type = hw->phy.sfp_type; struct ixgbe_adapter *adapter = hw->back; u8 oui_bytes[3] = {0, 0, 0}; + u8 bitrate_nominal = 0; u8 comp_codes_10g = 0; u8 comp_codes_1g = 0; u16 enforce_sfp = 0; @@ -1576,7 +1577,12 @@ int ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_CABLE_TECHNOLOGY, &cable_tech); + if (status) + goto err_read_i2c_eeprom; + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_BITRATE_NOMINAL, + &bitrate_nominal); if (status) goto err_read_i2c_eeprom; @@ -1659,6 +1665,18 @@ int ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) else hw->phy.sfp_type = ixgbe_sfp_type_1g_lx_core1; + /* Support only Ethernet 1000BASE-BX10, checking the Bit Rate + * Nominal Value as per SFF-8472 by convention 1.25 Gb/s should + * be rounded up to 0Dh (13 in units of 100 MBd) for 1000BASE-BX + */ + } else if ((comp_codes_1g & IXGBE_SFF_BASEBX10_CAPABLE) && + (bitrate_nominal == 0xD)) { + if (hw->bus.lan_id == 0) + hw->phy.sfp_type = + ixgbe_sfp_type_1g_bx_core0; + else + hw->phy.sfp_type = + ixgbe_sfp_type_1g_bx_core1; } else { hw->phy.sfp_type = ixgbe_sfp_type_unknown; } @@ -1747,7 +1765,9 @@ int ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 || hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) { + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core1)) { hw->phy.type = ixgbe_phy_sfp_unsupported; return -EOPNOTSUPP; } @@ -1763,7 +1783,9 @@ int ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 || hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || - hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) { + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_bx_core1)) { /* Make sure we're a supported PHY type */ if (hw->phy.type == ixgbe_phy_sfp_intel) return 0; @@ -1999,12 +2021,14 @@ int ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, if (sfp_type == ixgbe_sfp_type_da_act_lmt_core0 || sfp_type == ixgbe_sfp_type_1g_lx_core0 || sfp_type == ixgbe_sfp_type_1g_cu_core0 || - sfp_type == ixgbe_sfp_type_1g_sx_core0) + sfp_type == ixgbe_sfp_type_1g_sx_core0 || + sfp_type == ixgbe_sfp_type_1g_bx_core0) sfp_type = ixgbe_sfp_type_srlr_core0; else if (sfp_type == ixgbe_sfp_type_da_act_lmt_core1 || sfp_type == ixgbe_sfp_type_1g_lx_core1 || sfp_type == ixgbe_sfp_type_1g_cu_core1 || - sfp_type == ixgbe_sfp_type_1g_sx_core1) + sfp_type == ixgbe_sfp_type_1g_sx_core1 || + sfp_type == ixgbe_sfp_type_1g_bx_core1) sfp_type = ixgbe_sfp_type_srlr_core1; /* Read offset to PHY init contents */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index beedcb7bec0d..14aa2ca51f70 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -17,6 +17,7 @@ #define IXGBE_SFF_1GBE_COMP_CODES 0x6 #define IXGBE_SFF_10GBE_COMP_CODES 0x3 #define IXGBE_SFF_CABLE_TECHNOLOGY 0x8 +#define IXGBE_SFF_BITRATE_NOMINAL 0xC #define IXGBE_SFF_CABLE_SPEC_COMP 0x3C #define IXGBE_SFF_SFF_8472_SWAP 0x5C #define IXGBE_SFF_SFF_8472_COMP 0x5E @@ -39,6 +40,7 @@ #define IXGBE_SFF_1GBASESX_CAPABLE 0x1 #define IXGBE_SFF_1GBASELX_CAPABLE 0x2 #define IXGBE_SFF_1GBASET_CAPABLE 0x8 +#define IXGBE_SFF_BASEBX10_CAPABLE 0x64 #define IXGBE_SFF_10GBASESR_CAPABLE 0x10 #define IXGBE_SFF_10GBASELR_CAPABLE 0x20 #define IXGBE_SFF_SOFT_RS_SELECT_MASK 0x8 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index d44c58130be2..ed440dd0c4f9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -3210,6 +3210,9 @@ enum ixgbe_sfp_type { ixgbe_sfp_type_1g_sx_core1 = 12, ixgbe_sfp_type_1g_lx_core0 = 13, ixgbe_sfp_type_1g_lx_core1 = 14, + ixgbe_sfp_type_1g_bx_core0 = 15, + ixgbe_sfp_type_1g_bx_core1 = 16, + ixgbe_sfp_type_not_present = 0xFFFE, ixgbe_sfp_type_unknown = 0xFFFF }; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index a44e4bd56142..9c960017a6de 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4413,7 +4413,7 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev, unsigned int network_hdr_len, mac_hdr_len; /* Make certain the headers can be described by a context descriptor */ - mac_hdr_len = skb_network_header(skb) - skb->data; + mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IXGBEVF_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC | diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index edd12d09dc89..07d4859de53a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -817,6 +817,8 @@ static int rvu_fwdata_init(struct rvu *rvu) err = cgx_get_fwdata_base(&fwdbase); if (err) goto fail; + + BUILD_BUG_ON(offsetof(struct rvu_fwdata, cgx_fw_data) > FWDATA_CGX_LMAC_OFFSET); rvu->fwdata = ioremap_wc(fwdbase, sizeof(struct rvu_fwdata)); if (!rvu->fwdata) goto fail; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index de8eba902276..f390525a6217 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -469,11 +469,12 @@ struct rvu_fwdata { u32 ptp_ext_clk_rate; u32 ptp_ext_tstamp; struct channel_fwdata channel_data; -#define FWDATA_RESERVED_MEM 1014 +#define FWDATA_RESERVED_MEM 958 u64 reserved[FWDATA_RESERVED_MEM]; #define CGX_MAX 9 #define CGX_LMACS_MAX 4 #define CGX_LMACS_USX 8 +#define FWDATA_CGX_LMAC_OFFSET 10536 union { struct cgx_lmac_fwdata_s cgx_fw_data[CGX_MAX][CGX_LMACS_MAX]; diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.c b/drivers/net/ethernet/mediatek/mtk_wed_wo.c index d58b07e7e123..7063c78bd35f 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_wo.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.c @@ -286,7 +286,6 @@ mtk_wed_wo_queue_free(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q) static void mtk_wed_wo_queue_tx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q) { - struct page *page; int i; for (i = 0; i < q->n_desc; i++) { @@ -301,19 +300,12 @@ mtk_wed_wo_queue_tx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q) entry->buf = NULL; } - if (!q->cache.va) - return; - - page = virt_to_page(q->cache.va); - __page_frag_cache_drain(page, q->cache.pagecnt_bias); - memset(&q->cache, 0, sizeof(q->cache)); + page_frag_cache_drain(&q->cache); } static void mtk_wed_wo_queue_rx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q) { - struct page *page; - for (;;) { void *buf = mtk_wed_wo_dequeue(wo, q, NULL, true); @@ -323,12 +315,7 @@ mtk_wed_wo_queue_rx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q) skb_free_frag(buf); } - if (!q->cache.va) - return; - - page = virt_to_page(q->cache.va); - __page_frag_cache_drain(page, q->cache.pagecnt_bias); - memset(&q->cache, 0, sizeof(q->cache)); + page_frag_cache_drain(&q->cache); } static void diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c index 91327ef670c7..c3ae11a48024 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c @@ -113,8 +113,8 @@ static const struct debugfs_reg32 intr_ctrl_regs[] = { void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq) { struct dentry *qcq_dentry, *q_dentry, *cq_dentry; - struct dentry *intr_dentry, *stats_dentry; struct ionic_dev *idev = &lif->ionic->idev; + struct dentry *intr_dentry, *stats_dentry; struct debugfs_regset32 *intr_ctrl_regset; struct ionic_intr_info *intr = &qcq->intr; struct debugfs_blob_wrapper *desc_blob; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index bfcfc2d7bcbd..516db910e8e8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -16,9 +16,10 @@ #define IONIC_MAX_TX_DESC 8192 #define IONIC_MAX_RX_DESC 16384 #define IONIC_MIN_TXRX_DESC 64 -#define IONIC_DEF_TXRX_DESC 4096 +#define IONIC_DEF_TXRX_DESC 1024 #define IONIC_RX_FILL_THRESHOLD 16 #define IONIC_RX_FILL_DIV 8 +#define IONIC_TSO_DESCS_NEEDED 44 /* 64K TSO @1500B */ #define IONIC_LIFS_MAX 1024 #define IONIC_WATCHDOG_SECS 5 #define IONIC_ITR_COAL_USEC_DEFAULT 64 @@ -379,6 +380,7 @@ typedef void (*ionic_cq_done_cb)(void *done_arg); unsigned int ionic_cq_service(struct ionic_cq *cq, unsigned int work_to_do, ionic_cq_cb cb, ionic_cq_done_cb done_cb, void *done_arg); +unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do); int ionic_q_init(struct ionic_lif *lif, struct ionic_dev *idev, struct ionic_queue *q, unsigned int index, const char *name, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 1496893c28be..33b1691a4ee5 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -52,15 +52,20 @@ static void ionic_xdp_unregister_rxq_info(struct ionic_queue *q); static void ionic_dim_work(struct work_struct *work) { struct dim *dim = container_of(work, struct dim, work); - struct ionic_intr_info *intr; struct dim_cq_moder cur_moder; + struct ionic_intr_info *intr; struct ionic_qcq *qcq; struct ionic_lif *lif; + struct ionic_queue *q; u32 new_coal; - cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); qcq = container_of(dim, struct ionic_qcq, dim); - lif = qcq->q.lif; + q = &qcq->q; + if (q->type == IONIC_QTYPE_RXQ) + cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + else + cur_moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix); + lif = q->lif; new_coal = ionic_coal_usec_to_hw(lif->ionic, cur_moder.usec); new_coal = new_coal ? new_coal : 1; @@ -685,7 +690,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, } INIT_WORK(&new->dim.work, ionic_dim_work); - new->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + new->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; *qcq = new; @@ -1262,8 +1267,7 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget) ionic_rx_service, NULL, NULL); if (lif->hwstamp_txq) - tx_work = ionic_cq_service(&lif->hwstamp_txq->cq, budget, - ionic_tx_service, NULL, NULL); + tx_work = ionic_tx_cq_service(&lif->hwstamp_txq->cq, budget); work_done = max(max(n_work, a_work), max(rx_work, tx_work)); if (work_done < budget && napi_complete_done(napi, work_done)) { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 42006de8069d..b4f8692a3ead 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -327,7 +327,7 @@ static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs) static inline bool ionic_txq_hwstamp_enabled(struct ionic_queue *q) { - return unlikely(q->features & IONIC_TXQ_F_HWSTAMP); + return q->features & IONIC_TXQ_F_HWSTAMP; } void ionic_link_status_check_request(struct ionic_lif *lif, bool can_sleep); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 2f479de329fe..29b4d039bbce 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -15,7 +15,7 @@ #include "ionic_debugfs.h" MODULE_DESCRIPTION(IONIC_DRV_DESCRIPTION); -MODULE_AUTHOR("Pensando Systems, Inc"); +MODULE_AUTHOR("Shannon Nelson <[email protected]>"); MODULE_LICENSE("GPL"); static const char *ionic_error_to_str(enum ionic_status_code code) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 56a7ad5bff17..6d168ad8c84f 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -5,13 +5,12 @@ #include <linux/ipv6.h> #include <linux/if_vlan.h> #include <net/ip6_checksum.h> +#include <net/netdev_queues.h> #include "ionic.h" #include "ionic_lif.h" #include "ionic_txrx.h" -static int ionic_maybe_stop_tx(struct ionic_queue *q, int ndescs); - static dma_addr_t ionic_tx_map_single(struct ionic_queue *q, void *data, size_t len); @@ -41,8 +40,8 @@ static inline void ionic_rxq_post(struct ionic_queue *q, bool ring_dbell, bool ionic_txq_poke_doorbell(struct ionic_queue *q) { - unsigned long now, then, dif; struct netdev_queue *netdev_txq; + unsigned long now, then, dif; struct net_device *netdev; netdev = q->lif->netdev; @@ -100,9 +99,10 @@ bool ionic_rxq_poke_doorbell(struct ionic_queue *q) return true; } -static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q) +static inline struct netdev_queue *q_to_ndq(struct net_device *netdev, + struct ionic_queue *q) { - return netdev_get_tx_queue(q->lif->netdev, q->index); + return netdev_get_tx_queue(netdev, q->index); } static void *ionic_rx_buf_va(struct ionic_buf_info *buf_info) @@ -123,7 +123,6 @@ static unsigned int ionic_rx_buf_size(struct ionic_buf_info *buf_info) static int ionic_rx_page_alloc(struct ionic_queue *q, struct ionic_buf_info *buf_info) { - struct net_device *netdev = q->lif->netdev; struct ionic_rx_stats *stats; struct device *dev; struct page *page; @@ -133,14 +132,14 @@ static int ionic_rx_page_alloc(struct ionic_queue *q, if (unlikely(!buf_info)) { net_err_ratelimited("%s: %s invalid buf_info in alloc\n", - netdev->name, q->name); + dev_name(dev), q->name); return -EINVAL; } page = alloc_pages(IONIC_PAGE_GFP_MASK, 0); if (unlikely(!page)) { net_err_ratelimited("%s: %s page alloc failed\n", - netdev->name, q->name); + dev_name(dev), q->name); stats->alloc_err++; return -ENOMEM; } @@ -150,7 +149,7 @@ static int ionic_rx_page_alloc(struct ionic_queue *q, if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) { __free_pages(page, 0); net_err_ratelimited("%s: %s dma map failed\n", - netdev->name, q->name); + dev_name(dev), q->name); stats->dma_map_err++; return -EIO; } @@ -164,12 +163,11 @@ static int ionic_rx_page_alloc(struct ionic_queue *q, static void ionic_rx_page_free(struct ionic_queue *q, struct ionic_buf_info *buf_info) { - struct net_device *netdev = q->lif->netdev; struct device *dev = q->dev; if (unlikely(!buf_info)) { net_err_ratelimited("%s: %s invalid buf_info in free\n", - netdev->name, q->name); + dev_name(dev), q->name); return; } @@ -204,14 +202,14 @@ static bool ionic_rx_buf_recycle(struct ionic_queue *q, return true; } -static struct sk_buff *ionic_rx_frags(struct ionic_queue *q, +static struct sk_buff *ionic_rx_frags(struct net_device *netdev, + struct ionic_queue *q, struct ionic_desc_info *desc_info, unsigned int headroom, unsigned int len, unsigned int num_sg_elems, bool synced) { - struct net_device *netdev = q->lif->netdev; struct ionic_buf_info *buf_info; struct ionic_rx_stats *stats; struct device *dev = q->dev; @@ -228,7 +226,7 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q, skb = napi_get_frags(&q_to_qcq(q)->napi); if (unlikely(!skb)) { net_warn_ratelimited("%s: SKB alloc failed on %s!\n", - netdev->name, q->name); + dev_name(dev), q->name); stats->alloc_err++; return NULL; } @@ -272,13 +270,13 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q, return skb; } -static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q, +static struct sk_buff *ionic_rx_copybreak(struct net_device *netdev, + struct ionic_queue *q, struct ionic_desc_info *desc_info, unsigned int headroom, unsigned int len, bool synced) { - struct net_device *netdev = q->lif->netdev; struct ionic_buf_info *buf_info; struct ionic_rx_stats *stats; struct device *dev = q->dev; @@ -291,7 +289,7 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q, skb = napi_alloc_skb(&q_to_qcq(q)->napi, len); if (unlikely(!skb)) { net_warn_ratelimited("%s: SKB alloc failed on %s!\n", - netdev->name, q->name); + dev_name(dev), q->name); stats->alloc_err++; return NULL; } @@ -309,7 +307,7 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q, headroom, len, DMA_FROM_DEVICE); skb_put(skb, len); - skb->protocol = eth_type_trans(skb, q->lif->netdev); + skb->protocol = eth_type_trans(skb, netdev); return skb; } @@ -349,8 +347,7 @@ static void ionic_xdp_tx_desc_clean(struct ionic_queue *q, desc_info->act = 0; } -static int ionic_xdp_post_frame(struct net_device *netdev, - struct ionic_queue *q, struct xdp_frame *frame, +static int ionic_xdp_post_frame(struct ionic_queue *q, struct xdp_frame *frame, enum xdp_action act, struct page *page, int off, bool ring_doorbell) { @@ -458,14 +455,16 @@ int ionic_xdp_xmit(struct net_device *netdev, int n, txq_trans_cond_update(nq); if (netif_tx_queue_stopped(nq) || - unlikely(ionic_maybe_stop_tx(txq, 1))) { + !netif_txq_maybe_stop(q_to_ndq(netdev, txq), + ionic_q_space_avail(txq), + 1, 1)) { __netif_tx_unlock(nq); return -EIO; } space = min_t(int, n, ionic_q_space_avail(txq)); for (nxmit = 0; nxmit < space ; nxmit++) { - if (ionic_xdp_post_frame(netdev, txq, xdp_frames[nxmit], + if (ionic_xdp_post_frame(txq, xdp_frames[nxmit], XDP_REDIRECT, virt_to_page(xdp_frames[nxmit]->data), 0, false)) { @@ -478,7 +477,9 @@ int ionic_xdp_xmit(struct net_device *netdev, int n, ionic_dbell_ring(lif->kern_dbpage, txq->hw_type, txq->dbval | txq->head_idx); - ionic_maybe_stop_tx(txq, 4); + netif_txq_maybe_stop(q_to_ndq(netdev, txq), + ionic_q_space_avail(txq), + 4, 4); __netif_tx_unlock(nq); return nxmit; @@ -571,7 +572,9 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, txq_trans_cond_update(nq); if (netif_tx_queue_stopped(nq) || - unlikely(ionic_maybe_stop_tx(txq, 1))) { + !netif_txq_maybe_stop(q_to_ndq(netdev, txq), + ionic_q_space_avail(txq), + 1, 1)) { __netif_tx_unlock(nq); goto out_xdp_abort; } @@ -579,7 +582,7 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, dma_unmap_page(rxq->dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE); - err = ionic_xdp_post_frame(netdev, txq, xdpf, XDP_TX, + err = ionic_xdp_post_frame(txq, xdpf, XDP_TX, buf_info->page, buf_info->page_offset, true); @@ -657,9 +660,10 @@ static void ionic_rx_clean(struct ionic_queue *q, headroom = q->xdp_rxq_info ? XDP_PACKET_HEADROOM : 0; if (len <= q->lif->rx_copybreak) - skb = ionic_rx_copybreak(q, desc_info, headroom, len, !!xdp_prog); + skb = ionic_rx_copybreak(netdev, q, desc_info, + headroom, len, !!xdp_prog); else - skb = ionic_rx_frags(q, desc_info, headroom, len, + skb = ionic_rx_frags(netdev, q, desc_info, headroom, len, comp->num_sg_elems, !!xdp_prog); if (unlikely(!skb)) { @@ -946,8 +950,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget) lif = cq->bound_q->lif; idev = &lif->ionic->idev; - work_done = ionic_cq_service(cq, budget, - ionic_tx_service, NULL, NULL); + work_done = ionic_tx_cq_service(cq, budget); if (unlikely(!budget)) return budget; @@ -1038,8 +1041,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) txqcq = lif->txqcqs[qi]; txcq = &lif->txqcqs[qi]->cq; - tx_work_done = ionic_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT, - ionic_tx_service, NULL, NULL); + tx_work_done = ionic_tx_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT); if (unlikely(!budget)) return budget; @@ -1082,7 +1084,7 @@ static dma_addr_t ionic_tx_map_single(struct ionic_queue *q, dma_addr = dma_map_single(dev, data, len, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma_addr)) { net_warn_ratelimited("%s: DMA single map failed on %s!\n", - q->lif->netdev->name, q->name); + dev_name(dev), q->name); stats->dma_map_err++; return 0; } @@ -1100,7 +1102,7 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q, dma_addr = skb_frag_dma_map(dev, frag, offset, len, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma_addr)) { net_warn_ratelimited("%s: DMA frag map failed on %s!\n", - q->lif->netdev->name, q->name); + dev_name(dev), q->name); stats->dma_map_err++; } return dma_addr; @@ -1183,7 +1185,6 @@ static void ionic_tx_clean(struct ionic_queue *q, struct ionic_tx_stats *stats = q_to_tx_stats(q); struct ionic_qcq *qcq = q_to_qcq(q); struct sk_buff *skb = cb_arg; - u16 qi; if (desc_info->xdpf) { ionic_xdp_tx_desc_clean(q->partner, desc_info); @@ -1200,9 +1201,7 @@ static void ionic_tx_clean(struct ionic_queue *q, if (!skb) return; - qi = skb_get_queue_mapping(skb); - - if (ionic_txq_hwstamp_enabled(q)) { + if (unlikely(ionic_txq_hwstamp_enabled(q))) { if (cq_info) { struct skb_shared_hwtstamps hwts = {}; __le64 *cq_desc_hwstamp; @@ -1227,24 +1226,22 @@ static void ionic_tx_clean(struct ionic_queue *q, stats->hwstamp_invalid++; } } - - } else if (unlikely(__netif_subqueue_stopped(q->lif->netdev, qi))) { - netif_wake_subqueue(q->lif->netdev, qi); } desc_info->bytes = skb->len; stats->clean++; - dev_consume_skb_any(skb); + napi_consume_skb(skb, 1); } -bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) +static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info, + unsigned int *total_pkts, unsigned int *total_bytes) { struct ionic_queue *q = cq->bound_q; struct ionic_desc_info *desc_info; struct ionic_txq_comp *comp; - int bytes = 0; - int pkts = 0; + unsigned int bytes = 0; + unsigned int pkts = 0; u16 index; comp = cq_info->cq_desc + cq->desc_size - sizeof(*comp); @@ -1269,19 +1266,52 @@ bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) desc_info->cb_arg = NULL; } while (index != le16_to_cpu(comp->comp_index)); - if (pkts && bytes && !ionic_txq_hwstamp_enabled(q)) - netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes); + (*total_pkts) += pkts; + (*total_bytes) += bytes; return true; } +unsigned int ionic_tx_cq_service(struct ionic_cq *cq, unsigned int work_to_do) +{ + struct ionic_cq_info *cq_info; + unsigned int work_done = 0; + unsigned int bytes = 0; + unsigned int pkts = 0; + + if (work_to_do == 0) + return 0; + + cq_info = &cq->info[cq->tail_idx]; + while (ionic_tx_service(cq, cq_info, &pkts, &bytes)) { + if (cq->tail_idx == cq->num_descs - 1) + cq->done_color = !cq->done_color; + cq->tail_idx = (cq->tail_idx + 1) & (cq->num_descs - 1); + cq_info = &cq->info[cq->tail_idx]; + + if (++work_done >= work_to_do) + break; + } + + if (work_done) { + struct ionic_queue *q = cq->bound_q; + + if (likely(!ionic_txq_hwstamp_enabled(q))) + netif_txq_completed_wake(q_to_ndq(q->lif->netdev, q), + pkts, bytes, + ionic_q_space_avail(q), + IONIC_TSO_DESCS_NEEDED); + } + + return work_done; +} + void ionic_tx_flush(struct ionic_cq *cq) { struct ionic_dev *idev = &cq->lif->ionic->idev; u32 work_done; - work_done = ionic_cq_service(cq, cq->num_descs, - ionic_tx_service, NULL, NULL); + work_done = ionic_tx_cq_service(cq, cq->num_descs); if (work_done) ionic_intr_credits(idev->intr_ctrl, cq->bound_intr->index, work_done, IONIC_INTR_CRED_RESET_COALESCE); @@ -1307,8 +1337,12 @@ void ionic_tx_empty(struct ionic_queue *q) desc_info->cb_arg = NULL; } - if (pkts && bytes && !ionic_txq_hwstamp_enabled(q)) - netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes); + if (likely(!ionic_txq_hwstamp_enabled(q))) { + struct netdev_queue *ndq = q_to_ndq(q->lif->netdev, q); + + netdev_tx_completed_queue(ndq, pkts, bytes); + netdev_tx_reset_queue(ndq); + } } static int ionic_tx_tcp_inner_pseudo_csum(struct sk_buff *skb) @@ -1356,7 +1390,7 @@ static int ionic_tx_tcp_pseudo_csum(struct sk_buff *skb) return 0; } -static void ionic_tx_tso_post(struct ionic_queue *q, +static void ionic_tx_tso_post(struct net_device *netdev, struct ionic_queue *q, struct ionic_desc_info *desc_info, struct sk_buff *skb, dma_addr_t addr, u8 nsge, u16 len, @@ -1385,15 +1419,16 @@ static void ionic_tx_tso_post(struct ionic_queue *q, if (start) { skb_tx_timestamp(skb); - if (!ionic_txq_hwstamp_enabled(q)) - netdev_tx_sent_queue(q_to_ndq(q), skb->len); + if (likely(!ionic_txq_hwstamp_enabled(q))) + netdev_tx_sent_queue(q_to_ndq(netdev, q), skb->len); ionic_txq_post(q, false, ionic_tx_clean, skb); } else { ionic_txq_post(q, done, NULL, NULL); } } -static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) +static int ionic_tx_tso(struct net_device *netdev, struct ionic_queue *q, + struct sk_buff *skb) { struct ionic_tx_stats *stats = q_to_tx_stats(q); struct ionic_desc_info *desc_info; @@ -1501,7 +1536,7 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) seg_rem = min(tso_rem, mss); done = (tso_rem == 0); /* post descriptor */ - ionic_tx_tso_post(q, desc_info, skb, + ionic_tx_tso_post(netdev, q, desc_info, skb, desc_addr, desc_nsge, desc_len, hdrlen, mss, outer_csum, vlan_tci, has_vlan, start, done); @@ -1611,10 +1646,12 @@ static void ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb, stats->frags += skb_shinfo(skb)->nr_frags; } -static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb) +static int ionic_tx(struct net_device *netdev, struct ionic_queue *q, + struct sk_buff *skb) { struct ionic_desc_info *desc_info = &q->info[q->head_idx]; struct ionic_tx_stats *stats = q_to_tx_stats(q); + bool ring_dbell = true; if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) return -EIO; @@ -1632,16 +1669,22 @@ static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb) stats->pkts++; stats->bytes += skb->len; - if (!ionic_txq_hwstamp_enabled(q)) - netdev_tx_sent_queue(q_to_ndq(q), skb->len); - ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb); + if (likely(!ionic_txq_hwstamp_enabled(q))) { + struct netdev_queue *ndq = q_to_ndq(netdev, q); + + if (unlikely(!ionic_q_has_space(q, MAX_SKB_FRAGS + 1))) + netif_tx_stop_queue(ndq); + ring_dbell = __netdev_tx_sent_queue(ndq, skb->len, + netdev_xmit_more()); + } + ionic_txq_post(q, ring_dbell, ionic_tx_clean, skb); return 0; } static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb) { - struct ionic_tx_stats *stats = q_to_tx_stats(q); + int nr_frags = skb_shinfo(skb)->nr_frags; bool too_many_frags = false; skb_frag_t *frag; int desc_bufs; @@ -1657,17 +1700,20 @@ static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb) /* Each desc is mss long max, so a descriptor for each gso_seg */ if (skb_is_gso(skb)) { ndescs = skb_shinfo(skb)->gso_segs; + if (!nr_frags) + return ndescs; } else { ndescs = 1; - if (skb_shinfo(skb)->nr_frags > q->max_sg_elems) { + if (!nr_frags) + return ndescs; + + if (unlikely(nr_frags > q->max_sg_elems)) { too_many_frags = true; goto linearize; } - } - /* If non-TSO, or no frags to check, we're done */ - if (!skb_is_gso(skb) || !skb_shinfo(skb)->nr_frags) return ndescs; + } /* We need to scan the skb to be sure that none of the MTU sized * packets in the TSO will require more sgs per descriptor than we @@ -1715,6 +1761,8 @@ static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb) linearize: if (too_many_frags) { + struct ionic_tx_stats *stats = q_to_tx_stats(q); + err = skb_linearize(skb); if (err) return err; @@ -1724,30 +1772,11 @@ linearize: return ndescs; } -static int ionic_maybe_stop_tx(struct ionic_queue *q, int ndescs) -{ - int stopped = 0; - - if (unlikely(!ionic_q_has_space(q, ndescs))) { - netif_stop_subqueue(q->lif->netdev, q->index); - stopped = 1; - - /* Might race with ionic_tx_clean, check again */ - smp_rmb(); - if (ionic_q_has_space(q, ndescs)) { - netif_wake_subqueue(q->lif->netdev, q->index); - stopped = 0; - } - } - - return stopped; -} - static netdev_tx_t ionic_start_hwstamp_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ionic_lif *lif = netdev_priv(netdev); - struct ionic_queue *q = &lif->hwstamp_txq->q; + struct ionic_queue *q; int err, ndescs; /* Does not stop/start txq, because we post to a separate tx queue @@ -1755,6 +1784,7 @@ static netdev_tx_t ionic_start_hwstamp_xmit(struct sk_buff *skb, * the timestamping queue, it is dropped. */ + q = &lif->hwstamp_txq->q; ndescs = ionic_tx_descs_needed(q, skb); if (unlikely(ndescs < 0)) goto err_out_drop; @@ -1764,9 +1794,9 @@ static netdev_tx_t ionic_start_hwstamp_xmit(struct sk_buff *skb, skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP; if (skb_is_gso(skb)) - err = ionic_tx_tso(q, skb); + err = ionic_tx_tso(netdev, q, skb); else - err = ionic_tx(q, skb); + err = ionic_tx(netdev, q, skb); if (err) goto err_out_drop; @@ -1804,23 +1834,19 @@ netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev) if (ndescs < 0) goto err_out_drop; - if (unlikely(ionic_maybe_stop_tx(q, ndescs))) + if (!netif_txq_maybe_stop(q_to_ndq(netdev, q), + ionic_q_space_avail(q), + ndescs, ndescs)) return NETDEV_TX_BUSY; if (skb_is_gso(skb)) - err = ionic_tx_tso(q, skb); + err = ionic_tx_tso(netdev, q, skb); else - err = ionic_tx(q, skb); + err = ionic_tx(netdev, q, skb); if (err) goto err_out_drop; - /* Stop the queue if there aren't descriptors for the next packet. - * Since our SG lists per descriptor take care of most of the possible - * fragmentation, we don't need to have many descriptors available. - */ - ionic_maybe_stop_tx(q, 4); - return NETDEV_TX_OK; err_out_drop: diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h index 82fc38e0f573..68228bb8c119 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h @@ -15,7 +15,6 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget); netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev); bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info); -bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info); int ionic_xdp_xmit(struct net_device *netdev, int n, struct xdp_frame **xdp, u32 flags); #endif /* _IONIC_TXRX_H_ */ diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index cb1746bc0e0c..847fa62c80df 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -215,7 +215,7 @@ static void qede_set_params_for_ipv6_ext(struct sk_buff *skb, bd2_bits1 |= (1 << ETH_TX_DATA_2ND_BD_IPV6_EXT_SHIFT); - bd2_bits2 |= ((((u8 *)skb_transport_header(skb) - skb->data) >> 1) & + bd2_bits2 |= ((skb_transport_offset(skb) >> 1) & ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK) << ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 41894d154013..b9dc0071c5de 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -446,8 +446,7 @@ static int qlcnic_tx_encap_pkt(struct qlcnic_adapter *adapter, encap_descr |= skb_network_offset(skb) << 10; first_desc->encap_descr = cpu_to_le16(encap_descr); - first_desc->tcp_hdr_offset = skb_inner_transport_header(skb) - - skb->data; + first_desc->tcp_hdr_offset = skb_inner_transport_offset(skb); first_desc->ip_hdr_offset = skb_inner_network_offset(skb); qlcnic_set_tx_flags_opcode(first_desc, flags, opcode); diff --git a/drivers/net/ethernet/sfc/siena/tx_common.c b/drivers/net/ethernet/sfc/siena/tx_common.c index a7a9ab304e13..71f9b5ec5ae4 100644 --- a/drivers/net/ethernet/sfc/siena/tx_common.c +++ b/drivers/net/ethernet/sfc/siena/tx_common.c @@ -317,11 +317,10 @@ static int efx_tx_tso_header_length(struct sk_buff *skb) size_t header_len; if (skb->encapsulation) - header_len = skb_inner_transport_header(skb) - - skb->data + + header_len = skb_inner_transport_offset(skb) + (inner_tcp_hdr(skb)->doff << 2u); else - header_len = skb_transport_header(skb) - skb->data + + header_len = skb_transport_offset(skb) + (tcp_hdr(skb)->doff << 2u); return header_len; } diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c index 9f2393d34371..2adb132b2f7e 100644 --- a/drivers/net/ethernet/sfc/tx_common.c +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -336,11 +336,10 @@ int efx_tx_tso_header_length(struct sk_buff *skb) size_t header_len; if (skb->encapsulation) - header_len = skb_inner_transport_header(skb) - - skb->data + + header_len = skb_inner_transport_offset(skb) + (inner_tcp_hdr(skb)->doff << 2u); else - header_len = skb_transport_header(skb) - skb->data + + header_len = skb_transport_offset(skb) + (tcp_hdr(skb)->doff << 2u); return header_len; } diff --git a/drivers/net/ethernet/sfc/tx_tso.c b/drivers/net/ethernet/sfc/tx_tso.c index 64a6768f75ea..ddf149db8180 100644 --- a/drivers/net/ethernet/sfc/tx_tso.c +++ b/drivers/net/ethernet/sfc/tx_tso.c @@ -174,8 +174,8 @@ static int tso_start(struct tso_state *st, struct efx_nic *efx, unsigned int header_len, in_len; dma_addr_t dma_addr; - st->ip_off = skb_network_header(skb) - skb->data; - st->tcp_off = skb_transport_header(skb) - skb->data; + st->ip_off = skb_network_offset(skb); + st->tcp_off = skb_transport_offset(skb); header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u); in_len = skb_headlen(skb) - header_len; st->header_len = header_len; diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index 3525d5c0d694..351609f4f011 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1144,9 +1144,9 @@ static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies) nskb->protocol = skb->protocol; offset = skb_mac_header(skb) - skb->data; skb_set_mac_header(nskb, offset); - offset = skb_network_header(skb) - skb->data; + offset = skb_network_offset(skb); skb_set_network_header(nskb, offset); - offset = skb_transport_header(skb) - skb->data; + offset = skb_transport_offset(skb); skb_set_transport_header(nskb, offset); offset = 0; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 7cf02ab6de68..6dff2c85682d 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1257,7 +1257,7 @@ static int wx_tso(struct wx_ring *tx_ring, struct wx_tx_buffer *first, /* compute header lengths */ l4len = enc ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); - *hdr_len = enc ? (skb_inner_transport_header(skb) - skb->data) : + *hdr_len = enc ? skb_inner_transport_offset(skb) : skb_transport_offset(skb); *hdr_len += l4len; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index e67a21294158..bd4624d14ca0 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -81,6 +81,7 @@ static void txgbe_up_complete(struct wx *wx) { struct net_device *netdev = wx->netdev; + txgbe_reinit_gpio_intr(wx); wx_control_hw(wx, true); wx_configure_vectors(wx); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index bae0a8ee7014..93295916b1d2 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -475,8 +475,10 @@ irqreturn_t txgbe_gpio_irq_handler(int irq, void *data) gc = txgbe->gpio; for_each_set_bit(hwirq, &gpioirq, gc->ngpio) { int gpio = irq_find_mapping(gc->irq.domain, hwirq); + struct irq_data *d = irq_get_irq_data(gpio); u32 irq_type = irq_get_trigger_type(gpio); + txgbe_gpio_irq_ack(d); handle_nested_irq(gpio); if ((irq_type & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_EDGE_BOTH) { @@ -489,6 +491,33 @@ irqreturn_t txgbe_gpio_irq_handler(int irq, void *data) return IRQ_HANDLED; } +void txgbe_reinit_gpio_intr(struct wx *wx) +{ + struct txgbe *txgbe = wx->priv; + irq_hw_number_t hwirq; + unsigned long gpioirq; + struct gpio_chip *gc; + unsigned long flags; + + /* for gpio interrupt pending before irq enable */ + gpioirq = rd32(wx, WX_GPIO_INTSTATUS); + + gc = txgbe->gpio; + for_each_set_bit(hwirq, &gpioirq, gc->ngpio) { + int gpio = irq_find_mapping(gc->irq.domain, hwirq); + struct irq_data *d = irq_get_irq_data(gpio); + u32 irq_type = irq_get_trigger_type(gpio); + + txgbe_gpio_irq_ack(d); + + if ((irq_type & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_EDGE_BOTH) { + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + txgbe_toggle_trigger(gc, hwirq); + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); + } + } +} + static int txgbe_gpio_init(struct txgbe *txgbe) { struct gpio_irq_chip *girq; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h index 9855d44076cb..8a026d804fe2 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h @@ -5,6 +5,7 @@ #define _TXGBE_PHY_H_ irqreturn_t txgbe_gpio_irq_handler(int irq, void *data); +void txgbe_reinit_gpio_intr(struct wx *wx); irqreturn_t txgbe_link_irq_handler(int irq, void *data); int txgbe_init_phy(struct txgbe *txgbe); void txgbe_remove_phy(struct txgbe *txgbe); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 6f3f9b446b1d..e25e0a31126c 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -508,7 +508,7 @@ static struct sk_buff *geneve_gro_receive(struct sock *sk, gh_len = geneve_hlen(gh); hlen = off_gnv + gh_len; - if (skb_gro_header_hard(skb, hlen)) { + if (!skb_gro_may_pull(skb, hlen)) { gh = skb_gro_header_slow(skb, hlen, off_gnv); if (unlikely(!gh)) goto out; diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h index f3355e040a9e..334cd62cf286 100644 --- a/drivers/net/ipa/ipa.h +++ b/drivers/net/ipa/ipa.h @@ -21,7 +21,6 @@ struct clk; struct icc_path; struct net_device; -struct platform_device; struct ipa_power; struct ipa_smp2p; @@ -31,7 +30,7 @@ struct ipa_interrupt; * struct ipa - IPA information * @gsi: Embedded GSI structure * @version: IPA hardware version - * @pdev: Platform device + * @dev: IPA device pointer * @completion: Used to signal pipeline clear transfer complete * @nb: Notifier block used for remoteproc SSR * @notifier: Remoteproc SSR notifier @@ -79,7 +78,7 @@ struct ipa_interrupt; struct ipa { struct gsi gsi; enum ipa_version version; - struct platform_device *pdev; + struct device *dev; struct completion completion; struct notifier_block nb; void *notifier; diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c index f1419fbd776c..39219963dbb3 100644 --- a/drivers/net/ipa/ipa_cmd.c +++ b/drivers/net/ipa/ipa_cmd.c @@ -174,7 +174,7 @@ bool ipa_cmd_table_init_valid(struct ipa *ipa, const struct ipa_mem *mem, u32 offset_max = field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK); u32 size_max = field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK); const char *table = route ? "route" : "filter"; - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; u32 size; size = route ? ipa->route_count : ipa->filter_count + 1; @@ -204,7 +204,7 @@ bool ipa_cmd_table_init_valid(struct ipa *ipa, const struct ipa_mem *mem, /* Validate the memory region that holds headers */ static bool ipa_cmd_header_init_local_valid(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; const struct ipa_mem *mem; u32 offset_max; u32 size_max; @@ -256,7 +256,7 @@ static bool ipa_cmd_register_write_offset_valid(struct ipa *ipa, const char *name, u32 offset) { struct ipa_cmd_register_write *payload; - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; u32 offset_max; u32 bit_count; diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index afa1d56d9095..dd490941615e 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -233,8 +233,8 @@ static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count, const struct ipa_gsi_endpoint_data *data) { const struct ipa_gsi_endpoint_data *other_data; - struct device *dev = &ipa->pdev->dev; enum ipa_endpoint_name other_name; + struct device *dev = ipa->dev; if (ipa_gsi_endpoint_data_empty(data)) return true; @@ -388,7 +388,7 @@ static u32 ipa_endpoint_max(struct ipa *ipa, u32 count, const struct ipa_gsi_endpoint_data *data) { const struct ipa_gsi_endpoint_data *dp = data; - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; enum ipa_endpoint_name name; u32 max; @@ -606,7 +606,7 @@ int ipa_endpoint_modem_exception_reset_all(struct ipa *ipa) count = ipa->modem_tx_count + ipa_cmd_pipeline_clear_count(); trans = ipa_cmd_trans_alloc(ipa, count); if (!trans) { - dev_err(&ipa->pdev->dev, + dev_err(ipa->dev, "no transaction to reset modem exception endpoints\n"); return -EBUSY; } @@ -1498,8 +1498,7 @@ ipa_endpoint_status_tag_valid(struct ipa_endpoint *endpoint, const void *data) if (endpoint_id == command_endpoint->endpoint_id) { complete(&ipa->completion); } else { - dev_err(&ipa->pdev->dev, - "unexpected tagged packet from endpoint %u\n", + dev_err(ipa->dev, "unexpected tagged packet from endpoint %u\n", endpoint_id); } @@ -1536,6 +1535,7 @@ static void ipa_endpoint_status_parse(struct ipa_endpoint *endpoint, void *data = page_address(page) + NET_SKB_PAD; u32 unused = buffer_size - total_len; struct ipa *ipa = endpoint->ipa; + struct device *dev = ipa->dev; u32 resid = total_len; while (resid) { @@ -1544,7 +1544,7 @@ static void ipa_endpoint_status_parse(struct ipa_endpoint *endpoint, u32 len; if (resid < IPA_STATUS_SIZE) { - dev_err(&endpoint->ipa->pdev->dev, + dev_err(dev, "short message (%u bytes < %zu byte status)\n", resid, IPA_STATUS_SIZE); break; @@ -1666,8 +1666,8 @@ void ipa_endpoint_default_route_clear(struct ipa *ipa) */ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint) { - struct device *dev = &endpoint->ipa->pdev->dev; struct ipa *ipa = endpoint->ipa; + struct device *dev = ipa->dev; struct gsi *gsi = &ipa->gsi; bool suspended = false; dma_addr_t addr; @@ -1769,7 +1769,7 @@ static void ipa_endpoint_reset(struct ipa_endpoint *endpoint) gsi_channel_reset(&ipa->gsi, channel_id, true); if (ret) - dev_err(&ipa->pdev->dev, + dev_err(ipa->dev, "error %d resetting channel %u for endpoint %u\n", ret, endpoint->channel_id, endpoint->endpoint_id); } @@ -1817,7 +1817,7 @@ int ipa_endpoint_enable_one(struct ipa_endpoint *endpoint) ret = gsi_channel_start(gsi, endpoint->channel_id); if (ret) { - dev_err(&ipa->pdev->dev, + dev_err(ipa->dev, "error %d starting %cX channel %u for endpoint %u\n", ret, endpoint->toward_ipa ? 'T' : 'R', endpoint->channel_id, endpoint_id); @@ -1854,14 +1854,13 @@ void ipa_endpoint_disable_one(struct ipa_endpoint *endpoint) /* Note that if stop fails, the channel's state is not well-defined */ ret = gsi_channel_stop(gsi, endpoint->channel_id); if (ret) - dev_err(&ipa->pdev->dev, - "error %d attempting to stop endpoint %u\n", ret, - endpoint_id); + dev_err(ipa->dev, "error %d attempting to stop endpoint %u\n", + ret, endpoint_id); } void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint) { - struct device *dev = &endpoint->ipa->pdev->dev; + struct device *dev = endpoint->ipa->dev; struct gsi *gsi = &endpoint->ipa->gsi; int ret; @@ -1881,7 +1880,7 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint) void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint) { - struct device *dev = &endpoint->ipa->pdev->dev; + struct device *dev = endpoint->ipa->dev; struct gsi *gsi = &endpoint->ipa->gsi; int ret; @@ -1983,7 +1982,7 @@ void ipa_endpoint_deconfig(struct ipa *ipa) int ipa_endpoint_config(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; const struct reg *reg; u32 endpoint_id; u32 hw_limit; diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index 4d80bf77a532..c3e8784d51d9 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -19,6 +19,7 @@ * time only these three are supported. */ +#include <linux/platform_device.h> #include <linux/types.h> #include <linux/interrupt.h> #include <linux/pm_runtime.h> @@ -109,14 +110,13 @@ static irqreturn_t ipa_isr_thread(int irq, void *dev_id) struct ipa_interrupt *interrupt = dev_id; struct ipa *ipa = interrupt->ipa; u32 enabled = interrupt->enabled; + struct device *dev = ipa->dev; const struct reg *reg; - struct device *dev; u32 pending; u32 offset; u32 mask; int ret; - dev = &ipa->pdev->dev; ret = pm_runtime_get_sync(dev); if (WARN_ON(ret < 0)) goto out_power_put; @@ -236,29 +236,17 @@ void ipa_interrupt_simulate_suspend(struct ipa_interrupt *interrupt) } /* Configure the IPA interrupt framework */ -struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa) +int ipa_interrupt_config(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; - struct ipa_interrupt *interrupt; + struct ipa_interrupt *interrupt = ipa->interrupt; + unsigned int irq = interrupt->irq; + struct device *dev = ipa->dev; const struct reg *reg; - unsigned int irq; int ret; - ret = platform_get_irq_byname(ipa->pdev, "ipa"); - if (ret <= 0) { - dev_err(dev, "DT error %d getting \"ipa\" IRQ property\n", - ret); - return ERR_PTR(ret ? : -EINVAL); - } - irq = ret; - - interrupt = kzalloc(sizeof(*interrupt), GFP_KERNEL); - if (!interrupt) - return ERR_PTR(-ENOMEM); interrupt->ipa = ipa; - interrupt->irq = irq; - /* Start with all IPA interrupts disabled */ + /* Disable all IPA interrupt types */ reg = ipa_reg(ipa, IPA_IRQ_EN); iowrite32(0, ipa->reg_virt + reg_offset(reg)); @@ -271,26 +259,59 @@ struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa) ret = dev_pm_set_wake_irq(dev, irq); if (ret) { - dev_err(dev, "error %d registering \"ipa\" IRQ as wakeirq\n", ret); + dev_err(dev, "error %d registering \"ipa\" IRQ as wakeirq\n", + ret); goto err_free_irq; } - return interrupt; + ipa->interrupt = interrupt; + + return 0; err_free_irq: free_irq(interrupt->irq, interrupt); err_kfree: kfree(interrupt); - return ERR_PTR(ret); + return ret; } /* Inverse of ipa_interrupt_config() */ -void ipa_interrupt_deconfig(struct ipa_interrupt *interrupt) +void ipa_interrupt_deconfig(struct ipa *ipa) { - struct device *dev = &interrupt->ipa->pdev->dev; + struct ipa_interrupt *interrupt = ipa->interrupt; + struct device *dev = ipa->dev; + + ipa->interrupt = NULL; dev_pm_clear_wake_irq(dev); free_irq(interrupt->irq, interrupt); +} + +/* Initialize the IPA interrupt structure */ +struct ipa_interrupt *ipa_interrupt_init(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ipa_interrupt *interrupt; + int irq; + + irq = platform_get_irq_byname(pdev, "ipa"); + if (irq <= 0) { + dev_err(dev, "DT error %d getting \"ipa\" IRQ property\n", irq); + + return ERR_PTR(irq ? : -EINVAL); + } + + interrupt = kzalloc(sizeof(*interrupt), GFP_KERNEL); + if (!interrupt) + return ERR_PTR(-ENOMEM); + interrupt->irq = irq; + + return interrupt; +} + +/* Inverse of ipa_interrupt_init() */ +void ipa_interrupt_exit(struct ipa_interrupt *interrupt) +{ kfree(interrupt); } diff --git a/drivers/net/ipa/ipa_interrupt.h b/drivers/net/ipa/ipa_interrupt.h index 53e1b71685c7..f3f4f4330a59 100644 --- a/drivers/net/ipa/ipa_interrupt.h +++ b/drivers/net/ipa/ipa_interrupt.h @@ -76,17 +76,31 @@ void ipa_interrupt_irq_enable(struct ipa *ipa); void ipa_interrupt_irq_disable(struct ipa *ipa); /** - * ipa_interrupt_config() - Configure the IPA interrupt framework + * ipa_interrupt_config() - Configure IPA interrupts * @ipa: IPA pointer * - * Return: Pointer to IPA SMP2P info, or a pointer-coded error + * Return: 0 if successful, or a negative error code */ -struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa); +int ipa_interrupt_config(struct ipa *ipa); /** * ipa_interrupt_deconfig() - Inverse of ipa_interrupt_config() + * @ipa: IPA pointer + */ +void ipa_interrupt_deconfig(struct ipa *ipa); + +/** + * ipa_interrupt_init() - Initialize the IPA interrupt structure + * @pdev: IPA platform device pointer + * + * Return: Pointer to an IPA interrupt structure, or a pointer-coded error + */ +struct ipa_interrupt *ipa_interrupt_init(struct platform_device *pdev); + +/** + * ipa_interrupt_exit() - Inverse of ipa_interrupt_init() * @interrupt: IPA interrupt structure */ -void ipa_interrupt_deconfig(struct ipa_interrupt *interrupt); +void ipa_interrupt_exit(struct ipa_interrupt *interrupt); #endif /* _IPA_INTERRUPT_H_ */ diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index 00475fd7a205..57b241417e8c 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -7,7 +7,6 @@ #include <linux/types.h> #include <linux/atomic.h> #include <linux/bitfield.h> -#include <linux/device.h> #include <linux/bug.h> #include <linux/io.h> #include <linux/firmware.h> @@ -114,7 +113,7 @@ int ipa_setup(struct ipa *ipa) { struct ipa_endpoint *exception_endpoint; struct ipa_endpoint *command_endpoint; - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; int ret; ret = gsi_setup(&ipa->gsi); @@ -542,12 +541,9 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data) if (ret) goto err_hardware_deconfig; - ipa->interrupt = ipa_interrupt_config(ipa); - if (IS_ERR(ipa->interrupt)) { - ret = PTR_ERR(ipa->interrupt); - ipa->interrupt = NULL; + ret = ipa_interrupt_config(ipa); + if (ret) goto err_mem_deconfig; - } ipa_uc_config(ipa); @@ -572,8 +568,7 @@ err_endpoint_deconfig: ipa_endpoint_deconfig(ipa); err_uc_deconfig: ipa_uc_deconfig(ipa); - ipa_interrupt_deconfig(ipa->interrupt); - ipa->interrupt = NULL; + ipa_interrupt_deconfig(ipa); err_mem_deconfig: ipa_mem_deconfig(ipa); err_hardware_deconfig: @@ -591,8 +586,7 @@ static void ipa_deconfig(struct ipa *ipa) ipa_modem_deconfig(ipa); ipa_endpoint_deconfig(ipa); ipa_uc_deconfig(ipa); - ipa_interrupt_deconfig(ipa->interrupt); - ipa->interrupt = NULL; + ipa_interrupt_deconfig(ipa); ipa_mem_deconfig(ipa); ipa_hardware_deconfig(ipa); } @@ -808,6 +802,7 @@ out_self: static int ipa_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + struct ipa_interrupt *interrupt; enum ipa_firmware_loader loader; const struct ipa_data *data; struct ipa_power *power; @@ -839,12 +834,21 @@ static int ipa_probe(struct platform_device *pdev) if (loader == IPA_LOADER_DEFER) return -EPROBE_DEFER; - /* The clock and interconnects might not be ready when we're - * probed, so might return -EPROBE_DEFER. + /* The IPA interrupt might not be ready when we're probed, so this + * might return -EPROBE_DEFER. + */ + interrupt = ipa_interrupt_init(pdev); + if (IS_ERR(interrupt)) + return PTR_ERR(interrupt); + + /* The clock and interconnects might not be ready when we're probed, + * so this might return -EPROBE_DEFER. */ power = ipa_power_init(dev, data->power_data); - if (IS_ERR(power)) - return PTR_ERR(power); + if (IS_ERR(power)) { + ret = PTR_ERR(power); + goto err_interrupt_exit; + } /* No more EPROBE_DEFER. Allocate and initialize the IPA structure */ ipa = kzalloc(sizeof(*ipa), GFP_KERNEL); @@ -853,18 +857,19 @@ static int ipa_probe(struct platform_device *pdev) goto err_power_exit; } - ipa->pdev = pdev; + ipa->dev = dev; dev_set_drvdata(dev, ipa); + ipa->interrupt = interrupt; ipa->power = power; ipa->version = data->version; ipa->modem_route_count = data->modem_route_count; init_completion(&ipa->completion); - ret = ipa_reg_init(ipa); + ret = ipa_reg_init(ipa, pdev); if (ret) goto err_kfree_ipa; - ret = ipa_mem_init(ipa, data->mem_data); + ret = ipa_mem_init(ipa, pdev, data->mem_data); if (ret) goto err_reg_exit; @@ -882,7 +887,7 @@ static int ipa_probe(struct platform_device *pdev) if (ret) goto err_endpoint_exit; - ret = ipa_smp2p_init(ipa, loader == IPA_LOADER_MODEM); + ret = ipa_smp2p_init(ipa, pdev, loader == IPA_LOADER_MODEM); if (ret) goto err_table_exit; @@ -939,17 +944,27 @@ err_kfree_ipa: kfree(ipa); err_power_exit: ipa_power_exit(power); +err_interrupt_exit: + ipa_interrupt_exit(interrupt); return ret; } static void ipa_remove(struct platform_device *pdev) { - struct ipa *ipa = dev_get_drvdata(&pdev->dev); - struct ipa_power *power = ipa->power; - struct device *dev = &pdev->dev; + struct ipa_interrupt *interrupt; + struct ipa_power *power; + struct device *dev; + struct ipa *ipa; int ret; + ipa = dev_get_drvdata(&pdev->dev); + dev = ipa->dev; + WARN_ON(dev != &pdev->dev); + + power = ipa->power; + interrupt = ipa->interrupt; + /* Prevent the modem from triggering a call to ipa_setup(). This * also ensures a modem-initiated setup that's underway completes. */ @@ -991,6 +1006,7 @@ out_power_put: ipa_reg_exit(ipa); kfree(ipa); ipa_power_exit(power); + ipa_interrupt_exit(interrupt); dev_info(dev, "IPA driver removed"); } diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c index 694960537ecd..709f061ede61 100644 --- a/drivers/net/ipa/ipa_mem.c +++ b/drivers/net/ipa/ipa_mem.c @@ -9,6 +9,7 @@ #include <linux/bug.h> #include <linux/dma-mapping.h> #include <linux/iommu.h> +#include <linux/platform_device.h> #include <linux/io.h> #include <linux/soc/qcom/smem.h> @@ -75,9 +76,9 @@ ipa_mem_zero_region_add(struct gsi_trans *trans, enum ipa_mem_id mem_id) int ipa_mem_setup(struct ipa *ipa) { dma_addr_t addr = ipa->zero_addr; - const struct reg *reg; const struct ipa_mem *mem; struct gsi_trans *trans; + const struct reg *reg; u32 offset; u16 size; u32 val; @@ -87,7 +88,7 @@ int ipa_mem_setup(struct ipa *ipa) */ trans = ipa_cmd_trans_alloc(ipa, 4); if (!trans) { - dev_err(&ipa->pdev->dev, "no transaction for memory setup\n"); + dev_err(ipa->dev, "no transaction for memory setup\n"); return -EBUSY; } @@ -217,8 +218,8 @@ static bool ipa_mem_id_required(struct ipa *ipa, enum ipa_mem_id mem_id) static bool ipa_mem_valid_one(struct ipa *ipa, const struct ipa_mem *mem) { - struct device *dev = &ipa->pdev->dev; enum ipa_mem_id mem_id = mem->id; + struct device *dev = ipa->dev; u16 size_multiple; /* Make sure the memory region is valid for this version of IPA */ @@ -254,7 +255,7 @@ static bool ipa_mem_valid_one(struct ipa *ipa, const struct ipa_mem *mem) static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem_data *mem_data) { DECLARE_BITMAP(regions, IPA_MEM_COUNT) = { }; - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; enum ipa_mem_id mem_id; u32 i; @@ -290,7 +291,7 @@ static bool ipa_mem_valid(struct ipa *ipa, const struct ipa_mem_data *mem_data) /* Do all memory regions fit within the IPA local memory? */ static bool ipa_mem_size_valid(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; u32 limit = ipa->mem_size; u32 i; @@ -317,7 +318,7 @@ static bool ipa_mem_size_valid(struct ipa *ipa) */ int ipa_mem_config(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; const struct ipa_mem *mem; const struct reg *reg; dma_addr_t addr; @@ -393,7 +394,7 @@ err_dma_free: /* Inverse of ipa_mem_config() */ void ipa_mem_deconfig(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; dma_free_coherent(dev, ipa->zero_size, ipa->zero_virt, ipa->zero_addr); ipa->zero_size = 0; @@ -420,8 +421,7 @@ int ipa_mem_zero_modem(struct ipa *ipa) */ trans = ipa_cmd_trans_alloc(ipa, 3); if (!trans) { - dev_err(&ipa->pdev->dev, - "no transaction to zero modem memory\n"); + dev_err(ipa->dev, "no transaction to zero modem memory\n"); return -EBUSY; } @@ -452,7 +452,7 @@ int ipa_mem_zero_modem(struct ipa *ipa) */ static int ipa_imem_init(struct ipa *ipa, unsigned long addr, size_t size) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; struct iommu_domain *domain; unsigned long iova; phys_addr_t phys; @@ -485,13 +485,12 @@ static int ipa_imem_init(struct ipa *ipa, unsigned long addr, size_t size) static void ipa_imem_exit(struct ipa *ipa) { + struct device *dev = ipa->dev; struct iommu_domain *domain; - struct device *dev; if (!ipa->imem_size) return; - dev = &ipa->pdev->dev; domain = iommu_get_domain_for_dev(dev); if (domain) { size_t size; @@ -527,7 +526,7 @@ static void ipa_imem_exit(struct ipa *ipa) */ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; struct iommu_domain *domain; unsigned long iova; phys_addr_t phys; @@ -594,7 +593,7 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size) static void ipa_smem_exit(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; struct iommu_domain *domain; domain = iommu_get_domain_for_dev(dev); @@ -615,9 +614,10 @@ static void ipa_smem_exit(struct ipa *ipa) } /* Perform memory region-related initialization */ -int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data) +int ipa_mem_init(struct ipa *ipa, struct platform_device *pdev, + const struct ipa_mem_data *mem_data) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = &pdev->dev; struct resource *res; int ret; @@ -634,14 +634,13 @@ int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data) if (!ipa_table_mem_valid(ipa, true)) return -EINVAL; - ret = dma_set_mask_and_coherent(&ipa->pdev->dev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (ret) { dev_err(dev, "error %d setting DMA mask\n", ret); return ret; } - res = platform_get_resource_byname(ipa->pdev, IORESOURCE_MEM, - "ipa-shared"); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ipa-shared"); if (!res) { dev_err(dev, "DT error getting \"ipa-shared\" memory property\n"); diff --git a/drivers/net/ipa/ipa_mem.h b/drivers/net/ipa/ipa_mem.h index 868e9c20e8c4..28aad00a151d 100644 --- a/drivers/net/ipa/ipa_mem.h +++ b/drivers/net/ipa/ipa_mem.h @@ -6,6 +6,8 @@ #ifndef _IPA_MEM_H_ #define _IPA_MEM_H_ +struct platform_device; + struct ipa; struct ipa_mem_data; @@ -100,7 +102,8 @@ int ipa_mem_setup(struct ipa *ipa); /* No ipa_mem_teardown() needed */ int ipa_mem_zero_modem(struct ipa *ipa); -int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data); +int ipa_mem_init(struct ipa *ipa, struct platform_device *pdev, + const struct ipa_mem_data *mem_data); void ipa_mem_exit(struct ipa *ipa); #endif /* _IPA_MEM_H_ */ diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c index 1d1be92fbebc..c27ca3f27f7d 100644 --- a/drivers/net/ipa/ipa_modem.c +++ b/drivers/net/ipa/ipa_modem.c @@ -58,7 +58,7 @@ static int ipa_open(struct net_device *netdev) struct device *dev; int ret; - dev = &ipa->pdev->dev; + dev = ipa->dev; ret = pm_runtime_get_sync(dev); if (ret < 0) goto err_power_put; @@ -94,7 +94,7 @@ static int ipa_stop(struct net_device *netdev) struct device *dev; int ret; - dev = &ipa->pdev->dev; + dev = ipa->dev; ret = pm_runtime_get_sync(dev); if (ret < 0) goto out_power_put; @@ -158,7 +158,7 @@ ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev) */ netif_stop_queue(netdev); - dev = &ipa->pdev->dev; + dev = ipa->dev; ret = pm_runtime_get(dev); if (ret < 1) { /* If a resume won't happen, just drop the packet */ @@ -322,7 +322,7 @@ int ipa_modem_start(struct ipa *ipa) goto out_set_state; } - SET_NETDEV_DEV(netdev, &ipa->pdev->dev); + SET_NETDEV_DEV(netdev, ipa->dev); priv = netdev_priv(netdev); priv->ipa = ipa; priv->tx = ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]; @@ -396,7 +396,7 @@ int ipa_modem_stop(struct ipa *ipa) /* Treat a "clean" modem stop the same as a crash */ static void ipa_modem_crashed(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; int ret; /* Prevent the modem from triggering a call to ipa_setup() */ @@ -443,7 +443,7 @@ static int ipa_modem_notify(struct notifier_block *nb, unsigned long action, { struct ipa *ipa = container_of(nb, struct ipa, nb); struct qcom_ssr_notify_data *notify_data = data; - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; switch (action) { case QCOM_SSR_BEFORE_POWERUP: @@ -492,7 +492,7 @@ int ipa_modem_config(struct ipa *ipa) void ipa_modem_deconfig(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; int ret; ret = qcom_unregister_ssr_notifier(ipa->notifier, &ipa->nb); diff --git a/drivers/net/ipa/ipa_power.c b/drivers/net/ipa/ipa_power.c index 0f635b8356bf..41ca7ef5e20f 100644 --- a/drivers/net/ipa/ipa_power.c +++ b/drivers/net/ipa/ipa_power.c @@ -238,7 +238,7 @@ int ipa_power_setup(struct ipa *ipa) ipa_interrupt_enable(ipa, IPA_IRQ_TX_SUSPEND); - ret = device_init_wakeup(&ipa->pdev->dev, true); + ret = device_init_wakeup(ipa->dev, true); if (ret) ipa_interrupt_disable(ipa, IPA_IRQ_TX_SUSPEND); @@ -247,7 +247,7 @@ int ipa_power_setup(struct ipa *ipa) void ipa_power_teardown(struct ipa *ipa) { - (void)device_init_wakeup(&ipa->pdev->dev, false); + (void)device_init_wakeup(ipa->dev, false); ipa_interrupt_disable(ipa, IPA_IRQ_TX_SUSPEND); } diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c index f70f0a1d1cda..65c40e207802 100644 --- a/drivers/net/ipa/ipa_qmi.c +++ b/drivers/net/ipa/ipa_qmi.c @@ -96,7 +96,7 @@ static void ipa_server_init_complete(struct ipa_qmi *ipa_qmi) IPA_QMI_INIT_COMPLETE_IND_SZ, ipa_init_complete_ind_ei, &ind); if (ret) - dev_err(&ipa->pdev->dev, + dev_err(ipa->dev, "error %d sending init complete indication\n", ret); else ipa_qmi->indication_sent = true; @@ -148,7 +148,7 @@ static void ipa_qmi_ready(struct ipa_qmi *ipa_qmi) ipa = container_of(ipa_qmi, struct ipa, qmi); ret = ipa_modem_start(ipa); if (ret) - dev_err(&ipa->pdev->dev, "error %d starting modem\n", ret); + dev_err(ipa->dev, "error %d starting modem\n", ret); } /* All QMI clients from the modem node are gone (modem shut down or crashed). */ @@ -199,7 +199,7 @@ static void ipa_server_indication_register(struct qmi_handle *qmi, ipa_qmi->indication_requested = true; ipa_qmi_ready(ipa_qmi); /* We might be ready now */ } else { - dev_err(&ipa->pdev->dev, + dev_err(ipa->dev, "error %d sending register indication response\n", ret); } } @@ -228,7 +228,7 @@ static void ipa_server_driver_init_complete(struct qmi_handle *qmi, ipa_qmi->uc_ready = true; ipa_qmi_ready(ipa_qmi); /* We might be ready now */ } else { - dev_err(&ipa->pdev->dev, + dev_err(ipa->dev, "error %d sending init complete response\n", ret); } } @@ -417,7 +417,7 @@ static void ipa_client_init_driver_work(struct work_struct *work) qmi = &ipa_qmi->client_handle; ipa = container_of(ipa_qmi, struct ipa, qmi); - dev = &ipa->pdev->dev; + dev = ipa->dev; ret = qmi_txn_init(qmi, &txn, NULL, NULL); if (ret < 0) { diff --git a/drivers/net/ipa/ipa_reg.c b/drivers/net/ipa/ipa_reg.c index 6a3203ae6f1e..98625956e0bb 100644 --- a/drivers/net/ipa/ipa_reg.c +++ b/drivers/net/ipa/ipa_reg.c @@ -4,6 +4,7 @@ * Copyright (C) 2019-2023 Linaro Ltd. */ +#include <linux/platform_device.h> #include <linux/io.h> #include "ipa.h" @@ -132,9 +133,9 @@ static const struct regs *ipa_regs(enum ipa_version version) } } -int ipa_reg_init(struct ipa *ipa) +int ipa_reg_init(struct ipa *ipa, struct platform_device *pdev) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = &pdev->dev; const struct regs *regs; struct resource *res; @@ -146,8 +147,7 @@ int ipa_reg_init(struct ipa *ipa) return -EINVAL; /* Setup IPA register memory */ - res = platform_get_resource_byname(ipa->pdev, IORESOURCE_MEM, - "ipa-reg"); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ipa-reg"); if (!res) { dev_err(dev, "DT error getting \"ipa-reg\" memory property\n"); return -ENODEV; diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h index 2998f115f12c..62c62495b796 100644 --- a/drivers/net/ipa/ipa_reg.h +++ b/drivers/net/ipa/ipa_reg.h @@ -12,6 +12,8 @@ #include "ipa_version.h" #include "reg.h" +struct platform_device; + struct ipa; /** @@ -643,7 +645,7 @@ extern const struct regs ipa_regs_v5_5; const struct reg *ipa_reg(struct ipa *ipa, enum ipa_reg_id reg_id); -int ipa_reg_init(struct ipa *ipa); +int ipa_reg_init(struct ipa *ipa, struct platform_device *pdev); void ipa_reg_exit(struct ipa *ipa); #endif /* _IPA_REG_H_ */ diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c index 5620dc271fac..aeccce9fab72 100644 --- a/drivers/net/ipa/ipa_smp2p.c +++ b/drivers/net/ipa/ipa_smp2p.c @@ -5,7 +5,7 @@ */ #include <linux/types.h> -#include <linux/device.h> +#include <linux/platform_device.h> #include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/panic_notifier.h> @@ -84,15 +84,13 @@ struct ipa_smp2p { */ static void ipa_smp2p_notify(struct ipa_smp2p *smp2p) { - struct device *dev; u32 value; u32 mask; if (smp2p->notified) return; - dev = &smp2p->ipa->pdev->dev; - smp2p->power_on = pm_runtime_get_if_active(dev, true) > 0; + smp2p->power_on = pm_runtime_get_if_active(smp2p->ipa->dev, true) > 0; /* Signal whether the IPA power is enabled */ mask = BIT(smp2p->enabled_bit); @@ -152,15 +150,16 @@ static void ipa_smp2p_panic_notifier_unregister(struct ipa_smp2p *smp2p) static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id) { struct ipa_smp2p *smp2p = dev_id; + struct ipa *ipa = smp2p->ipa; struct device *dev; int ret; /* Ignore any (spurious) interrupts received after the first */ - if (smp2p->ipa->setup_complete) + if (ipa->setup_complete) return IRQ_HANDLED; /* Power needs to be active for setup */ - dev = &smp2p->ipa->pdev->dev; + dev = ipa->dev; ret = pm_runtime_get_sync(dev); if (ret < 0) { dev_err(dev, "error %d getting power for setup\n", ret); @@ -168,7 +167,7 @@ static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id) } /* An error here won't cause driver shutdown, so warn if one occurs */ - ret = ipa_setup(smp2p->ipa); + ret = ipa_setup(ipa); WARN(ret != 0, "error %d from ipa_setup()\n", ret); out_power_put: @@ -179,14 +178,15 @@ out_power_put: } /* Initialize SMP2P interrupts */ -static int ipa_smp2p_irq_init(struct ipa_smp2p *smp2p, const char *name, - irq_handler_t handler) +static int ipa_smp2p_irq_init(struct ipa_smp2p *smp2p, + struct platform_device *pdev, + const char *name, irq_handler_t handler) { - struct device *dev = &smp2p->ipa->pdev->dev; + struct device *dev = &pdev->dev; unsigned int irq; int ret; - ret = platform_get_irq_byname(smp2p->ipa->pdev, name); + ret = platform_get_irq_byname(pdev, name); if (ret <= 0) return ret ? : -EINVAL; irq = ret; @@ -208,7 +208,7 @@ static void ipa_smp2p_irq_exit(struct ipa_smp2p *smp2p, u32 irq) /* Drop the power reference if it was taken in ipa_smp2p_notify() */ static void ipa_smp2p_power_release(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; if (!ipa->smp2p->power_on) return; @@ -219,10 +219,11 @@ static void ipa_smp2p_power_release(struct ipa *ipa) } /* Initialize the IPA SMP2P subsystem */ -int ipa_smp2p_init(struct ipa *ipa, bool modem_init) +int +ipa_smp2p_init(struct ipa *ipa, struct platform_device *pdev, bool modem_init) { struct qcom_smem_state *enabled_state; - struct device *dev = &ipa->pdev->dev; + struct device *dev = &pdev->dev; struct qcom_smem_state *valid_state; struct ipa_smp2p *smp2p; u32 enabled_bit; @@ -261,7 +262,7 @@ int ipa_smp2p_init(struct ipa *ipa, bool modem_init) /* We have enough information saved to handle notifications */ ipa->smp2p = smp2p; - ret = ipa_smp2p_irq_init(smp2p, "ipa-clock-query", + ret = ipa_smp2p_irq_init(smp2p, pdev, "ipa-clock-query", ipa_smp2p_modem_clk_query_isr); if (ret < 0) goto err_null_smp2p; @@ -273,7 +274,7 @@ int ipa_smp2p_init(struct ipa *ipa, bool modem_init) if (modem_init) { /* Result will be non-zero (negative for error) */ - ret = ipa_smp2p_irq_init(smp2p, "ipa-setup-ready", + ret = ipa_smp2p_irq_init(smp2p, pdev, "ipa-setup-ready", ipa_smp2p_modem_setup_ready_isr); if (ret < 0) goto err_notifier_unregister; diff --git a/drivers/net/ipa/ipa_smp2p.h b/drivers/net/ipa/ipa_smp2p.h index 9b969b03d1a4..2a3d8eefb13b 100644 --- a/drivers/net/ipa/ipa_smp2p.h +++ b/drivers/net/ipa/ipa_smp2p.h @@ -8,17 +8,20 @@ #include <linux/types.h> +struct platform_device; + struct ipa; /** * ipa_smp2p_init() - Initialize the IPA SMP2P subsystem * @ipa: IPA pointer + * @pdev: Platform device pointer * @modem_init: Whether the modem is responsible for GSI initialization * * Return: 0 if successful, or a negative error code - * */ -int ipa_smp2p_init(struct ipa *ipa, bool modem_init); +int ipa_smp2p_init(struct ipa *ipa, struct platform_device *pdev, + bool modem_init); /** * ipa_smp2p_exit() - Inverse of ipa_smp2p_init() diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c index 7b637bb8b41c..a24ac11b8893 100644 --- a/drivers/net/ipa/ipa_table.c +++ b/drivers/net/ipa/ipa_table.c @@ -163,7 +163,7 @@ ipa_table_mem(struct ipa *ipa, bool filter, bool hashed, bool ipv6) bool ipa_filtered_valid(struct ipa *ipa, u64 filtered) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; u32 count; if (!filtered) { @@ -236,8 +236,7 @@ ipa_filter_reset_table(struct ipa *ipa, bool hashed, bool ipv6, bool modem) trans = ipa_cmd_trans_alloc(ipa, hweight64(ep_mask)); if (!trans) { - dev_err(&ipa->pdev->dev, - "no transaction for %s filter reset\n", + dev_err(ipa->dev, "no transaction for %s filter reset\n", modem ? "modem" : "AP"); return -EBUSY; } @@ -298,8 +297,7 @@ static int ipa_route_reset(struct ipa *ipa, bool modem) trans = ipa_cmd_trans_alloc(ipa, hash_support ? 4 : 2); if (!trans) { - dev_err(&ipa->pdev->dev, - "no transaction for %s route reset\n", + dev_err(ipa->dev, "no transaction for %s route reset\n", modem ? "modem" : "AP"); return -EBUSY; } @@ -327,7 +325,7 @@ static int ipa_route_reset(struct ipa *ipa, bool modem) void ipa_table_reset(struct ipa *ipa, bool modem) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; const char *ee_name; int ret; @@ -356,7 +354,7 @@ int ipa_table_hash_flush(struct ipa *ipa) trans = ipa_cmd_trans_alloc(ipa, 1); if (!trans) { - dev_err(&ipa->pdev->dev, "no transaction for hash flush\n"); + dev_err(ipa->dev, "no transaction for hash flush\n"); return -EBUSY; } @@ -469,7 +467,7 @@ int ipa_table_setup(struct ipa *ipa) */ trans = ipa_cmd_trans_alloc(ipa, 8); if (!trans) { - dev_err(&ipa->pdev->dev, "no transaction for table setup\n"); + dev_err(ipa->dev, "no transaction for table setup\n"); return -EBUSY; } @@ -713,7 +711,7 @@ bool ipa_table_mem_valid(struct ipa *ipa, bool filter) */ int ipa_table_init(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; dma_addr_t addr; __le64 le_addr; __le64 *virt; @@ -763,7 +761,7 @@ int ipa_table_init(struct ipa *ipa) void ipa_table_exit(struct ipa *ipa) { u32 count = max_t(u32, 1 + ipa->filter_count, ipa->route_count); - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; size_t size; size = IPA_ZERO_RULE_SIZE + (1 + count) * sizeof(__le64); diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c index 7eaa0b4ebed9..bfd5dc6dab43 100644 --- a/drivers/net/ipa/ipa_uc.c +++ b/drivers/net/ipa/ipa_uc.c @@ -127,7 +127,7 @@ static struct ipa_uc_mem_area *ipa_uc_shared(struct ipa *ipa) static void ipa_uc_event_handler(struct ipa *ipa) { struct ipa_uc_mem_area *shared = ipa_uc_shared(ipa); - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; if (shared->event == IPA_UC_EVENT_ERROR) dev_err(dev, "microcontroller error event\n"); @@ -141,7 +141,7 @@ static void ipa_uc_event_handler(struct ipa *ipa) static void ipa_uc_response_hdlr(struct ipa *ipa) { struct ipa_uc_mem_area *shared = ipa_uc_shared(ipa); - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; /* An INIT_COMPLETED response message is sent to the AP by the * microcontroller when it is operational. Other than this, the AP @@ -191,7 +191,7 @@ void ipa_uc_config(struct ipa *ipa) /* Inverse of ipa_uc_config() */ void ipa_uc_deconfig(struct ipa *ipa) { - struct device *dev = &ipa->pdev->dev; + struct device *dev = ipa->dev; ipa_interrupt_disable(ipa, IPA_IRQ_UC_1); ipa_interrupt_disable(ipa, IPA_IRQ_UC_0); @@ -208,8 +208,8 @@ void ipa_uc_deconfig(struct ipa *ipa) /* Take a proxy power reference for the microcontroller */ void ipa_uc_power(struct ipa *ipa) { + struct device *dev = ipa->dev; static bool already; - struct device *dev; int ret; if (already) @@ -217,7 +217,6 @@ void ipa_uc_power(struct ipa *ipa) already = true; /* Only do this on first boot */ /* This power reference dropped in ipa_uc_response_hdlr() above */ - dev = &ipa->pdev->dev; ret = pm_runtime_get_sync(dev); if (ret < 0) { pm_runtime_put_noidle(dev); diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c index 5e19a6839dea..e5a0987a263e 100644 --- a/drivers/net/nlmon.c +++ b/drivers/net/nlmon.c @@ -17,17 +17,6 @@ static netdev_tx_t nlmon_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static int nlmon_dev_init(struct net_device *dev) -{ - dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); - return dev->lstats == NULL ? -ENOMEM : 0; -} - -static void nlmon_dev_uninit(struct net_device *dev) -{ - free_percpu(dev->lstats); -} - struct nlmon { struct netlink_tap nt; }; @@ -51,15 +40,7 @@ static int nlmon_close(struct net_device *dev) static void nlmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { - u64 packets, bytes; - - dev_lstats_read(dev, &packets, &bytes); - - stats->rx_packets = packets; - stats->tx_packets = 0; - - stats->rx_bytes = bytes; - stats->tx_bytes = 0; + dev_lstats_read(dev, &stats->rx_packets, &stats->rx_bytes); } static u32 always_on(struct net_device *dev) @@ -72,8 +53,6 @@ static const struct ethtool_ops nlmon_ethtool_ops = { }; static const struct net_device_ops nlmon_ops = { - .ndo_init = nlmon_dev_init, - .ndo_uninit = nlmon_dev_uninit, .ndo_open = nlmon_open, .ndo_stop = nlmon_close, .ndo_start_xmit = nlmon_xmit, @@ -92,6 +71,7 @@ static void nlmon_setup(struct net_device *dev) dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_LLTX; dev->flags = IFF_NOARP; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS; /* That's rather a softlimit here, which, of course, * can be altered. Not a real MTU, but what is to be diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index a176653c8861..df275b4fccb6 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -263,7 +263,7 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) * call skb_cow_data, so that there's no chance that data is removed * from the skb, so that later we can extract the original endpoint. */ - offset = skb->data - skb_network_header(skb); + offset = -skb_network_offset(skb); skb_push(skb, offset); num_frags = skb_cow_data(skb, 0, &trailer); offset += sizeof(struct message_data); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index e502f4ee9e1f..782ddc8c296b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1015,8 +1015,7 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb, netdev_features_t netdev_flags = NETIF_F_CSUM_MASK | NETIF_F_SG; u8 tid; - snap_ip_tcp = 8 + skb_transport_header(skb) - skb_network_header(skb) + - tcp_hdrlen(skb); + snap_ip_tcp = 8 + skb_network_header_len(skb) + tcp_hdrlen(skb); if (!mvmsta->max_amsdu_len || !ieee80211_is_data_qos(hdr->frame_control) || diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 6c2b37e56c78..fa8eba47dc4c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1331,7 +1331,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, trans->txqs.tfd.size, &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 0); - ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb); + ip_hdrlen = skb_network_header_len(skb); snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb); total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len - iv_len; amsdu_pad = 0; diff --git a/drivers/net/wireless/intel/iwlwifi/queue/tx.c b/drivers/net/wireless/intel/iwlwifi/queue/tx.c index d3bde2d010b7..33973a60d0bf 100644 --- a/drivers/net/wireless/intel/iwlwifi/queue/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/queue/tx.c @@ -353,7 +353,7 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd), &dev_cmd->hdr, start_len, 0); - ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb); + ip_hdrlen = skb_network_header_len(skb); snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb); total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len; amsdu_pad = 0; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index a6d596e05602..3692b56cb58d 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1344,7 +1344,6 @@ static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl) static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) { - struct page *page; struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; unsigned int noreclaim_flag; @@ -1355,11 +1354,7 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) if (queue->hdr_digest || queue->data_digest) nvme_tcp_free_crypto(queue); - if (queue->pf_cache.va) { - page = virt_to_head_page(queue->pf_cache.va); - __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); - queue->pf_cache.va = NULL; - } + page_frag_cache_drain(&queue->pf_cache); noreclaim_flag = memalloc_noreclaim_save(); /* ->sock will be released by fput() */ diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index c8655fc5aa5b..2aa5762e9f50 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1591,7 +1591,6 @@ static void nvmet_tcp_free_cmd_data_in_buffers(struct nvmet_tcp_queue *queue) static void nvmet_tcp_release_queue_work(struct work_struct *w) { - struct page *page; struct nvmet_tcp_queue *queue = container_of(w, struct nvmet_tcp_queue, release_work); @@ -1615,8 +1614,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) if (queue->hdr_digest || queue->data_digest) nvmet_tcp_free_crypto(queue); ida_free(&nvmet_tcp_queue_ida, queue->idx); - page = virt_to_head_page(queue->pf_cache.va); - __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); + page_frag_cache_drain(&queue->pf_cache); kfree(queue); } diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index f2ed7167c848..4b2fcb228a0a 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -141,10 +141,8 @@ struct vhost_net { unsigned tx_zcopy_err; /* Flush in progress. Protected by tx vq lock. */ bool tx_flush; - /* Private page frag */ - struct page_frag page_frag; - /* Refcount bias of page frag */ - int refcnt_bias; + /* Private page frag cache */ + struct page_frag_cache pf_cache; }; static unsigned vhost_net_zcopy_mask __read_mostly; @@ -655,41 +653,6 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len) !vhost_vq_avail_empty(vq->dev, vq); } -static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz, - struct page_frag *pfrag, gfp_t gfp) -{ - if (pfrag->page) { - if (pfrag->offset + sz <= pfrag->size) - return true; - __page_frag_cache_drain(pfrag->page, net->refcnt_bias); - } - - pfrag->offset = 0; - net->refcnt_bias = 0; - if (SKB_FRAG_PAGE_ORDER) { - /* Avoid direct reclaim but allow kswapd to wake */ - pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | - __GFP_COMP | __GFP_NOWARN | - __GFP_NORETRY, - SKB_FRAG_PAGE_ORDER); - if (likely(pfrag->page)) { - pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; - goto done; - } - } - pfrag->page = alloc_page(gfp); - if (likely(pfrag->page)) { - pfrag->size = PAGE_SIZE; - goto done; - } - return false; - -done: - net->refcnt_bias = USHRT_MAX; - page_ref_add(pfrag->page, USHRT_MAX - 1); - return true; -} - #define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, @@ -699,7 +662,6 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); struct socket *sock = vhost_vq_get_backend(vq); - struct page_frag *alloc_frag = &net->page_frag; struct virtio_net_hdr *gso; struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp]; struct tun_xdp_hdr *hdr; @@ -710,6 +672,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, int sock_hlen = nvq->sock_hlen; void *buf; int copied; + int ret; if (unlikely(len < nvq->sock_hlen)) return -EFAULT; @@ -719,18 +682,17 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, return -ENOSPC; buflen += SKB_DATA_ALIGN(len + pad); - alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES); - if (unlikely(!vhost_net_page_frag_refill(net, buflen, - alloc_frag, GFP_KERNEL))) + buf = page_frag_alloc_align(&net->pf_cache, buflen, GFP_KERNEL, + SMP_CACHE_BYTES); + if (unlikely(!buf)) return -ENOMEM; - buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; - copied = copy_page_from_iter(alloc_frag->page, - alloc_frag->offset + - offsetof(struct tun_xdp_hdr, gso), - sock_hlen, from); - if (copied != sock_hlen) - return -EFAULT; + copied = copy_from_iter(buf + offsetof(struct tun_xdp_hdr, gso), + sock_hlen, from); + if (copied != sock_hlen) { + ret = -EFAULT; + goto err; + } hdr = buf; gso = &hdr->gso; @@ -743,27 +705,30 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, vhost16_to_cpu(vq, gso->csum_start) + vhost16_to_cpu(vq, gso->csum_offset) + 2); - if (vhost16_to_cpu(vq, gso->hdr_len) > len) - return -EINVAL; + if (vhost16_to_cpu(vq, gso->hdr_len) > len) { + ret = -EINVAL; + goto err; + } } len -= sock_hlen; - copied = copy_page_from_iter(alloc_frag->page, - alloc_frag->offset + pad, - len, from); - if (copied != len) - return -EFAULT; + copied = copy_from_iter(buf + pad, len, from); + if (copied != len) { + ret = -EFAULT; + goto err; + } xdp_init_buff(xdp, buflen, NULL); xdp_prepare_buff(xdp, buf, pad, len, true); hdr->buflen = buflen; - --net->refcnt_bias; - alloc_frag->offset += buflen; - ++nvq->batched_xdp; return 0; + +err: + page_frag_free(buf); + return ret; } static void handle_tx_copy(struct vhost_net *net, struct socket *sock) @@ -1353,8 +1318,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) vqs[VHOST_NET_VQ_RX]); f->private_data = n; - n->page_frag.page = NULL; - n->refcnt_bias = 0; + n->pf_cache.va = NULL; return 0; } @@ -1422,8 +1386,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue); kfree(n->vqs[VHOST_NET_VQ_TX].xdp); kfree(n->dev.vqs); - if (n->page_frag.page) - __page_frag_cache_drain(n->page_frag.page, n->refcnt_bias); + page_frag_cache_drain(&n->pf_cache); kvfree(n); return 0; } diff --git a/include/linux/gfp.h b/include/linux/gfp.h index de292a007138..6cef1c241180 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -311,15 +311,23 @@ extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); struct page_frag_cache; +void page_frag_cache_drain(struct page_frag_cache *nc); extern void __page_frag_cache_drain(struct page *page, unsigned int count); -extern void *page_frag_alloc_align(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask, - unsigned int align_mask); +void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, + gfp_t gfp_mask, unsigned int align_mask); + +static inline void *page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align) +{ + WARN_ON_ONCE(!is_power_of_2(align)); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); +} static inline void *page_frag_alloc(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask) { - return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); } extern void page_frag_free(void *addr); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1470b74fb6d2..3013355b63f5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -822,9 +822,9 @@ typedef unsigned char *sk_buff_data_t; * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required * @mono_delivery_time: When set, skb->tstamp has the - * delivery_time in mono clock base (i.e. EDT). Otherwise, the - * skb->tstamp has the (rcv) timestamp at ingress and - * delivery_time at egress. + * delivery_time in mono clock base (i.e., EDT) or a clock base chosen + * by SO_TXTIME. If zero, skb->tstamp has the (rcv) timestamp at + * ingress. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @alloc_cpu: CPU which did the skb allocation. @@ -3036,6 +3036,7 @@ static inline int skb_transport_offset(const struct sk_buff *skb) static inline u32 skb_network_header_len(const struct sk_buff *skb) { + DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)); return skb->transport_header - skb->network_header; } diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 988a30ef6bfe..55399ee2a57e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -304,7 +304,7 @@ struct tcp_sock { __cacheline_group_end(tcp_sock_write_txrx); /* RX read-write hotpath cache lines */ - __cacheline_group_begin(tcp_sock_write_rx); + __cacheline_group_begin(tcp_sock_write_rx) __aligned(8); u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived * sum(delta(rcv_nxt)), or how many bytes diff --git a/include/net/gro.h b/include/net/gro.h index b435f0ddbf64..2b58671a6549 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -139,21 +139,16 @@ static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) NAPI_GRO_CB(skb)->data_offset += len; } -static inline void *skb_gro_header_fast(struct sk_buff *skb, +static inline void *skb_gro_header_fast(const struct sk_buff *skb, unsigned int offset) { return NAPI_GRO_CB(skb)->frag0 + offset; } -static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) +static inline bool skb_gro_may_pull(const struct sk_buff *skb, + unsigned int hlen) { - return NAPI_GRO_CB(skb)->frag0_len < hlen; -} - -static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) -{ - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; + return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len); } static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, @@ -162,28 +157,30 @@ static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, if (!pskb_may_pull(skb, hlen)) return NULL; - skb_gro_frag0_invalidate(skb); return skb->data + offset; } -static inline void *skb_gro_header(struct sk_buff *skb, - unsigned int hlen, unsigned int offset) +static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen, + unsigned int offset) { void *ptr; ptr = skb_gro_header_fast(skb, offset); - if (skb_gro_header_hard(skb, hlen)) + if (!skb_gro_may_pull(skb, hlen)) ptr = skb_gro_header_slow(skb, hlen, offset); return ptr; } -static inline void *skb_gro_network_header(struct sk_buff *skb) +static inline void *skb_gro_network_header(const struct sk_buff *skb) { - return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + - skb_network_offset(skb); + if (skb_gro_may_pull(skb, skb_gro_offset(skb))) + return skb_gro_header_fast(skb, skb_network_offset(skb)); + + return skb_network_header(skb); } -static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) +static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb, + int proto) { const struct iphdr *iph = skb_gro_network_header(skb); @@ -421,7 +418,8 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) return uh; } -static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) +static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb, + int proto) { const struct ipv6hdr *iph = skb_gro_network_header(skb); diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 5a568bbbeaeb..82243cb6c54d 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1358,7 +1358,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, enum cgroup_bpf_attach_type atype) { - unsigned int offset = skb->data - skb_network_header(skb); + unsigned int offset = -skb_network_offset(skb); struct sock *save_sk; void *saved_data_end; struct cgroup *cgrp; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 150d4f23b010..06aa1ebbd21c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4685,8 +4685,8 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, gfp_t gfp = gfp_mask; #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) - gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | - __GFP_NOMEMALLOC; + gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | + __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC; page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, PAGE_FRAG_CACHE_MAX_ORDER); nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE; @@ -4699,6 +4699,16 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc, return page; } +void page_frag_cache_drain(struct page_frag_cache *nc) +{ + if (!nc->va) + return; + + __page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias); + nc->va = NULL; +} +EXPORT_SYMBOL(page_frag_cache_drain); + void __page_frag_cache_drain(struct page *page, unsigned int count) { VM_BUG_ON_PAGE(page_ref_count(page) == 0, page); @@ -4708,9 +4718,9 @@ void __page_frag_cache_drain(struct page *page, unsigned int count) } EXPORT_SYMBOL(__page_frag_cache_drain); -void *page_frag_alloc_align(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask, - unsigned int align_mask) +void *__page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align_mask) { unsigned int size = PAGE_SIZE; struct page *page; @@ -4779,7 +4789,7 @@ refill: return nc->va + offset; } -EXPORT_SYMBOL(page_frag_alloc_align); +EXPORT_SYMBOL(__page_frag_alloc_align); /* * Frees a page fragment allocated out of either a compound or order 0 page. diff --git a/net/core/gro.c b/net/core/gro.c index 0759277dc14e..6a0edbd826a1 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -369,15 +369,21 @@ static void gro_list_prepare(const struct list_head *head, static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff) { - const struct skb_shared_info *pinfo = skb_shinfo(skb); - const skb_frag_t *frag0 = &pinfo->frags[0]; + const struct skb_shared_info *pinfo; + const skb_frag_t *frag0; + unsigned int headlen; NAPI_GRO_CB(skb)->data_offset = 0; - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; + headlen = skb_headlen(skb); + NAPI_GRO_CB(skb)->frag0 = skb->data; + NAPI_GRO_CB(skb)->frag0_len = headlen; + if (headlen) + return; + + pinfo = skb_shinfo(skb); + frag0 = &pinfo->frags[0]; - if (!skb_headlen(skb) && pinfo->nr_frags && - !PageHighMem(skb_frag_page(frag0)) && + if (pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0)) && (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, @@ -700,7 +706,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) skb_reset_mac_header(skb); skb_gro_reset_offset(skb, hlen); - if (unlikely(skb_gro_header_hard(skb, hlen))) { + if (unlikely(!skb_gro_may_pull(skb, hlen))) { eth = skb_gro_header_slow(skb, hlen, 0); if (unlikely(!eth)) { net_warn_ratelimited("%s: dropping impossible skb from %s\n", @@ -710,7 +716,10 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) } } else { eth = (const struct ethhdr *)skb->data; - gro_pull_from_frag0(skb, hlen); + + if (NAPI_GRO_CB(skb)->frag0 != skb->data) + gro_pull_from_frag0(skb, hlen); + NAPI_GRO_CB(skb)->frag0 += hlen; NAPI_GRO_CB(skb)->frag0_len -= hlen; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1f918e602bc4..43d7fc150acc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -315,7 +315,8 @@ void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) fragsz = SKB_DATA_ALIGN(fragsz); - return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); + return __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, + align_mask); } EXPORT_SYMBOL(__napi_alloc_frag_align); @@ -327,13 +328,15 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) if (in_hardirq() || irqs_disabled()) { struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache); - data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask); + data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, + align_mask); } else { struct napi_alloc_cache *nc; local_bh_disable(); nc = this_cpu_ptr(&napi_alloc_cache); - data = page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask); + data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, + align_mask); local_bh_enable(); } return data; diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c index 0c41076e31ed..a8494f796dca 100644 --- a/net/ipv4/fou_core.c +++ b/net/ipv4/fou_core.c @@ -351,7 +351,7 @@ static struct sk_buff *gue_gro_receive(struct sock *sk, optlen = guehdr->hlen << 2; len += optlen; - if (skb_gro_header_hard(skb, len)) { + if (!skb_gro_may_pull(skb, len)) { guehdr = skb_gro_header_slow(skb, len, off); if (unlikely(!guehdr)) goto out; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 311e70bfce40..5028c72d494a 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -174,7 +174,7 @@ static struct sk_buff *gre_gro_receive(struct list_head *head, grehlen += GRE_HEADER_SECTION; hlen = off + grehlen; - if (skb_gro_header_hard(skb, hlen)) { + if (!skb_gro_may_pull(skb, hlen)) { greh = skb_gro_header_slow(skb, hlen, off); if (unlikely(!greh)) goto out; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1fe794967211..33f93dc730a3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1458,6 +1458,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); skb->mark = cork->mark; skb->tstamp = cork->transmit_time; + skb->mono_delivery_time = !!skb->tstamp; /* * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec * on dst refcount diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 7d2bdfd7e7d7..494a6284bd7e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -310,7 +310,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) } nf_reset_ct(skb); - skb_push(skb, skb->data - skb_network_header(skb)); + skb_push(skb, -skb_network_offset(skb)); raw_rcv_skb(sk, skb); return 0; @@ -353,6 +353,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; + skb->mono_delivery_time = !!skb->tstamp; skb_dst_set(skb, &rt->dst); *rtp = NULL; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c82dc42f57c6..7e1b848398d0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4651,7 +4651,7 @@ static void __init tcp_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags); - CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 113); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 105); /* TXRX read-write hotpath cache lines */ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 8311c38267b5..b955ab3b236d 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -204,7 +204,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb) goto out; hlen = off + thlen; - if (skb_gro_header_hard(skb, hlen)) { + if (!skb_gro_may_pull(skb, hlen)) { th = skb_gro_header_slow(skb, hlen, off); if (unlikely(!th)) goto out; @@ -299,18 +299,20 @@ out: void tcp_gro_complete(struct sk_buff *skb) { struct tcphdr *th = tcp_hdr(skb); + struct skb_shared_info *shinfo; + + if (skb->encapsulation) + skb->inner_transport_header = skb->transport_header; skb->csum_start = (unsigned char *)th - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; - skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + shinfo = skb_shinfo(skb); + shinfo->gso_segs = NAPI_GRO_CB(skb)->count; if (th->cwr) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - - if (skb->encapsulation) - skb->inner_transport_header = skb->transport_header; + shinfo->gso_type |= SKB_GSO_TCP_ECN; } EXPORT_SYMBOL(tcp_gro_complete); @@ -335,10 +337,9 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff) th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, iph->daddr, 0); - skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; - if (NAPI_GRO_CB(skb)->is_atomic) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4 | + (NAPI_GRO_CB(skb)->is_atomic * SKB_GSO_TCP_FIXEDID); tcp_gro_complete(skb); return 0; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index c54676998eb6..dae35101d189 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -58,7 +58,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) return -iph->protocol; #endif - __skb_push(skb, skb->data - skb_network_header(skb)); + __skb_push(skb, -skb_network_offset(skb)); iph->tot_len = htons(skb->len); ip_send_check(iph); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 26f1e4a5ade0..25a5b394481b 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -804,7 +804,7 @@ looped_back: ip6_route_input(skb); if (skb_dst(skb)->error) { - skb_push(skb, skb->data - skb_network_header(skb)); + skb_push(skb, -skb_network_offset(skb)); dst_input(skb); return -1; } @@ -821,7 +821,7 @@ looped_back: goto looped_back; } - skb_push(skb, skb->data - skb_network_header(skb)); + skb_push(skb, -skb_network_offset(skb)); dst_input(skb); return -1; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b9dd3a66e423..02eeca5492cd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1925,7 +1925,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = cork->base.mark; skb->tstamp = cork->base.transmit_time; - + skb->mono_delivery_time = !!skb->tstamp; ip6_cork_steal_dst(skb, cork); IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); if (proto == IPPROTO_ICMPV6) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5fd07581efaf..e9cc315832cb 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -247,7 +247,6 @@ static void ip6_dev_free(struct net_device *dev) gro_cells_destroy(&t->gro_cells); dst_cache_destroy(&t->dst_cache); - free_percpu(dev->tstats); } static int ip6_tnl_create2(struct net_device *dev) @@ -1848,6 +1847,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev) dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); dev->features |= NETIF_F_LLTX; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; netif_keep_dst(dev); dev->features |= IPXIPX_FEATURES; @@ -1873,13 +1873,10 @@ ip6_tnl_dev_init_gen(struct net_device *dev) t->dev = dev; t->net = dev_net(dev); - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; ret = dst_cache_init(&t->dst_cache, GFP_KERNEL); if (ret) - goto free_stats; + return ret; ret = gro_cells_init(&t->gro_cells, dev); if (ret) @@ -1903,9 +1900,6 @@ ip6_tnl_dev_init_gen(struct net_device *dev) destroy_dst: dst_cache_destroy(&t->dst_cache); -free_stats: - free_percpu(dev->tstats); - dev->tstats = NULL; return ret; } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index b2dd48911c8d..1a51a44571c3 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -327,9 +327,9 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, if (!reasm_data) goto err; - payload_len = ((skb->data - skb_network_header(skb)) - + payload_len = -skb_network_offset(skb) - sizeof(struct ipv6hdr) + fq->q.len - - sizeof(struct frag_hdr)); + sizeof(struct frag_hdr); if (payload_len > IPV6_MAXPLEN) { net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", payload_len); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 76e6eb3b643d..779274055abf 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -615,7 +615,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; - + skb->mono_delivery_time = !!skb->tstamp; skb_put(skb, length); skb_reset_network_header(skb); iph = ipv6_hdr(skb); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 5ebc47da1000..acb4f119e11f 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -272,9 +272,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, if (!reasm_data) goto out_oom; - payload_len = ((skb->data - skb_network_header(skb)) - + payload_len = -skb_network_offset(skb) - sizeof(struct ipv6hdr) + fq->q.len - - sizeof(struct frag_hdr)); + sizeof(struct frag_hdr); if (payload_len > IPV6_MAXPLEN) goto out_oversize; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 6e36e5047fba..a17d783dc7c0 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -43,7 +43,7 @@ static int xfrm6_transport_finish2(struct net *net, struct sock *sk, int xfrm6_transport_finish(struct sk_buff *skb, int async) { struct xfrm_offload *xo = xfrm_offload(skb); - int nhlen = skb->data - skb_network_header(skb); + int nhlen = -skb_network_offset(skb); skb_network_header(skb)[IP6CB(skb)->nhoff] = XFRM_MODE_SKB_CB(skb)->protocol; diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c index 670da7822e6c..c30a2a90a192 100644 --- a/net/mptcp/mptcp_pm_gen.c +++ b/net/mptcp/mptcp_pm_gen.c @@ -32,8 +32,9 @@ const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] }; /* MPTCP_PM_CMD_GET_ADDR - do */ -const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = { - [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ATTR_TOKEN + 1] = { + [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, }; /* MPTCP_PM_CMD_FLUSH_ADDRS - do */ @@ -110,7 +111,7 @@ const struct genl_ops mptcp_pm_nl_ops[11] = { .doit = mptcp_pm_nl_get_addr_doit, .dumpit = mptcp_pm_nl_get_addr_dumpit, .policy = mptcp_pm_get_addr_nl_policy, - .maxattr = MPTCP_PM_ENDPOINT_ADDR, + .maxattr = MPTCP_PM_ATTR_TOKEN, .flags = GENL_UNS_ADMIN_PERM, }, { diff --git a/net/mptcp/mptcp_pm_gen.h b/net/mptcp/mptcp_pm_gen.h index ac9fc7225b6a..e24258f6f819 100644 --- a/net/mptcp/mptcp_pm_gen.h +++ b/net/mptcp/mptcp_pm_gen.h @@ -18,7 +18,7 @@ extern const struct nla_policy mptcp_pm_add_addr_nl_policy[MPTCP_PM_ENDPOINT_ADD extern const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; -extern const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; +extern const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ATTR_TOKEN + 1]; extern const struct nla_policy mptcp_pm_flush_addrs_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 53e0b08b1123..b4bdd92a5648 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -441,6 +441,22 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); } +int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info) +{ + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) + return mptcp_userspace_pm_get_addr(skb, info); + return mptcp_pm_nl_get_addr(skb, info); +} + +int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb) +{ + const struct genl_info *info = genl_info_dump(cb); + + if (info->attrs[MPTCP_PM_ATTR_TOKEN]) + return mptcp_userspace_pm_dump_addr(msg, cb); + return mptcp_pm_nl_dump_addr(msg, cb); +} + int mptcp_pm_set_flags(struct net *net, struct nlattr *token, struct mptcp_pm_addr_entry *loc, struct mptcp_pm_addr_entry *rem, u8 bkup) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d5a942b9ab29..16f8bd47f4b8 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -18,9 +18,6 @@ #include "protocol.h" #include "mib.h" -/* forward declaration */ -static struct genl_family mptcp_genl_family; - static int pm_nl_pernet_id; struct mptcp_pm_add_entry { @@ -1550,8 +1547,8 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) } } -void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, - struct list_head *rm_list) +static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, + struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; struct mptcp_pm_addr_entry *entry; @@ -1636,8 +1633,8 @@ int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) return 0; } -static int mptcp_nl_fill_addr(struct sk_buff *skb, - struct mptcp_pm_addr_entry *entry) +int mptcp_nl_fill_addr(struct sk_buff *skb, + struct mptcp_pm_addr_entry *entry) { struct mptcp_addr_info *addr = &entry->addr; struct nlattr *attr; @@ -1675,7 +1672,7 @@ nla_put_failure: return -EMSGSIZE; } -int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); @@ -1725,8 +1722,13 @@ fail: return ret; } -int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) +{ + return mptcp_pm_get_addr(skb, info); +} + +int mptcp_pm_nl_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb) { struct net *net = sock_net(msg->sk); struct mptcp_pm_addr_entry *entry; @@ -1768,6 +1770,12 @@ int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, return msg->len; } +int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + return mptcp_pm_dump_addr(msg, cb); +} + static int parse_limit(struct genl_info *info, int id, unsigned int *limit) { struct nlattr *attr = info->attrs[id]; @@ -2281,7 +2289,7 @@ nla_put_failure: nlmsg_free(skb); } -static struct genl_family mptcp_genl_family __ro_after_init = { +struct genl_family mptcp_genl_family __ro_after_init = { .name = MPTCP_PM_NAME, .version = MPTCP_PM_VER, .netnsok = true, diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index bc97cc30f013..b9809d988693 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -106,19 +106,26 @@ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, return -EINVAL; } +static struct mptcp_pm_addr_entry * +mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id) +{ + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (entry->addr.id == id) + return entry; + } + return NULL; +} + int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex) { - struct mptcp_pm_addr_entry *entry, *match = NULL; + struct mptcp_pm_addr_entry *match; spin_lock_bh(&msk->pm.lock); - list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { - if (id == entry->addr.id) { - match = entry; - break; - } - } + match = mptcp_userspace_pm_lookup_addr_by_id(msk, id); spin_unlock_bh(&msk->pm.lock); if (match) { *flags = match->flags; @@ -261,7 +268,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; - struct mptcp_pm_addr_entry *match = NULL; + struct mptcp_pm_addr_entry *match; struct mptcp_pm_addr_entry *entry; struct mptcp_sock *msk; LIST_HEAD(free_list); @@ -298,13 +305,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) lock_sock(sk); - list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { - if (entry->addr.id == id_val) { - match = entry; - break; - } - } - + match = mptcp_userspace_pm_lookup_addr_by_id(msk, id_val); if (!match) { GENL_SET_ERR_MSG(info, "address with specified id not found"); release_sock(sk); @@ -334,7 +335,6 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_pm_addr_entry local = { 0 }; struct mptcp_addr_info addr_r; - struct mptcp_addr_info addr_l; struct mptcp_sock *msk; int err = -EINVAL; struct sock *sk; @@ -360,25 +360,31 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) goto create_err; } - err = mptcp_pm_parse_addr(laddr, info, &addr_l); + err = mptcp_pm_parse_entry(laddr, info, true, &local); if (err < 0) { NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); goto create_err; } + if (local.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + GENL_SET_ERR_MSG(info, "invalid addr flags"); + err = -EINVAL; + goto create_err; + } + local.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; + err = mptcp_pm_parse_addr(raddr, info, &addr_r); if (err < 0) { NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); goto create_err; } - if (!mptcp_pm_addr_families_match(sk, &addr_l, &addr_r)) { + if (!mptcp_pm_addr_families_match(sk, &local.addr, &addr_r)) { GENL_SET_ERR_MSG(info, "families mismatch"); err = -EINVAL; goto create_err; } - local.addr = addr_l; err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); @@ -387,7 +393,7 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) lock_sock(sk); - err = __mptcp_subflow_connect(sk, &addr_l, &addr_r); + err = __mptcp_subflow_connect(sk, &local.addr, &addr_r); release_sock(sk); @@ -572,3 +578,137 @@ set_flags_err: sock_put(sk); return ret; } + +int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct id_bitmap { + DECLARE_BITMAP(map, MPTCP_PM_MAX_ADDR_ID + 1); + } *bitmap; + const struct genl_info *info = genl_info_dump(cb); + struct net *net = sock_net(msg->sk); + struct mptcp_pm_addr_entry *entry; + struct mptcp_sock *msk; + struct nlattr *token; + int ret = -EINVAL; + struct sock *sk; + void *hdr; + + bitmap = (struct id_bitmap *)cb->ctx; + token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + + msk = mptcp_token_get_sock(net, nla_get_u32(token)); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return ret; + } + + sk = (struct sock *)msk; + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto out; + } + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (test_bit(entry->addr.id, bitmap->map)) + continue; + + hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, &mptcp_genl_family, + NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); + if (!hdr) + break; + + if (mptcp_nl_fill_addr(msg, entry) < 0) { + genlmsg_cancel(msg, hdr); + break; + } + + __set_bit(entry->addr.id, bitmap->map); + genlmsg_end(msg, hdr); + } + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + ret = msg->len; + +out: + sock_put(sk); + return ret; +} + +int mptcp_userspace_pm_get_addr(struct sk_buff *skb, + struct genl_info *info) +{ + struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct mptcp_pm_addr_entry addr, *entry; + struct net *net = sock_net(skb->sk); + struct mptcp_sock *msk; + struct sk_buff *msg; + int ret = -EINVAL; + struct sock *sk; + void *reply; + + msk = mptcp_token_get_sock(net, nla_get_u32(token)); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return ret; + } + + sk = (struct sock *)msk; + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto out; + } + + ret = mptcp_pm_parse_entry(attr, info, false, &addr); + if (ret < 0) + goto out; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto out; + } + + reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, + info->genlhdr->cmd); + if (!reply) { + GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); + ret = -EMSGSIZE; + goto fail; + } + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + entry = mptcp_userspace_pm_lookup_addr_by_id(msk, addr.addr.id); + if (!entry) { + GENL_SET_ERR_MSG(info, "address not found"); + ret = -EINVAL; + goto unlock_fail; + } + + ret = mptcp_nl_fill_addr(msg, entry); + if (ret) + goto unlock_fail; + + genlmsg_end(msg, reply); + ret = genlmsg_reply(msg, info); + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + sock_put(sk); + return ret; + +unlock_fail: + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); +fail: + nlmsg_free(msg); +out: + sock_put(sk); + return ret; +} diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 76c8861a852b..99367c40de0d 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1692,15 +1692,6 @@ out: } } -static void mptcp_set_nospace(struct sock *sk) -{ - /* enable autotune */ - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - - /* will be cleared on avail space */ - set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags); -} - static int mptcp_disconnect(struct sock *sk, int flags); static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, @@ -1771,6 +1762,30 @@ static int do_copy_data_nocache(struct sock *sk, int copy, return 0; } +/* open-code sk_stream_memory_free() plus sent limit computation to + * avoid indirect calls in fast-path. + * Called under the msk socket lock, so we can avoid a bunch of ONCE + * annotations. + */ +static u32 mptcp_send_limit(const struct sock *sk) +{ + const struct mptcp_sock *msk = mptcp_sk(sk); + u32 limit, not_sent; + + if (sk->sk_wmem_queued >= READ_ONCE(sk->sk_sndbuf)) + return 0; + + limit = mptcp_notsent_lowat(sk); + if (limit == UINT_MAX) + return UINT_MAX; + + not_sent = msk->write_seq - msk->snd_nxt; + if (not_sent >= limit) + return 0; + + return limit - not_sent; +} + static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -1815,6 +1830,12 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct mptcp_data_frag *dfrag; bool dfrag_collapsed; size_t psize, offset; + u32 copy_limit; + + /* ensure fitting the notsent_lowat() constraint */ + copy_limit = mptcp_send_limit(sk); + if (!copy_limit) + goto wait_for_memory; /* reuse tail pfrag, if possible, or carve a new one from the * page allocator @@ -1822,9 +1843,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) dfrag = mptcp_pending_tail(sk); dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag); if (!dfrag_collapsed) { - if (!sk_stream_memory_free(sk)) - goto wait_for_memory; - if (!mptcp_page_frag_refill(sk, pfrag)) goto wait_for_memory; @@ -1839,6 +1857,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) offset = dfrag->offset + dfrag->data_len; psize = pfrag->size - offset; psize = min_t(size_t, psize, msg_data_left(msg)); + psize = min_t(size_t, psize, copy_limit); total_ts = psize + frag_truesize; if (!sk_wmem_schedule(sk, total_ts)) @@ -1874,7 +1893,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) continue; wait_for_memory: - mptcp_set_nospace(sk); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); __mptcp_push_pending(sk, msg->msg_flags); ret = sk_stream_wait_memory(sk, &timeo); if (ret) @@ -3769,6 +3788,7 @@ static struct proto mptcp_prot = { .unhash = mptcp_unhash, .get_port = mptcp_get_port, .forward_alloc_get = mptcp_forward_alloc_get, + .stream_memory_free = mptcp_stream_memory_free, .sockets_allocated = &mptcp_sockets_allocated, .memory_allocated = &tcp_memory_allocated, @@ -3942,12 +3962,12 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; - if (sk_stream_is_writeable(sk)) + if (__mptcp_stream_is_writeable(sk, 1)) return EPOLLOUT | EPOLLWRNORM; - mptcp_set_nospace(sk); - smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */ - if (sk_stream_is_writeable(sk)) + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + smp_mb__after_atomic(); /* NOSPACE is changed by mptcp_write_space() */ + if (__mptcp_stream_is_writeable(sk, 1)) return EPOLLOUT | EPOLLWRNORM; return 0; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d0a7955b96c4..de9f0ff6dd30 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -113,10 +113,9 @@ #define MPTCP_RST_TRANSIENT BIT(0) /* MPTCP socket atomic flags */ -#define MPTCP_NOSPACE 1 -#define MPTCP_WORK_RTX 2 -#define MPTCP_FALLBACK_DONE 4 -#define MPTCP_WORK_CLOSE_SUBFLOW 5 +#define MPTCP_WORK_RTX 1 +#define MPTCP_FALLBACK_DONE 2 +#define MPTCP_WORK_CLOSE_SUBFLOW 3 /* MPTCP socket release cb flags */ #define MPTCP_PUSH_PENDING 1 @@ -308,6 +307,7 @@ struct mptcp_sock { in_accept_queue:1, free_first:1, rcvspace_init:1; + u32 notsent_lowat; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; @@ -343,6 +343,8 @@ struct mptcp_sock { #define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \ list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node) +extern struct genl_family mptcp_genl_family; + static inline void msk_owned_by_me(const struct mptcp_sock *msk) { sock_owned_by_me((const struct sock *)msk); @@ -808,14 +810,36 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt); } +static inline u32 mptcp_notsent_lowat(const struct sock *sk) +{ + struct net *net = sock_net(sk); + u32 val; + + val = READ_ONCE(mptcp_sk(sk)->notsent_lowat); + return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); +} + +static inline bool mptcp_stream_memory_free(const struct sock *sk, int wake) +{ + const struct mptcp_sock *msk = mptcp_sk(sk); + u32 notsent_bytes; + + notsent_bytes = READ_ONCE(msk->write_seq) - READ_ONCE(msk->snd_nxt); + return (notsent_bytes << wake) < mptcp_notsent_lowat(sk); +} + +static inline bool __mptcp_stream_is_writeable(const struct sock *sk, int wake) +{ + return mptcp_stream_memory_free(sk, wake) && + __sk_stream_is_writeable(sk, wake); +} + static inline void mptcp_write_space(struct sock *sk) { - if (sk_stream_is_writeable(sk)) { - /* pairs with memory barrier in mptcp_poll */ - smp_mb(); - if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags)) - sk_stream_write_space(sk); - } + /* pairs with memory barrier in mptcp_poll */ + smp_mb(); + if (mptcp_stream_memory_free(sk, 1)) + sk_stream_write_space(sk); } static inline void __mptcp_sync_sndbuf(struct sock *sk) @@ -959,8 +983,6 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); -void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, - struct list_head *rm_list); void mptcp_free_local_addr_list(struct mptcp_sock *msk); @@ -976,6 +998,8 @@ void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflo const struct mptcp_options_received *mp_opt); void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, struct request_sock *req); +int mptcp_nl_fill_addr(struct sk_buff *skb, + struct mptcp_pm_addr_entry *entry); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { @@ -1040,6 +1064,15 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); +int mptcp_pm_nl_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb); +int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, + struct netlink_callback *cb); +int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_get_addr(struct sk_buff *skb, struct genl_info *info); +int mptcp_userspace_pm_get_addr(struct sk_buff *skb, + struct genl_info *info); static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) { diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index da37e4541a5d..dcd1c76d2a3b 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -624,20 +624,11 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t return ret; } -static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval, - unsigned int optlen) +static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - int val; - if (optlen < sizeof(int)) - return -EINVAL; - - if (copy_from_sockptr(&val, optval, sizeof(val))) - return -EFAULT; - - lock_sock(sk); sockopt_seq_inc(msk); msk->cork = !!val; mptcp_for_each_subflow(msk, subflow) { @@ -649,25 +640,15 @@ static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optva } if (!val) mptcp_check_and_set_pending(sk); - release_sock(sk); return 0; } -static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval, - unsigned int optlen) +static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - int val; - - if (optlen < sizeof(int)) - return -EINVAL; - - if (copy_from_sockptr(&val, optval, sizeof(val))) - return -EFAULT; - lock_sock(sk); sockopt_seq_inc(msk); msk->nodelay = !!val; mptcp_for_each_subflow(msk, subflow) { @@ -679,8 +660,6 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op } if (val) mptcp_check_and_set_pending(sk); - release_sock(sk); - return 0; } @@ -803,25 +782,10 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, int ret, val; switch (optname) { - case TCP_INQ: - ret = mptcp_get_int_option(msk, optval, optlen, &val); - if (ret) - return ret; - if (val < 0 || val > 1) - return -EINVAL; - - lock_sock(sk); - msk->recvmsg_inq = !!val; - release_sock(sk); - return 0; case TCP_ULP: return -EOPNOTSUPP; case TCP_CONGESTION: return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); - case TCP_CORK: - return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen); - case TCP_NODELAY: - return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); case TCP_DEFER_ACCEPT: /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); @@ -834,7 +798,34 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, optval, optlen); } - return -EOPNOTSUPP; + ret = mptcp_get_int_option(msk, optval, optlen, &val); + if (ret) + return ret; + + lock_sock(sk); + switch (optname) { + case TCP_INQ: + if (val < 0 || val > 1) + ret = -EINVAL; + else + msk->recvmsg_inq = !!val; + break; + case TCP_NOTSENT_LOWAT: + WRITE_ONCE(msk->notsent_lowat, val); + mptcp_write_space(sk); + break; + case TCP_CORK: + ret = __mptcp_setsockopt_sol_tcp_cork(msk, val); + break; + case TCP_NODELAY: + ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val); + break; + default: + ret = -ENOPROTOOPT; + } + + release_sock(sk); + return ret; } int mptcp_setsockopt(struct sock *sk, int level, int optname, @@ -1349,6 +1340,8 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return mptcp_put_int_option(msk, optval, optlen, msk->cork); case TCP_NODELAY: return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); + case TCP_NOTSENT_LOWAT: + return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat); } return -EOPNOTSUPP; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c9bbc2686690..0db31ca4982d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2057,7 +2057,7 @@ retry: skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; - + skb->mono_delivery_time = !!skb->tstamp; skb_setup_tx_timestamp(skb, sockc.tsflags); if (unlikely(extra_len == 4)) @@ -2586,6 +2586,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->priority = READ_ONCE(po->sk.sk_priority); skb->mark = READ_ONCE(po->sk.sk_mark); skb->tstamp = sockc->transmit_time; + skb->mono_delivery_time = !!skb->tstamp; skb_setup_tx_timestamp(skb, sockc->tsflags); skb_zcopy_set_nouarg(skb, ph.raw); @@ -3064,6 +3065,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc.mark; skb->tstamp = sockc.transmit_time; + skb->mono_delivery_time = !!skb->tstamp; if (unlikely(extra_len == 4)) skb->no_fcs = 1; diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 616d3581419c..31252bc8775e 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -869,7 +869,7 @@ bloom_simple_test() bloom_complex_test() { # Bloom filter index computation is affected from region ID, eRP - # ID and from the region key size. In order to excercise those parts + # ID and from the region key size. In order to exercise those parts # of the Bloom filter code, use a series of regions, each with a # different key size and send packet that should hit all of them. local index diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh index 7d7829f57550..6c52ce1b0450 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh @@ -49,7 +49,7 @@ for o in llrs rs; do Active FEC encoding: ${o^^}" done -# Test mutliple bits +# Test multiple bits $ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs check $? s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 1267d5708e13..955ee651dcd5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -21,6 +21,7 @@ cinfail="" cinsent="" tmpfile="" cout="" +err="" capout="" ns1="" ns2="" @@ -189,6 +190,7 @@ init() { cin=$(mktemp) cinsent=$(mktemp) cout=$(mktemp) + err=$(mktemp) evts_ns1=$(mktemp) evts_ns2=$(mktemp) @@ -204,6 +206,7 @@ cleanup() rm -f "$sin" "$sout" "$cinsent" "$cinfail" rm -f "$tmpfile" rm -rf $evts_ns1 $evts_ns2 + rm -f "$err" cleanup_partial } @@ -3356,6 +3359,77 @@ userspace_pm_rm_sf() wait_rm_sf $1 "${cnt}" } +check_output() +{ + local cmd="$1" + local expected="$2" + local msg="$3" + local rc=0 + + mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} + if [ ${rc} -eq 2 ]; then + fail_test "fail to check output # error ${rc}" + elif [ ${rc} -eq 0 ]; then + print_ok + elif [ ${rc} -eq 1 ]; then + fail_test "fail to check output # different output" + fi +} + +# $1: ns +userspace_pm_dump() +{ + local evts=$evts_ns1 + local tk + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + ip netns exec $1 ./pm_nl_ctl dump token $tk +} + +# $1: ns ; $2: id +userspace_pm_get_addr() +{ + local evts=$evts_ns1 + local tk + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + ip netns exec $1 ./pm_nl_ctl get $2 token $tk +} + +userspace_pm_chk_dump_addr() +{ + local ns="${1}" + local exp="${2}" + local check="${3}" + + print_check "dump addrs ${check}" + + if mptcp_lib_kallsyms_has "mptcp_userspace_pm_dump_addr$"; then + check_output "userspace_pm_dump ${ns}" "${exp}" + else + print_skip + fi +} + +userspace_pm_chk_get_addr() +{ + local ns="${1}" + local id="${2}" + local exp="${3}" + + print_check "get id ${id} addr" + + if mptcp_lib_kallsyms_has "mptcp_userspace_pm_get_addr$"; then + check_output "userspace_pm_get_addr ${ns} ${id}" "${exp}" + else + print_skip + fi +} + userspace_tests() { # userspace pm type prevents add_addr @@ -3447,10 +3521,18 @@ userspace_tests() chk_mptcp_info subflows 2 subflows 2 chk_subflows_total 3 3 chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 + userspace_pm_chk_dump_addr "${ns1}" \ + $'id 10 flags signal 10.0.2.1\nid 20 flags signal 10.0.3.1' \ + "signal" + userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1" + userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1" userspace_pm_rm_addr $ns1 10 userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED + userspace_pm_chk_dump_addr "${ns1}" \ + "id 20 flags signal 10.0.3.1" "after rm_addr 10" userspace_pm_rm_addr $ns1 20 userspace_pm_rm_sf $ns1 10.0.3.1 $SUB_ESTABLISHED + userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20" chk_rm_nr 2 2 invert chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 @@ -3471,8 +3553,15 @@ userspace_tests() chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 + userspace_pm_chk_dump_addr "${ns2}" \ + "id 20 flags subflow 10.0.3.2" \ + "subflow" + userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2" userspace_pm_rm_addr $ns2 20 userspace_pm_rm_sf $ns2 10.0.3.2 $SUB_ESTABLISHED + userspace_pm_chk_dump_addr "${ns2}" \ + "" \ + "after rm_addr 20" chk_rm_nr 1 1 chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 @@ -3492,6 +3581,8 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 userspace_pm_add_sf $ns2 10.0.3.2 0 + userspace_pm_chk_dump_addr "${ns2}" \ + "id 0 flags subflow 10.0.3.2" "id 0 subflow" chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 108a1e12436c..438f557aac90 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -319,3 +319,26 @@ mptcp_lib_wait_local_port_listen() { sleep 0.1 done } + +mptcp_lib_check_output() { + local err="${1}" + local cmd="${2}" + local expected="${3}" + local cmd_ret=0 + local out + + if ! out=$(${cmd} 2>"${err}"); then + cmd_ret=${?} + fi + + if [ ${cmd_ret} -ne 0 ]; then + mptcp_lib_print_err "[FAIL] command execution '${cmd}' stderr" + cat "${err}" + return 2 + elif [ "${out}" = "${expected}" ]; then + return 0 + else + mptcp_lib_print_err "[FAIL] expected '${expected}' got '${out}'" + return 1 + fi +} diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index ebfefae71e13..705106d60db5 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -54,21 +54,17 @@ check() local cmd="$1" local expected="$2" local msg="$3" - local out=`$cmd 2>$err` - local cmd_ret=$? + local rc=0 printf "%-50s" "$msg" - if [ $cmd_ret -ne 0 ]; then - echo "[FAIL] command execution '$cmd' stderr " - cat $err - mptcp_lib_result_fail "${msg} # error ${cmd_ret}" + mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} + if [ ${rc} -eq 2 ]; then + mptcp_lib_result_fail "${msg} # error ${rc}" ret=1 - elif [ "$out" = "$expected" ]; then - echo "[ OK ]" + elif [ ${rc} -eq 0 ]; then + mptcp_lib_print_ok "[ OK ]" mptcp_lib_result_pass "${msg}" - else - echo -n "[FAIL] " - echo "expected '$expected' got '$out'" + elif [ ${rc} -eq 1 ]; then mptcp_lib_result_fail "${msg} # different output" ret=1 fi diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 49369c4a5f26..7426a2cbd4a0 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -453,6 +453,7 @@ int csf(int fd, int pm_family, int argc, char *argv[]) char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024]; + u_int32_t flags = MPTCP_PM_ADDR_FLAG_SUBFLOW; const char *params[5]; struct nlmsghdr *nh; struct rtattr *addr; @@ -558,6 +559,13 @@ int csf(int fd, int pm_family, int argc, char *argv[]) off += NLMSG_ALIGN(rta->rta_len); } + /* addr flags */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &flags, 4); + off += NLMSG_ALIGN(rta->rta_len); + addr->rta_len = off - addr_start; } @@ -1079,6 +1087,7 @@ int get_addr(int fd, int pm_family, int argc, char *argv[]) 1024]; struct rtattr *rta, *nest; struct nlmsghdr *nh; + u_int32_t token = 0; int nest_start; u_int8_t id; int off = 0; @@ -1089,10 +1098,12 @@ int get_addr(int fd, int pm_family, int argc, char *argv[]) MPTCP_PM_VER); /* the only argument is the address id */ - if (argc != 3) + if (argc != 3 && argc != 5) syntax(argv); id = atoi(argv[2]); + if (argc == 5 && !strcmp(argv[3], "token")) + token = strtoul(argv[4], NULL, 10); nest_start = off; nest = (void *)(data + off); @@ -1108,6 +1119,15 @@ int get_addr(int fd, int pm_family, int argc, char *argv[]) off += NLMSG_ALIGN(rta->rta_len); nest->rta_len = off - nest_start; + /* token */ + if (token) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } + print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data))); return 0; } @@ -1119,8 +1139,16 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[]) 1024]; pid_t pid = getpid(); struct nlmsghdr *nh; + u_int32_t token = 0; + struct rtattr *rta; int off = 0; + if (argc != 2 && argc != 4) + syntax(argv); + + if (argc == 4 && !strcmp(argv[2], "token")) + token = strtoul(argv[3], NULL, 10); + memset(data, 0, sizeof(data)); nh = (void *)data; off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR, @@ -1130,6 +1158,15 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[]) nh->nlmsg_pid = pid; nh->nlmsg_len = off; + /* token */ + if (token) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } + print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data))); return 0; } diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index 795cf1ce8af0..b73bd255ea36 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -657,6 +657,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -711,6 +712,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -765,6 +767,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -819,6 +822,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -873,6 +877,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -937,6 +942,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, @@ -995,6 +1001,7 @@ "actions", "mirred" ], + "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid", "plugins": { "requires": "nsPlugin" }, diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore index 9934d48d9a55..7e47b281c442 100644 --- a/tools/virtio/.gitignore +++ b/tools/virtio/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only *.d virtio_test +vhost_net_test vringh_test virtio-trace/trace-agent diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index d128925980e0..e25e99c1c3b7 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -1,8 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 all: test mod -test: virtio_test vringh_test +test: virtio_test vringh_test vhost_net_test virtio_test: virtio_ring.o virtio_test.o vringh_test: vringh_test.o vringh.o virtio_ring.o +vhost_net_test: virtio_ring.o vhost_net_test.o try-run = $(shell set -e; \ if ($(1)) >/dev/null 2>&1; \ @@ -49,6 +50,7 @@ oot-clean: OOT_BUILD+=clean .PHONY: all test mod clean vhost oot oot-clean oot-build clean: - ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \ - vhost_test/Module.symvers vhost_test/modules.order *.d + ${RM} *.o vringh_test virtio_test vhost_net_test vhost_test/*.o \ + vhost_test/.*.cmd vhost_test/Module.symvers \ + vhost_test/modules.order *.d -include *.d diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h index 2a8a70e2a950..42a564f22f2d 100644 --- a/tools/virtio/linux/virtio_config.h +++ b/tools/virtio/linux/virtio_config.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_VIRTIO_CONFIG_H +#define LINUX_VIRTIO_CONFIG_H #include <linux/virtio_byteorder.h> #include <linux/virtio.h> #include <uapi/linux/virtio_config.h> @@ -95,3 +97,5 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) { return __cpu_to_virtio64(virtio_is_little_endian(vdev), val); } + +#endif diff --git a/tools/virtio/vhost_net_test.c b/tools/virtio/vhost_net_test.c new file mode 100644 index 000000000000..389d99a6d7c7 --- /dev/null +++ b/tools/virtio/vhost_net_test.c @@ -0,0 +1,532 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <getopt.h> +#include <limits.h> +#include <string.h> +#include <poll.h> +#include <sys/eventfd.h> +#include <stdlib.h> +#include <assert.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <fcntl.h> +#include <stdbool.h> +#include <linux/vhost.h> +#include <linux/if.h> +#include <linux/if_tun.h> +#include <linux/in.h> +#include <linux/if_packet.h> +#include <linux/virtio_net.h> +#include <netinet/ether.h> + +#define HDR_LEN sizeof(struct virtio_net_hdr_mrg_rxbuf) +#define TEST_BUF_LEN 256 +#define TEST_PTYPE ETH_P_LOOPBACK +#define DESC_NUM 256 + +/* Used by implementation of kmalloc() in tools/virtio/linux/kernel.h */ +void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end; + +struct vq_info { + int kick; + int call; + int idx; + long started; + long completed; + struct pollfd fds; + void *ring; + /* copy used for control */ + struct vring vring; + struct virtqueue *vq; +}; + +struct vdev_info { + struct virtio_device vdev; + int control; + struct vq_info vqs[2]; + int nvqs; + void *buf; + size_t buf_size; + char *test_buf; + char *res_buf; + struct vhost_memory *mem; + int sock; + int ifindex; + unsigned char mac[ETHER_ADDR_LEN]; +}; + +static int tun_alloc(struct vdev_info *dev, char *tun_name) +{ + struct ifreq ifr; + int len = HDR_LEN; + int fd, e; + + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) { + perror("Cannot open /dev/net/tun"); + return fd; + } + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ); + + e = ioctl(fd, TUNSETIFF, &ifr); + if (e < 0) { + perror("ioctl[TUNSETIFF]"); + close(fd); + return e; + } + + e = ioctl(fd, TUNSETVNETHDRSZ, &len); + if (e < 0) { + perror("ioctl[TUNSETVNETHDRSZ]"); + close(fd); + return e; + } + + e = ioctl(fd, SIOCGIFHWADDR, &ifr); + if (e < 0) { + perror("ioctl[SIOCGIFHWADDR]"); + close(fd); + return e; + } + + memcpy(dev->mac, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN); + return fd; +} + +static void vdev_create_socket(struct vdev_info *dev, char *tun_name) +{ + struct ifreq ifr; + + dev->sock = socket(AF_PACKET, SOCK_RAW, htons(TEST_PTYPE)); + assert(dev->sock != -1); + + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ); + assert(ioctl(dev->sock, SIOCGIFINDEX, &ifr) >= 0); + + dev->ifindex = ifr.ifr_ifindex; + + /* Set the flags that bring the device up */ + assert(ioctl(dev->sock, SIOCGIFFLAGS, &ifr) >= 0); + ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); + assert(ioctl(dev->sock, SIOCSIFFLAGS, &ifr) >= 0); +} + +static void vdev_send_packet(struct vdev_info *dev) +{ + char *sendbuf = dev->test_buf + HDR_LEN; + struct sockaddr_ll saddrll = {0}; + int sockfd = dev->sock; + int ret; + + saddrll.sll_family = PF_PACKET; + saddrll.sll_ifindex = dev->ifindex; + saddrll.sll_halen = ETH_ALEN; + saddrll.sll_protocol = htons(TEST_PTYPE); + + ret = sendto(sockfd, sendbuf, TEST_BUF_LEN, 0, + (struct sockaddr *)&saddrll, + sizeof(struct sockaddr_ll)); + assert(ret >= 0); +} + +static bool vq_notify(struct virtqueue *vq) +{ + struct vq_info *info = vq->priv; + unsigned long long v = 1; + int r; + + r = write(info->kick, &v, sizeof(v)); + assert(r == sizeof(v)); + + return true; +} + +static void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info) +{ + struct vhost_vring_addr addr = { + .index = info->idx, + .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc, + .avail_user_addr = (uint64_t)(unsigned long)info->vring.avail, + .used_user_addr = (uint64_t)(unsigned long)info->vring.used, + }; + struct vhost_vring_state state = { .index = info->idx }; + struct vhost_vring_file file = { .index = info->idx }; + int r; + + state.num = info->vring.num; + r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state); + assert(r >= 0); + + state.num = 0; + r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state); + assert(r >= 0); + + r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr); + assert(r >= 0); + + file.fd = info->kick; + r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file); + assert(r >= 0); +} + +static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev) +{ + if (info->vq) + vring_del_virtqueue(info->vq); + + memset(info->ring, 0, vring_size(num, 4096)); + vring_init(&info->vring, num, info->ring, 4096); + info->vq = vring_new_virtqueue(info->idx, num, 4096, vdev, true, false, + info->ring, vq_notify, NULL, "test"); + assert(info->vq); + info->vq->priv = info; +} + +static void vq_info_add(struct vdev_info *dev, int idx, int num, int fd) +{ + struct vhost_vring_file backend = { .index = idx, .fd = fd }; + struct vq_info *info = &dev->vqs[idx]; + int r; + + info->idx = idx; + info->kick = eventfd(0, EFD_NONBLOCK); + r = posix_memalign(&info->ring, 4096, vring_size(num, 4096)); + assert(r >= 0); + vq_reset(info, num, &dev->vdev); + vhost_vq_setup(dev, info); + + r = ioctl(dev->control, VHOST_NET_SET_BACKEND, &backend); + assert(!r); +} + +static void vdev_info_init(struct vdev_info *dev, unsigned long long features) +{ + struct ether_header *eh; + int i, r; + + dev->vdev.features = features; + INIT_LIST_HEAD(&dev->vdev.vqs); + spin_lock_init(&dev->vdev.vqs_list_lock); + + dev->buf_size = (HDR_LEN + TEST_BUF_LEN) * 2; + dev->buf = malloc(dev->buf_size); + assert(dev->buf); + dev->test_buf = dev->buf; + dev->res_buf = dev->test_buf + HDR_LEN + TEST_BUF_LEN; + + memset(dev->test_buf, 0, HDR_LEN + TEST_BUF_LEN); + eh = (struct ether_header *)(dev->test_buf + HDR_LEN); + eh->ether_type = htons(TEST_PTYPE); + memcpy(eh->ether_dhost, dev->mac, ETHER_ADDR_LEN); + memcpy(eh->ether_shost, dev->mac, ETHER_ADDR_LEN); + + for (i = sizeof(*eh); i < TEST_BUF_LEN; i++) + dev->test_buf[i + HDR_LEN] = (char)i; + + dev->control = open("/dev/vhost-net", O_RDWR); + assert(dev->control >= 0); + + r = ioctl(dev->control, VHOST_SET_OWNER, NULL); + assert(r >= 0); + + dev->mem = malloc(offsetof(struct vhost_memory, regions) + + sizeof(dev->mem->regions[0])); + assert(dev->mem); + memset(dev->mem, 0, offsetof(struct vhost_memory, regions) + + sizeof(dev->mem->regions[0])); + dev->mem->nregions = 1; + dev->mem->regions[0].guest_phys_addr = (long)dev->buf; + dev->mem->regions[0].userspace_addr = (long)dev->buf; + dev->mem->regions[0].memory_size = dev->buf_size; + + r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem); + assert(r >= 0); + + r = ioctl(dev->control, VHOST_SET_FEATURES, &features); + assert(r >= 0); + + dev->nvqs = 2; +} + +static void wait_for_interrupt(struct vq_info *vq) +{ + unsigned long long val; + + poll(&vq->fds, 1, 100); + + if (vq->fds.revents & POLLIN) + read(vq->fds.fd, &val, sizeof(val)); +} + +static void verify_res_buf(char *res_buf) +{ + int i; + + for (i = ETHER_HDR_LEN; i < TEST_BUF_LEN; i++) + assert(res_buf[i] == (char)i); +} + +static void run_tx_test(struct vdev_info *dev, struct vq_info *vq, + bool delayed, int bufs) +{ + long long spurious = 0; + struct scatterlist sl; + unsigned int len; + int r; + + for (;;) { + long started_before = vq->started; + long completed_before = vq->completed; + + virtqueue_disable_cb(vq->vq); + do { + while (vq->started < bufs && + (vq->started - vq->completed) < 1) { + sg_init_one(&sl, dev->test_buf, HDR_LEN + TEST_BUF_LEN); + r = virtqueue_add_outbuf(vq->vq, &sl, 1, + dev->test_buf + vq->started, + GFP_ATOMIC); + if (unlikely(r != 0)) + break; + + ++vq->started; + + if (unlikely(!virtqueue_kick(vq->vq))) { + r = -1; + break; + } + } + + if (vq->started >= bufs) + r = -1; + + /* Flush out completed bufs if any */ + while (virtqueue_get_buf(vq->vq, &len)) { + int n; + + n = recvfrom(dev->sock, dev->res_buf, TEST_BUF_LEN, 0, NULL, NULL); + assert(n == TEST_BUF_LEN); + verify_res_buf(dev->res_buf); + + ++vq->completed; + r = 0; + } + } while (r == 0); + + if (vq->completed == completed_before && vq->started == started_before) + ++spurious; + + assert(vq->completed <= bufs); + assert(vq->started <= bufs); + if (vq->completed == bufs) + break; + + if (delayed) { + if (virtqueue_enable_cb_delayed(vq->vq)) + wait_for_interrupt(vq); + } else { + if (virtqueue_enable_cb(vq->vq)) + wait_for_interrupt(vq); + } + } + printf("TX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n", + spurious, vq->started, vq->completed); +} + +static void run_rx_test(struct vdev_info *dev, struct vq_info *vq, + bool delayed, int bufs) +{ + long long spurious = 0; + struct scatterlist sl; + unsigned int len; + int r; + + for (;;) { + long started_before = vq->started; + long completed_before = vq->completed; + + do { + while (vq->started < bufs && + (vq->started - vq->completed) < 1) { + sg_init_one(&sl, dev->res_buf, HDR_LEN + TEST_BUF_LEN); + + r = virtqueue_add_inbuf(vq->vq, &sl, 1, + dev->res_buf + vq->started, + GFP_ATOMIC); + if (unlikely(r != 0)) + break; + + ++vq->started; + + vdev_send_packet(dev); + + if (unlikely(!virtqueue_kick(vq->vq))) { + r = -1; + break; + } + } + + if (vq->started >= bufs) + r = -1; + + /* Flush out completed bufs if any */ + while (virtqueue_get_buf(vq->vq, &len)) { + struct ether_header *eh; + + eh = (struct ether_header *)(dev->res_buf + HDR_LEN); + + /* tun netdev is up and running, only handle the + * TEST_PTYPE packet. + */ + if (eh->ether_type == htons(TEST_PTYPE)) { + assert(len == TEST_BUF_LEN + HDR_LEN); + verify_res_buf(dev->res_buf + HDR_LEN); + } + + ++vq->completed; + r = 0; + } + } while (r == 0); + + if (vq->completed == completed_before && vq->started == started_before) + ++spurious; + + assert(vq->completed <= bufs); + assert(vq->started <= bufs); + if (vq->completed == bufs) + break; + } + + printf("RX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n", + spurious, vq->started, vq->completed); +} + +static const char optstring[] = "h"; +static const struct option longopts[] = { + { + .name = "help", + .val = 'h', + }, + { + .name = "event-idx", + .val = 'E', + }, + { + .name = "no-event-idx", + .val = 'e', + }, + { + .name = "indirect", + .val = 'I', + }, + { + .name = "no-indirect", + .val = 'i', + }, + { + .name = "virtio-1", + .val = '1', + }, + { + .name = "no-virtio-1", + .val = '0', + }, + { + .name = "delayed-interrupt", + .val = 'D', + }, + { + .name = "no-delayed-interrupt", + .val = 'd', + }, + { + .name = "buf-num", + .val = 'n', + .has_arg = required_argument, + }, + { + .name = "batch", + .val = 'b', + .has_arg = required_argument, + }, + { + } +}; + +static void help(int status) +{ + fprintf(stderr, "Usage: vhost_net_test [--help]" + " [--no-indirect]" + " [--no-event-idx]" + " [--no-virtio-1]" + " [--delayed-interrupt]" + " [--buf-num]" + "\n"); + + exit(status); +} + +int main(int argc, char **argv) +{ + unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | + (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1); + char tun_name[IFNAMSIZ]; + long nbufs = 0x100000; + struct vdev_info dev; + bool delayed = false; + int o, fd; + + for (;;) { + o = getopt_long(argc, argv, optstring, longopts, NULL); + switch (o) { + case -1: + goto done; + case '?': + help(2); + case 'e': + features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX); + break; + case 'h': + help(0); + case 'i': + features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC); + break; + case '0': + features &= ~(1ULL << VIRTIO_F_VERSION_1); + break; + case 'D': + delayed = true; + break; + case 'n': + nbufs = strtol(optarg, NULL, 10); + assert(nbufs > 0); + break; + default: + assert(0); + break; + } + } + +done: + memset(&dev, 0, sizeof(dev)); + snprintf(tun_name, IFNAMSIZ, "tun_%d", getpid()); + + fd = tun_alloc(&dev, tun_name); + assert(fd >= 0); + + vdev_info_init(&dev, features); + vq_info_add(&dev, 0, DESC_NUM, fd); + vq_info_add(&dev, 1, DESC_NUM, fd); + vdev_create_socket(&dev, tun_name); + + run_rx_test(&dev, &dev.vqs[0], delayed, nbufs); + run_tx_test(&dev, &dev.vqs[1], delayed, nbufs); + + return 0; +} |