39 files changed, 1781 insertions, 670 deletions
diff --git a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt index df9e844dd6bc..2681168777a1 100644 --- a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt +++ b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt @@ -158,6 +158,7 @@ KSZ9031: no link will be established. KSZ9131: +LAN8841: All skew control options are specified in picoseconds. The increment step is 100ps. Unlike KSZ9031, the values represent picoseccond delays. diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index e21d096c5a90..8010f31cd10d 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -314,7 +314,6 @@ struct psfp_cap { }; #define ENETC_F_TX_TSTAMP_MASK 0xff -/* TODO: more hardware offloads */ enum enetc_active_offloads { /* 8 bits reserved for TX timestamp types (hwtstamp_tx_types) */ ENETC_F_TX_TSTAMP = BIT(0), @@ -323,6 +322,7 @@ enum enetc_active_offloads { ENETC_F_RX_TSTAMP = BIT(8), ENETC_F_QBV = BIT(9), ENETC_F_QCI = BIT(10), + ENETC_F_QBU = BIT(11), }; enum enetc_flags_bit { @@ -382,6 +382,11 @@ struct enetc_ndev_priv { struct work_struct tx_onestep_tstamp; struct sk_buff_head tx_skbs; + + /* Serialize access to MAC Merge state between ethtool requests + * and link state updates + */ + struct mutex mm_lock; }; /* Messaging */ @@ -427,6 +432,7 @@ int enetc_xdp_xmit(struct net_device *ndev, int num_frames, /* ethtool */ void enetc_set_ethtool_ops(struct net_device *ndev); +void enetc_mm_link_state_update(struct enetc_ndev_priv *priv, bool link); /* control buffer descriptor ring (CBDR) */ int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw, int bd_count, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 05e2bad609c6..bca68edfbe9c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) /* Copyright 2017-2019 NXP */ +#include <linux/ethtool_netlink.h> #include <linux/net_tstamp.h> #include <linux/module.h> #include "enetc.h" @@ -299,14 +300,32 @@ static void enetc_get_ethtool_stats(struct net_device *ndev, data[o++] = enetc_port_rd(hw, enetc_port_counters[i].reg); } +static void enetc_pause_stats(struct enetc_hw *hw, int mac, + struct ethtool_pause_stats *pause_stats) +{ + pause_stats->tx_pause_frames = enetc_port_rd(hw, ENETC_PM_TXPF(mac)); + pause_stats->rx_pause_frames = enetc_port_rd(hw, ENETC_PM_RXPF(mac)); +} + static void enetc_get_pause_stats(struct net_device *ndev, struct ethtool_pause_stats *pause_stats) { struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_hw *hw = &priv->si->hw; + struct enetc_si *si = priv->si; - pause_stats->tx_pause_frames = enetc_port_rd(hw, ENETC_PM_TXPF(0)); - pause_stats->rx_pause_frames = enetc_port_rd(hw, ENETC_PM_RXPF(0)); + switch (pause_stats->src) { + case ETHTOOL_MAC_STATS_SRC_EMAC: + enetc_pause_stats(hw, 0, pause_stats); + break; + case ETHTOOL_MAC_STATS_SRC_PMAC: + if (si->hw_features & ENETC_SI_F_QBU) + enetc_pause_stats(hw, 1, pause_stats); + break; + case ETHTOOL_MAC_STATS_SRC_AGGREGATE: + ethtool_aggregate_pause_stats(ndev, pause_stats); + break; + } } static void enetc_mac_stats(struct enetc_hw *hw, int mac, @@ -383,8 +402,20 @@ static void enetc_get_eth_mac_stats(struct net_device *ndev, { struct enetc_ndev_priv *priv = 
netdev_priv(ndev); struct enetc_hw *hw = &priv->si->hw; + struct enetc_si *si = priv->si; - enetc_mac_stats(hw, 0, mac_stats); + switch (mac_stats->src) { + case ETHTOOL_MAC_STATS_SRC_EMAC: + enetc_mac_stats(hw, 0, mac_stats); + break; + case ETHTOOL_MAC_STATS_SRC_PMAC: + if (si->hw_features & ENETC_SI_F_QBU) + enetc_mac_stats(hw, 1, mac_stats); + break; + case ETHTOOL_MAC_STATS_SRC_AGGREGATE: + ethtool_aggregate_mac_stats(ndev, mac_stats); + break; + } } static void enetc_get_eth_ctrl_stats(struct net_device *ndev, @@ -392,8 +423,20 @@ static void enetc_get_eth_ctrl_stats(struct net_device *ndev, { struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_hw *hw = &priv->si->hw; + struct enetc_si *si = priv->si; - enetc_ctrl_stats(hw, 0, ctrl_stats); + switch (ctrl_stats->src) { + case ETHTOOL_MAC_STATS_SRC_EMAC: + enetc_ctrl_stats(hw, 0, ctrl_stats); + break; + case ETHTOOL_MAC_STATS_SRC_PMAC: + if (si->hw_features & ENETC_SI_F_QBU) + enetc_ctrl_stats(hw, 1, ctrl_stats); + break; + case ETHTOOL_MAC_STATS_SRC_AGGREGATE: + ethtool_aggregate_ctrl_stats(ndev, ctrl_stats); + break; + } } static void enetc_get_rmon_stats(struct net_device *ndev, @@ -402,8 +445,20 @@ static void enetc_get_rmon_stats(struct net_device *ndev, { struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_hw *hw = &priv->si->hw; + struct enetc_si *si = priv->si; - enetc_rmon_stats(hw, 0, rmon_stats, ranges); + switch (rmon_stats->src) { + case ETHTOOL_MAC_STATS_SRC_EMAC: + enetc_rmon_stats(hw, 0, rmon_stats, ranges); + break; + case ETHTOOL_MAC_STATS_SRC_PMAC: + if (si->hw_features & ENETC_SI_F_QBU) + enetc_rmon_stats(hw, 1, rmon_stats, ranges); + break; + case ETHTOOL_MAC_STATS_SRC_AGGREGATE: + ethtool_aggregate_rmon_stats(ndev, rmon_stats); + break; + } } #define ENETC_RSSHASH_L3 (RXH_L2DA | RXH_VLAN | RXH_L3_PROTO | RXH_IP_SRC | \ @@ -863,6 +918,166 @@ static int enetc_set_link_ksettings(struct net_device *dev, return phylink_ethtool_ksettings_set(priv->phylink, cmd); } +static void enetc_get_mm_stats(struct net_device *ndev, + struct ethtool_mm_stats *s) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; + struct enetc_si *si = priv->si; + + if (!(si->hw_features & ENETC_SI_F_QBU)) + return; + + s->MACMergeFrameAssErrorCount = enetc_port_rd(hw, ENETC_MMFAECR); + s->MACMergeFrameSmdErrorCount = enetc_port_rd(hw, ENETC_MMFSECR); + s->MACMergeFrameAssOkCount = enetc_port_rd(hw, ENETC_MMFAOCR); + s->MACMergeFragCountRx = enetc_port_rd(hw, ENETC_MMFCRXR); + s->MACMergeFragCountTx = enetc_port_rd(hw, ENETC_MMFCTXR); + s->MACMergeHoldCount = enetc_port_rd(hw, ENETC_MMHCR); +} + +static int enetc_get_mm(struct net_device *ndev, struct ethtool_mm_state *state) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_si *si = priv->si; + struct enetc_hw *hw = &si->hw; + u32 lafs, rafs, val; + + if (!(si->hw_features & ENETC_SI_F_QBU)) + return -EOPNOTSUPP; + + mutex_lock(&priv->mm_lock); + + val = enetc_port_rd(hw, ENETC_PFPMR); + state->pmac_enabled = !!(val & ENETC_PFPMR_PMACE); + + val = enetc_port_rd(hw, ENETC_MMCSR); + + switch (ENETC_MMCSR_GET_VSTS(val)) { + case 0: + state->verify_status = ETHTOOL_MM_VERIFY_STATUS_DISABLED; + break; + case 2: + state->verify_status = ETHTOOL_MM_VERIFY_STATUS_VERIFYING; + break; + case 3: + state->verify_status = ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED; + break; + case 4: + state->verify_status = ETHTOOL_MM_VERIFY_STATUS_FAILED; + break; + case 5: + default: + state->verify_status = ETHTOOL_MM_VERIFY_STATUS_UNKNOWN; + 
break; + } + + rafs = ENETC_MMCSR_GET_RAFS(val); + state->tx_min_frag_size = ethtool_mm_frag_size_add_to_min(rafs); + lafs = ENETC_MMCSR_GET_LAFS(val); + state->rx_min_frag_size = ethtool_mm_frag_size_add_to_min(lafs); + state->tx_enabled = !!(val & ENETC_MMCSR_LPE); /* mirror of MMCSR_ME */ + state->tx_active = !!(val & ENETC_MMCSR_LPA); + state->verify_enabled = !(val & ENETC_MMCSR_VDIS); + state->verify_time = ENETC_MMCSR_GET_VT(val); + /* A verifyTime of 128 ms would exceed the 7 bit width + * of the ENETC_MMCSR_VT field + */ + state->max_verify_time = 127; + + mutex_unlock(&priv->mm_lock); + + return 0; +} + +static int enetc_set_mm(struct net_device *ndev, struct ethtool_mm_cfg *cfg, + struct netlink_ext_ack *extack) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; + struct enetc_si *si = priv->si; + u32 val, add_frag_size; + int err; + + if (!(si->hw_features & ENETC_SI_F_QBU)) + return -EOPNOTSUPP; + + err = ethtool_mm_frag_size_min_to_add(cfg->tx_min_frag_size, + &add_frag_size, extack); + if (err) + return err; + + mutex_lock(&priv->mm_lock); + + val = enetc_port_rd(hw, ENETC_PFPMR); + if (cfg->pmac_enabled) + val |= ENETC_PFPMR_PMACE; + else + val &= ~ENETC_PFPMR_PMACE; + enetc_port_wr(hw, ENETC_PFPMR, val); + + val = enetc_port_rd(hw, ENETC_MMCSR); + + if (cfg->verify_enabled) + val &= ~ENETC_MMCSR_VDIS; + else + val |= ENETC_MMCSR_VDIS; + + if (cfg->tx_enabled) + priv->active_offloads |= ENETC_F_QBU; + else + priv->active_offloads &= ~ENETC_F_QBU; + + /* If link is up, enable MAC Merge right away */ + if (!!(priv->active_offloads & ENETC_F_QBU) && + !(val & ENETC_MMCSR_LINK_FAIL)) + val |= ENETC_MMCSR_ME; + + val &= ~ENETC_MMCSR_VT_MASK; + val |= ENETC_MMCSR_VT(cfg->verify_time); + + val &= ~ENETC_MMCSR_RAFS_MASK; + val |= ENETC_MMCSR_RAFS(add_frag_size); + + enetc_port_wr(hw, ENETC_MMCSR, val); + + mutex_unlock(&priv->mm_lock); + + return 0; +} + +/* When the link is lost, the verification state machine goes to the FAILED + * state and doesn't restart on its own after a new link up event. + * According to 802.3 Figure 99-8 - Verify state diagram, the LINK_FAIL bit + * should have been sufficient to re-trigger verification, but for ENETC it + * doesn't. As a workaround, we need to toggle the Merge Enable bit to + * re-trigger verification when link comes up. 
+ */ +void enetc_mm_link_state_update(struct enetc_ndev_priv *priv, bool link) +{ + struct enetc_hw *hw = &priv->si->hw; + u32 val; + + mutex_lock(&priv->mm_lock); + + val = enetc_port_rd(hw, ENETC_MMCSR); + + if (link) { + val &= ~ENETC_MMCSR_LINK_FAIL; + if (priv->active_offloads & ENETC_F_QBU) + val |= ENETC_MMCSR_ME; + } else { + val |= ENETC_MMCSR_LINK_FAIL; + if (priv->active_offloads & ENETC_F_QBU) + val &= ~ENETC_MMCSR_ME; + } + + enetc_port_wr(hw, ENETC_MMCSR, val); + + mutex_unlock(&priv->mm_lock); +} +EXPORT_SYMBOL_GPL(enetc_mm_link_state_update); + static const struct ethtool_ops enetc_pf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | @@ -893,6 +1108,9 @@ static const struct ethtool_ops enetc_pf_ethtool_ops = { .set_wol = enetc_set_wol, .get_pauseparam = enetc_get_pauseparam, .set_pauseparam = enetc_set_pauseparam, + .get_mm = enetc_get_mm, + .set_mm = enetc_set_mm, + .get_mm_stats = enetc_get_mm_stats, }; static const struct ethtool_ops enetc_vf_ethtool_ops = { diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 041df7d0ae81..de2e0ee8cdcb 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -222,7 +222,30 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PSIVHFR0(n) (0x1e00 + (n) * 8) /* n = SI index */ #define ENETC_PSIVHFR1(n) (0x1e04 + (n) * 8) /* n = SI index */ #define ENETC_MMCSR 0x1f00 -#define ENETC_MMCSR_ME BIT(16) +#define ENETC_MMCSR_LINK_FAIL BIT(31) +#define ENETC_MMCSR_VT_MASK GENMASK(29, 23) /* Verify Time */ +#define ENETC_MMCSR_VT(x) (((x) << 23) & ENETC_MMCSR_VT_MASK) +#define ENETC_MMCSR_GET_VT(x) (((x) & ENETC_MMCSR_VT_MASK) >> 23) +#define ENETC_MMCSR_TXSTS_MASK GENMASK(22, 21) /* Merge Status */ +#define ENETC_MMCSR_GET_TXSTS(x) (((x) & ENETC_MMCSR_TXSTS_MASK) >> 21) +#define ENETC_MMCSR_VSTS_MASK GENMASK(20, 18) /* Verify Status */ +#define ENETC_MMCSR_GET_VSTS(x) (((x) & ENETC_MMCSR_VSTS_MASK) >> 18) +#define ENETC_MMCSR_VDIS BIT(17) /* Verify Disabled */ +#define ENETC_MMCSR_ME BIT(16) /* Merge Enabled */ +#define ENETC_MMCSR_RAFS_MASK GENMASK(9, 8) /* Remote Additional Fragment Size */ +#define ENETC_MMCSR_RAFS(x) (((x) << 8) & ENETC_MMCSR_RAFS_MASK) +#define ENETC_MMCSR_GET_RAFS(x) (((x) & ENETC_MMCSR_RAFS_MASK) >> 8) +#define ENETC_MMCSR_LAFS_MASK GENMASK(4, 3) /* Local Additional Fragment Size */ +#define ENETC_MMCSR_GET_LAFS(x) (((x) & ENETC_MMCSR_LAFS_MASK) >> 3) +#define ENETC_MMCSR_LPA BIT(2) /* Local Preemption Active */ +#define ENETC_MMCSR_LPE BIT(1) /* Local Preemption Enabled */ +#define ENETC_MMCSR_LPS BIT(0) /* Local Preemption Supported */ +#define ENETC_MMFAECR 0x1f08 +#define ENETC_MMFSECR 0x1f0c +#define ENETC_MMFAOCR 0x1f10 +#define ENETC_MMFCRXR 0x1f14 +#define ENETC_MMFCTXR 0x1f18 +#define ENETC_MMHCR 0x1f1c #define ENETC_PTCMSDUR(n) (0x2020 + (n) * 4) /* n = TC index [0..7] */ #define ENETC_PMAC_OFFSET 0x1000 diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 7facc7d5261e..97056dc3496d 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -1083,6 +1083,9 @@ static void enetc_pl_mac_link_up(struct phylink_config *config, enetc_port_mac_wr(si, ENETC_PM0_CMD_CFG, cmd_cfg); enetc_mac_enable(si, true); + + if (si->hw_features & ENETC_SI_F_QBU) + enetc_mm_link_state_update(priv, true); } static void enetc_pl_mac_link_down(struct 
phylink_config *config, @@ -1090,8 +1093,15 @@ static void enetc_pl_mac_link_down(struct phylink_config *config, phy_interface_t interface) { struct enetc_pf *pf = phylink_to_enetc_pf(config); + struct enetc_si *si = pf->si; + struct enetc_ndev_priv *priv; - enetc_mac_enable(pf->si, false); + priv = netdev_priv(si->ndev); + + if (si->hw_features & ENETC_SI_F_QBU) + enetc_mm_link_state_update(priv, false); + + enetc_mac_enable(si, false); } static const struct phylink_mac_ops enetc_mac_phylink_ops = { @@ -1284,6 +1294,8 @@ static int enetc_pf_probe(struct pci_dev *pdev, priv = netdev_priv(ndev); + mutex_init(&priv->mm_lock); + enetc_init_si_rings_params(priv); err = enetc_alloc_si_resources(priv); diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index f9f15acae90a..f6dd3f8fd936 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -71,17 +71,17 @@ void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf) if (!ice_is_switchdev_running(vf->pf)) return; - if (is_valid_ether_addr(vf->hw_lan_addr.addr)) { + if (is_valid_ether_addr(vf->hw_lan_addr)) { err = ice_eswitch_add_vf_mac_rule(vf->pf, vf, - vf->hw_lan_addr.addr); + vf->hw_lan_addr); if (err) { dev_err(ice_pf_to_dev(vf->pf), "Failed to add MAC %pM for VF %d\n, error %d\n", - vf->hw_lan_addr.addr, vf->vf_id, err); + vf->hw_lan_addr, vf->vf_id, err); return; } vf->num_mac++; - ether_addr_copy(vf->dev_lan_addr.addr, vf->hw_lan_addr.addr); + ether_addr_copy(vf->dev_lan_addr, vf->hw_lan_addr); } } @@ -237,7 +237,7 @@ ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi) ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); metadata_dst_free(vf->repr->dst); vf->repr->dst = NULL; - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr.addr, + ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI); netif_napi_del(&vf->repr->q_vector->napi); @@ -265,14 +265,14 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf) GFP_KERNEL); if (!vf->repr->dst) { ice_fltr_add_mac_and_broadcast(vsi, - vf->hw_lan_addr.addr, + vf->hw_lan_addr, ICE_FWD_TO_VSI); goto err; } if (ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof)) { ice_fltr_add_mac_and_broadcast(vsi, - vf->hw_lan_addr.addr, + vf->hw_lan_addr, ICE_FWD_TO_VSI); metadata_dst_free(vf->repr->dst); vf->repr->dst = NULL; @@ -281,7 +281,7 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf) if (ice_vsi_add_vlan_zero(vsi)) { ice_fltr_add_mac_and_broadcast(vsi, - vf->hw_lan_addr.addr, + vf->hw_lan_addr, ICE_FWD_TO_VSI); metadata_dst_free(vf->repr->dst); vf->repr->dst = NULL; @@ -338,7 +338,7 @@ void ice_eswitch_update_repr(struct ice_vsi *vsi) ret = ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof); if (ret) { - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr.addr, ICE_FWD_TO_VSI); + ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI); dev_err(ice_pf_to_dev(pf), "Failed to update VF %d port representor", vsi->vf->vf_id); } @@ -425,7 +425,13 @@ static void ice_eswitch_release_env(struct ice_pf *pf) static struct ice_vsi * ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_SWITCHDEV_CTRL, NULL, NULL); + struct ice_vsi_cfg_params params = {}; + + params.type = ICE_VSI_SWITCHDEV_CTRL; + params.pi = pi; + params.flags = ICE_VSI_FLAG_INIT; + + return ice_vsi_setup(pf, ¶ms); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 
1f770a15d1a4..960197b2301c 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -166,14 +166,14 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi) /** * ice_vsi_set_num_qs - Set number of queues, descriptors and vectors for a VSI * @vsi: the VSI being configured - * @vf: the VF associated with this VSI, if any * * Return 0 on success and a negative value on error */ -static void ice_vsi_set_num_qs(struct ice_vsi *vsi, struct ice_vf *vf) +static void ice_vsi_set_num_qs(struct ice_vsi *vsi) { enum ice_vsi_type vsi_type = vsi->type; struct ice_pf *pf = vsi->back; + struct ice_vf *vf = vsi->vf; if (WARN_ON(vsi_type == ICE_VSI_VF && !vf)) return; @@ -594,15 +594,13 @@ err_alloc_tx: /** * ice_vsi_alloc_def - set default values for already allocated VSI * @vsi: ptr to VSI - * @vf: VF for ICE_VSI_VF and ICE_VSI_CTRL * @ch: ptr to channel */ static int -ice_vsi_alloc_def(struct ice_vsi *vsi, struct ice_vf *vf, - struct ice_channel *ch) +ice_vsi_alloc_def(struct ice_vsi *vsi, struct ice_channel *ch) { if (vsi->type != ICE_VSI_CHNL) { - ice_vsi_set_num_qs(vsi, vf); + ice_vsi_set_num_qs(vsi); if (ice_vsi_alloc_arrays(vsi)) return -ENOMEM; } @@ -641,28 +639,18 @@ ice_vsi_alloc_def(struct ice_vsi *vsi, struct ice_vf *vf, /** * ice_vsi_alloc - Allocates the next available struct VSI in the PF * @pf: board private structure - * @pi: pointer to the port_info instance - * @vsi_type: type of VSI - * @ch: ptr to channel - * @vf: VF for ICE_VSI_VF and ICE_VSI_CTRL * - * The VF pointer is used for ICE_VSI_VF and ICE_VSI_CTRL. For ICE_VSI_CTRL, - * it may be NULL in the case there is no association with a VF. For - * ICE_VSI_VF the VF pointer *must not* be NULL. + * Reserves a VSI index from the PF and allocates an empty VSI structure + * without a type. The VSI structure must later be initialized by calling + * ice_vsi_cfg(). * * returns a pointer to a VSI on success, NULL on failure. */ -static struct ice_vsi * -ice_vsi_alloc(struct ice_pf *pf, struct ice_port_info *pi, - enum ice_vsi_type vsi_type, struct ice_channel *ch, - struct ice_vf *vf) +static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf) { struct device *dev = ice_pf_to_dev(pf); struct ice_vsi *vsi = NULL; - if (WARN_ON(vsi_type == ICE_VSI_VF && !vf)) - return NULL; - /* Need to protect the allocation of the VSIs at the PF level */ mutex_lock(&pf->sw_mutex); @@ -679,11 +667,7 @@ ice_vsi_alloc(struct ice_pf *pf, struct ice_port_info *pi, if (!vsi) goto unlock_pf; - vsi->type = vsi_type; vsi->back = pf; - vsi->port_info = pi; - /* For VSIs which don't have a connected VF, this will be NULL */ - vsi->vf = vf; set_bit(ICE_VSI_DOWN, vsi->state); /* fill slot and make note of the index */ @@ -694,15 +678,6 @@ ice_vsi_alloc(struct ice_pf *pf, struct ice_port_info *pi, pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi, pf->next_vsi); - if (vsi->type == ICE_VSI_CTRL) { - if (vf) { - vf->ctrl_vsi_idx = vsi->idx; - } else { - WARN_ON(pf->ctrl_vsi_idx != ICE_NO_VSI); - pf->ctrl_vsi_idx = vsi->idx; - } - } - unlock_pf: mutex_unlock(&pf->sw_mutex); return vsi; @@ -1267,12 +1242,15 @@ ice_chnl_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) /** * ice_vsi_init - Create and initialize a VSI * @vsi: the VSI being configured - * @init_vsi: flag, tell if VSI need to be initialized + * @vsi_flags: VSI configuration flags + * + * Set ICE_FLAG_VSI_INIT to initialize a new VSI context, clear it to + * reconfigure an existing context. 
* * This initializes a VSI context depending on the VSI type to be added and * passes it down to the add_vsi aq command to create a new VSI. */ -static int ice_vsi_init(struct ice_vsi *vsi, int init_vsi) +static int ice_vsi_init(struct ice_vsi *vsi, u32 vsi_flags) { struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; @@ -1334,7 +1312,7 @@ static int ice_vsi_init(struct ice_vsi *vsi, int init_vsi) /* if updating VSI context, make sure to set valid_section: * to indicate which section of VSI context being updated */ - if (!(init_vsi & ICE_VSI_FLAG_INIT)) + if (!(vsi_flags & ICE_VSI_FLAG_INIT)) ctxt->info.valid_sections |= cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID); } @@ -1347,7 +1325,7 @@ static int ice_vsi_init(struct ice_vsi *vsi, int init_vsi) if (ret) goto out; - if (!(init_vsi & ICE_VSI_FLAG_INIT)) + if (!(vsi_flags & ICE_VSI_FLAG_INIT)) /* means VSI being updated */ /* must to indicate which section of VSI context are * being modified @@ -1363,7 +1341,7 @@ static int ice_vsi_init(struct ice_vsi *vsi, int init_vsi) cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); } - if (init_vsi & ICE_VSI_FLAG_INIT) { + if (vsi_flags & ICE_VSI_FLAG_INIT) { ret = ice_add_vsi(hw, vsi->idx, ctxt, NULL); if (ret) { dev_err(dev, "Add VSI failed, err %d\n", ret); @@ -1527,7 +1505,7 @@ static int ice_get_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi) * ice_vsi_setup_vector_base - Set up the base vector for the given VSI * @vsi: ptr to the VSI * - * This should only be called after ice_vsi_alloc() which allocates the + * This should only be called after ice_vsi_alloc_def() which allocates the * corresponding SW VSI structure and initializes num_queue_pairs for the * newly allocated VSI. * @@ -2702,14 +2680,10 @@ static int ice_vsi_cfg_tc_lan(struct ice_pf *pf, struct ice_vsi *vsi) /** * ice_vsi_cfg_def - configure default VSI based on the type * @vsi: pointer to VSI - * @vf: pointer to VF to which this VSI connects. This field is used primarily - * for the ICE_VSI_VF type. Other VSI types should pass NULL. - * @ch: ptr to channel - * @init_vsi: is this an initialization or a reconfigure of the VSI + * @params: the parameters to configure this VSI with */ static int -ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vf *vf, struct ice_channel *ch, - int init_vsi) +ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) { struct device *dev = ice_pf_to_dev(vsi->back); struct ice_pf *pf = vsi->back; @@ -2717,7 +2691,7 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vf *vf, struct ice_channel *ch, vsi->vsw = pf->first_sw; - ret = ice_vsi_alloc_def(vsi, vf, ch); + ret = ice_vsi_alloc_def(vsi, params->ch); if (ret) return ret; @@ -2740,7 +2714,7 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vf *vf, struct ice_channel *ch, ice_vsi_set_tc_cfg(vsi); /* create the VSI */ - ret = ice_vsi_init(vsi, init_vsi); + ret = ice_vsi_init(vsi, params->flags); if (ret) goto unroll_get_qs; @@ -2863,19 +2837,25 @@ unroll_vsi_alloc: } /** - * ice_vsi_cfg - configure VSI and tc on it + * ice_vsi_cfg - configure a previously allocated VSI * @vsi: pointer to VSI - * @vf: pointer to VF to which this VSI connects. This field is used primarily - * for the ICE_VSI_VF type. Other VSI types should pass NULL. 
- * @ch: ptr to channel - * @init_vsi: is this an initialization or a reconfigure of the VSI + * @params: parameters used to configure this VSI */ -int ice_vsi_cfg(struct ice_vsi *vsi, struct ice_vf *vf, struct ice_channel *ch, - int init_vsi) +int ice_vsi_cfg(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) { + struct ice_pf *pf = vsi->back; int ret; - ret = ice_vsi_cfg_def(vsi, vf, ch, init_vsi); + if (WARN_ON(params->type == ICE_VSI_VF && !params->vf)) + return -EINVAL; + + vsi->type = params->type; + vsi->port_info = params->pi; + + /* For VSIs which don't have a connected VF, this will be NULL */ + vsi->vf = params->vf; + + ret = ice_vsi_cfg_def(vsi, params); if (ret) return ret; @@ -2883,6 +2863,16 @@ int ice_vsi_cfg(struct ice_vsi *vsi, struct ice_vf *vf, struct ice_channel *ch, if (ret) ice_vsi_decfg(vsi); + if (vsi->type == ICE_VSI_CTRL) { + if (vsi->vf) { + WARN_ON(vsi->vf->ctrl_vsi_idx != ICE_NO_VSI); + vsi->vf->ctrl_vsi_idx = vsi->idx; + } else { + WARN_ON(pf->ctrl_vsi_idx != ICE_NO_VSI); + pf->ctrl_vsi_idx = vsi->idx; + } + } + return ret; } @@ -2946,11 +2936,7 @@ void ice_vsi_decfg(struct ice_vsi *vsi) /** * ice_vsi_setup - Set up a VSI by a given type * @pf: board private structure - * @pi: pointer to the port_info instance - * @vsi_type: VSI type - * @vf: pointer to VF to which this VSI connects. This field is used primarily - * for the ICE_VSI_VF type. Other VSI types should pass NULL. - * @ch: ptr to channel + * @params: parameters to use when creating the VSI * * This allocates the sw VSI structure and its queue resources. * @@ -2958,21 +2944,26 @@ void ice_vsi_decfg(struct ice_vsi *vsi) * success, NULL on failure. */ struct ice_vsi * -ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, - enum ice_vsi_type vsi_type, struct ice_vf *vf, - struct ice_channel *ch) +ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params) { struct device *dev = ice_pf_to_dev(pf); struct ice_vsi *vsi; int ret; - vsi = ice_vsi_alloc(pf, pi, vsi_type, ch, vf); + /* ice_vsi_setup can only initialize a new VSI, and we must have + * a port_info structure for it. + */ + if (WARN_ON(!(params->flags & ICE_VSI_FLAG_INIT)) || + WARN_ON(!params->pi)) + return NULL; + + vsi = ice_vsi_alloc(pf); if (!vsi) { dev_err(dev, "could not allocate VSI\n"); return NULL; } - ret = ice_vsi_cfg(vsi, vf, ch, ICE_VSI_FLAG_INIT); + ret = ice_vsi_cfg(vsi, params); if (ret) goto err_vsi_cfg; @@ -2997,7 +2988,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, return vsi; err_vsi_cfg: - if (vsi_type == ICE_VSI_VF) + if (params->type == ICE_VSI_VF) ice_enable_lag(pf->lag); ice_vsi_free(vsi); @@ -3477,12 +3468,16 @@ ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq) /** * ice_vsi_rebuild - Rebuild VSI after reset * @vsi: VSI to be rebuild - * @init_vsi: flag, tell if VSI need to be initialized + * @vsi_flags: flags used for VSI rebuild flow + * + * Set vsi_flags to ICE_VSI_FLAG_INIT to initialize a new VSI, or + * ICE_VSI_FLAG_NO_INIT to rebuild an existing VSI in hardware. 
* * Returns 0 on success and negative value on failure */ -int ice_vsi_rebuild(struct ice_vsi *vsi, int init_vsi) +int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) { + struct ice_vsi_cfg_params params = {}; struct ice_coalesce_stored *coalesce; int ret, prev_txq, prev_rxq; int prev_num_q_vectors = 0; @@ -3491,6 +3486,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, int init_vsi) if (!vsi) return -EINVAL; + params = ice_vsi_to_params(vsi); + params.flags = vsi_flags; + pf = vsi->back; if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf)) return -EINVAL; @@ -3506,13 +3504,13 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, int init_vsi) prev_rxq = vsi->num_rxq; ice_vsi_decfg(vsi); - ret = ice_vsi_cfg_def(vsi, vsi->vf, vsi->ch, init_vsi); + ret = ice_vsi_cfg_def(vsi, ¶ms); if (ret) goto err_vsi_cfg; ret = ice_vsi_cfg_tc_lan(pf, vsi); if (ret) { - if (init_vsi & ICE_VSI_FLAG_INIT) { + if (vsi_flags & ICE_VSI_FLAG_INIT) { ret = -EIO; goto err_vsi_cfg_tc_lan; } else { diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index b76f05e1f8a3..75221478f2dc 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -7,6 +7,47 @@ #include "ice.h" #include "ice_vlan.h" +/* Flags used for VSI configuration and rebuild */ +#define ICE_VSI_FLAG_INIT BIT(0) +#define ICE_VSI_FLAG_NO_INIT 0 + +/** + * struct ice_vsi_cfg_params - VSI configuration parameters + * @pi: pointer to the port_info instance for the VSI + * @ch: pointer to the channel structure for the VSI, may be NULL + * @vf: pointer to the VF associated with this VSI, may be NULL + * @type: the type of VSI to configure + * @flags: VSI flags used for rebuild and configuration + * + * Parameter structure used when configuring a new VSI. + */ +struct ice_vsi_cfg_params { + struct ice_port_info *pi; + struct ice_channel *ch; + struct ice_vf *vf; + enum ice_vsi_type type; + u32 flags; +}; + +/** + * ice_vsi_to_params - Get parameters for an existing VSI + * @vsi: the VSI to get parameters for + * + * Fill a parameter structure for reconfiguring a VSI with its current + * parameters, such as during a rebuild operation. 
+ */ +static inline struct ice_vsi_cfg_params ice_vsi_to_params(struct ice_vsi *vsi) +{ + struct ice_vsi_cfg_params params = {}; + + params.pi = vsi->port_info; + params.ch = vsi->ch; + params.vf = vsi->vf; + params.type = vsi->type; + + return params; +} + const char *ice_vsi_type_str(enum ice_vsi_type vsi_type); bool ice_pf_state_is_nominal(struct ice_pf *pf); @@ -50,9 +91,7 @@ int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi); void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); struct ice_vsi * -ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, - enum ice_vsi_type vsi_type, struct ice_vf *vf, - struct ice_channel *ch); +ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params); void ice_napi_del(struct ice_vsi *vsi); @@ -70,11 +109,8 @@ int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id); int ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id); -#define ICE_VSI_FLAG_INIT BIT(0) -#define ICE_VSI_FLAG_NO_INIT 0 -int ice_vsi_rebuild(struct ice_vsi *vsi, int init_vsi); -int ice_vsi_cfg(struct ice_vsi *vsi, struct ice_vf *vf, - struct ice_channel *ch, int init_vsi); +int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags); +int ice_vsi_cfg(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params); bool ice_is_reset_in_progress(unsigned long *state); int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 433298d0014a..b71ff7e2ebf2 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -537,7 +537,7 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) /* Disable VFs until reset is completed */ mutex_lock(&pf->vfs.table_lock); ice_for_each_vf(pf, bkt, vf) - ice_set_vf_state_qs_dis(vf); + ice_set_vf_state_dis(vf); mutex_unlock(&pf->vfs.table_lock); if (ice_is_eswitch_mode_switchdev(pf)) { @@ -3447,14 +3447,27 @@ void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) static struct ice_vsi * ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_PF, NULL, NULL); + struct ice_vsi_cfg_params params = {}; + + params.type = ICE_VSI_PF; + params.pi = pi; + params.flags = ICE_VSI_FLAG_INIT; + + return ice_vsi_setup(pf, ¶ms); } static struct ice_vsi * ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, struct ice_channel *ch) { - return ice_vsi_setup(pf, pi, ICE_VSI_CHNL, NULL, ch); + struct ice_vsi_cfg_params params = {}; + + params.type = ICE_VSI_CHNL; + params.pi = pi; + params.ch = ch; + params.flags = ICE_VSI_FLAG_INIT; + + return ice_vsi_setup(pf, ¶ms); } /** @@ -3468,7 +3481,13 @@ ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, static struct ice_vsi * ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, NULL, NULL); + struct ice_vsi_cfg_params params = {}; + + params.type = ICE_VSI_CTRL; + params.pi = pi; + params.flags = ICE_VSI_FLAG_INIT; + + return ice_vsi_setup(pf, ¶ms); } /** @@ -3482,7 +3501,13 @@ ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) struct ice_vsi * ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_LB, NULL, NULL); + struct ice_vsi_cfg_params params = {}; + + params.type = ICE_VSI_LB; + params.pi = pi; + params.flags = ICE_VSI_FLAG_INIT; + + return ice_vsi_setup(pf, ¶ms); } /** @@ -5002,6 +5027,7 @@ static void 
ice_deinit(struct ice_pf *pf) */ int ice_load(struct ice_pf *pf) { + struct ice_vsi_cfg_params params = {}; struct ice_vsi *vsi; int err; @@ -5014,7 +5040,11 @@ int ice_load(struct ice_pf *pf) return err; vsi = ice_get_main_vsi(pf); - err = ice_vsi_cfg(vsi, NULL, NULL, ICE_VSI_FLAG_INIT); + + params = ice_vsi_to_params(vsi); + params.flags = ICE_VSI_FLAG_INIT; + + err = ice_vsi_cfg(vsi, ¶ms); if (err) goto err_vsi_cfg; diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 3ba1408c56a9..96a64c25e2ef 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -41,21 +41,6 @@ static void ice_free_vf_entries(struct ice_pf *pf) } /** - * ice_vf_vsi_release - invalidate the VF's VSI after freeing it - * @vf: invalidate this VF's VSI after freeing it - */ -static void ice_vf_vsi_release(struct ice_vf *vf) -{ - struct ice_vsi *vsi = ice_get_vf_vsi(vf); - - if (WARN_ON(!vsi)) - return; - - ice_vsi_release(vsi); - ice_vf_invalidate_vsi(vf); -} - -/** * ice_free_vf_res - Free a VF's resources * @vf: pointer to the VF info */ @@ -248,11 +233,16 @@ void ice_free_vfs(struct ice_pf *pf) */ static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf) { - struct ice_port_info *pi = ice_vf_get_port_info(vf); + struct ice_vsi_cfg_params params = {}; struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; - vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf, NULL); + params.type = ICE_VSI_VF; + params.pi = ice_vf_get_port_info(vf); + params.vf = vf; + params.flags = ICE_VSI_FLAG_INIT; + + vsi = ice_vsi_setup(pf, ¶ms); if (!vsi) { dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n"); @@ -583,51 +573,19 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) */ static int ice_init_vf_vsi_res(struct ice_vf *vf) { - struct ice_vsi_vlan_ops *vlan_ops; struct ice_pf *pf = vf->pf; - u8 broadcast[ETH_ALEN]; struct ice_vsi *vsi; - struct device *dev; int err; vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf); - dev = ice_pf_to_dev(pf); vsi = ice_vf_vsi_setup(vf); if (!vsi) return -ENOMEM; - err = ice_vsi_add_vlan_zero(vsi); - if (err) { - dev_warn(dev, "Failed to add VLAN 0 filter for VF %d\n", - vf->vf_id); - goto release_vsi; - } - - vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); - err = vlan_ops->ena_rx_filtering(vsi); - if (err) { - dev_warn(dev, "Failed to enable Rx VLAN filtering for VF %d\n", - vf->vf_id); - goto release_vsi; - } - - eth_broadcast_addr(broadcast); - err = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI); - if (err) { - dev_err(dev, "Failed to add broadcast MAC filter for VF %d, error %d\n", - vf->vf_id, err); - goto release_vsi; - } - - err = ice_vsi_apply_spoofchk(vsi, vf->spoofchk); - if (err) { - dev_warn(dev, "Failed to initialize spoofchk setting for VF %d\n", - vf->vf_id); + err = ice_vf_init_host_cfg(vf, vsi); + if (err) goto release_vsi; - } - - vf->num_mac = 1; return 0; @@ -697,6 +655,21 @@ static void ice_sriov_free_vf(struct ice_vf *vf) } /** + * ice_sriov_clear_reset_state - clears VF Reset status register + * @vf: the vf to configure + */ +static void ice_sriov_clear_reset_state(struct ice_vf *vf) +{ + struct ice_hw *hw = &vf->pf->hw; + + /* Clear the reset status register so that VF immediately sees that + * the device is resetting, even if hardware hasn't yet gotten around + * to clearing VFGEN_RSTAT for us. 
+ */ + wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_INPROGRESS); +} + +/** * ice_sriov_clear_mbx_register - clears SRIOV VF's mailbox registers * @vf: the vf to configure */ @@ -799,23 +772,19 @@ static void ice_sriov_clear_reset_trigger(struct ice_vf *vf) } /** - * ice_sriov_vsi_rebuild - release and rebuild VF's VSI - * @vf: VF to release and setup the VSI for + * ice_sriov_create_vsi - Create a new VSI for a VF + * @vf: VF to create the VSI for * - * This is only called when a single VF is being reset (i.e. VFR, VFLR, host VF - * configuration change, etc.). + * This is called by ice_vf_recreate_vsi to create the new VSI after the old + * VSI has been released. */ -static int ice_sriov_vsi_rebuild(struct ice_vf *vf) +static int ice_sriov_create_vsi(struct ice_vf *vf) { - struct ice_pf *pf = vf->pf; + struct ice_vsi *vsi; - ice_vf_vsi_release(vf); - if (!ice_vf_vsi_setup(vf)) { - dev_err(ice_pf_to_dev(pf), - "Failed to release and setup the VF%u's VSI\n", - vf->vf_id); + vsi = ice_vf_vsi_setup(vf); + if (!vsi) return -ENOMEM; - } return 0; } @@ -826,8 +795,6 @@ static int ice_sriov_vsi_rebuild(struct ice_vf *vf) */ static void ice_sriov_post_vsi_rebuild(struct ice_vf *vf) { - ice_vf_rebuild_host_cfg(vf); - ice_vf_set_initialized(vf); ice_ena_vf_mappings(vf); wr32(&vf->pf->hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE); } @@ -835,11 +802,13 @@ static void ice_sriov_post_vsi_rebuild(struct ice_vf *vf) static const struct ice_vf_ops ice_sriov_vf_ops = { .reset_type = ICE_VF_RESET, .free = ice_sriov_free_vf, + .clear_reset_state = ice_sriov_clear_reset_state, .clear_mbx_register = ice_sriov_clear_mbx_register, .trigger_reset_register = ice_sriov_trigger_reset_register, .poll_reset_status = ice_sriov_poll_reset_status, .clear_reset_trigger = ice_sriov_clear_reset_trigger, - .vsi_rebuild = ice_sriov_vsi_rebuild, + .irq_close = NULL, + .create_vsi = ice_sriov_create_vsi, .post_vsi_rebuild = ice_sriov_post_vsi_rebuild, }; @@ -879,21 +848,9 @@ static int ice_create_vf_entries(struct ice_pf *pf, u16 num_vfs) /* set sriov vf ops for VFs created during SRIOV flow */ vf->vf_ops = &ice_sriov_vf_ops; - vf->vf_sw_id = pf->first_sw; - /* assign default capabilities */ - vf->spoofchk = true; - vf->num_vf_qs = pf->vfs.num_qps_per; - ice_vc_set_default_allowlist(vf); - - /* ctrl_vsi_idx will be set to a valid value only when VF - * creates its first fdir rule. 
- */ - ice_vf_ctrl_invalidate_vsi(vf); - ice_vf_fdir_init(vf); - - ice_virtchnl_set_dflt_ops(vf); + ice_initialize_vf_entry(vf); - mutex_init(&vf->cfg_lock); + vf->vf_sw_id = pf->first_sw; hash_add_rcu(vfs->table, &vf->entry, vf_id); } @@ -1285,7 +1242,7 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) goto out_put_vf; ivi->vf = vf_id; - ether_addr_copy(ivi->mac, vf->hw_lan_addr.addr); + ether_addr_copy(ivi->mac, vf->hw_lan_addr); /* VF configuration for VLAN and applicable QoS */ ivi->vlan = ice_vf_get_port_vlan_id(vf); @@ -1333,8 +1290,8 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) return -EINVAL; /* nothing left to do, unicast MAC already set */ - if (ether_addr_equal(vf->dev_lan_addr.addr, mac) && - ether_addr_equal(vf->hw_lan_addr.addr, mac)) { + if (ether_addr_equal(vf->dev_lan_addr, mac) && + ether_addr_equal(vf->hw_lan_addr, mac)) { ret = 0; goto out_put_vf; } @@ -1348,8 +1305,8 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) /* VF is notified of its new MAC via the PF's response to the * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset */ - ether_addr_copy(vf->dev_lan_addr.addr, mac); - ether_addr_copy(vf->hw_lan_addr.addr, mac); + ether_addr_copy(vf->dev_lan_addr, mac); + ether_addr_copy(vf->hw_lan_addr, mac); if (is_zero_ether_addr(mac)) { /* VF will send VIRTCHNL_OP_ADD_ETH_ADDR message with its MAC */ vf->pf_set_mac = false; @@ -1750,7 +1707,7 @@ void ice_print_vf_rx_mdd_event(struct ice_vf *vf) dev_info(dev, "%d Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n", vf->mdd_rx_events.count, pf->hw.pf_id, vf->vf_id, - vf->dev_lan_addr.addr, + vf->dev_lan_addr, test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags) ? "on" : "off"); } @@ -1794,7 +1751,7 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf) dev_info(dev, "%d Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM.\n", vf->mdd_tx_events.count, hw->pf_id, vf->vf_id, - vf->dev_lan_addr.addr); + vf->dev_lan_addr); } } mutex_unlock(&pf->vfs.table_lock); @@ -1884,7 +1841,7 @@ ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, if (pf_vsi) dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n", - &vf->dev_lan_addr.addr[0], + &vf->dev_lan_addr[0], pf_vsi->netdev->dev_addr); } } diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index c3b406df269f..0e57bd1b85fd 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -237,16 +237,49 @@ static void ice_vf_clear_counters(struct ice_vf *vf) */ static void ice_vf_pre_vsi_rebuild(struct ice_vf *vf) { + /* Close any IRQ mapping now */ + if (vf->vf_ops->irq_close) + vf->vf_ops->irq_close(vf); + ice_vf_clear_counters(vf); vf->vf_ops->clear_reset_trigger(vf); } /** + * ice_vf_recreate_vsi - Release and re-create the VF's VSI + * @vf: VF to recreate the VSI for + * + * This is only called when a single VF is being reset (i.e. VVF, VFLR, host + * VF configuration change, etc) + * + * It releases and then re-creates a new VSI. 
+ */ +static int ice_vf_recreate_vsi(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + int err; + + ice_vf_vsi_release(vf); + + err = vf->vf_ops->create_vsi(vf); + if (err) { + dev_err(ice_pf_to_dev(pf), + "Failed to recreate the VF%u's VSI, error %d\n", + vf->vf_id, err); + return err; + } + + return 0; +} + +/** * ice_vf_rebuild_vsi - rebuild the VF's VSI * @vf: VF to rebuild the VSI for * * This is only called when all VF(s) are being reset (i.e. PCIe Reset on the * host, PFR, CORER, etc.). + * + * It reprograms the VSI configuration back into hardware. */ static int ice_vf_rebuild_vsi(struct ice_vf *vf) { @@ -271,6 +304,21 @@ static int ice_vf_rebuild_vsi(struct ice_vf *vf) } /** + * ice_vf_post_vsi_rebuild - Reset tasks that occur after VSI rebuild + * @vf: the VF being reset + * + * Perform reset tasks which must occur after the VSI has been re-created or + * rebuilt during a VF reset. + */ +static void ice_vf_post_vsi_rebuild(struct ice_vf *vf) +{ + ice_vf_rebuild_host_cfg(vf); + ice_vf_set_initialized(vf); + + vf->vf_ops->post_vsi_rebuild(vf); +} + +/** * ice_is_any_vf_in_unicast_promisc - check if any VF(s) * are in unicast promiscuous mode * @pf: PF structure for accessing VF(s) @@ -495,7 +543,7 @@ void ice_reset_all_vfs(struct ice_pf *pf) ice_vf_pre_vsi_rebuild(vf); ice_vf_rebuild_vsi(vf); - vf->vf_ops->post_vsi_rebuild(vf); + ice_vf_post_vsi_rebuild(vf); mutex_unlock(&vf->cfg_lock); } @@ -639,14 +687,14 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) ice_vf_pre_vsi_rebuild(vf); - if (vf->vf_ops->vsi_rebuild(vf)) { + if (ice_vf_recreate_vsi(vf)) { dev_err(dev, "Failed to release and setup the VF%u's VSI\n", vf->vf_id); err = -EFAULT; goto out_unlock; } - vf->vf_ops->post_vsi_rebuild(vf); + ice_vf_post_vsi_rebuild(vf); vsi = ice_get_vf_vsi(vf); if (WARN_ON(!vsi)) { err = -EINVAL; @@ -673,7 +721,7 @@ out_unlock: * ice_set_vf_state_qs_dis - Set VF queues state to disabled * @vf: pointer to the VF structure */ -void ice_set_vf_state_qs_dis(struct ice_vf *vf) +static void ice_set_vf_state_qs_dis(struct ice_vf *vf) { /* Clear Rx/Tx enabled queues flag */ bitmap_zero(vf->txq_ena, ICE_MAX_RSS_QS_PER_VF); @@ -681,9 +729,45 @@ void ice_set_vf_state_qs_dis(struct ice_vf *vf) clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states); } +/** + * ice_set_vf_state_dis - Set VF state to disabled + * @vf: pointer to the VF structure + */ +void ice_set_vf_state_dis(struct ice_vf *vf) +{ + ice_set_vf_state_qs_dis(vf); + vf->vf_ops->clear_reset_state(vf); +} + /* Private functions only accessed from other virtualization files */ /** + * ice_initialize_vf_entry - Initialize a VF entry + * @vf: pointer to the VF structure + */ +void ice_initialize_vf_entry(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + struct ice_vfs *vfs; + + vfs = &pf->vfs; + + /* assign default capabilities */ + vf->spoofchk = true; + vf->num_vf_qs = vfs->num_qps_per; + ice_vc_set_default_allowlist(vf); + ice_virtchnl_set_dflt_ops(vf); + + /* ctrl_vsi_idx will be set to a valid value only when iAVF + * creates its first fdir rule. 
+ */ + ice_vf_ctrl_invalidate_vsi(vf); + ice_vf_fdir_init(vf); + + mutex_init(&vf->cfg_lock); +} + +/** * ice_dis_vf_qs - Disable the VF queues * @vf: pointer to the VF structure */ @@ -924,18 +1008,18 @@ static int ice_vf_rebuild_host_mac_cfg(struct ice_vf *vf) vf->num_mac++; - if (is_valid_ether_addr(vf->hw_lan_addr.addr)) { - status = ice_fltr_add_mac(vsi, vf->hw_lan_addr.addr, + if (is_valid_ether_addr(vf->hw_lan_addr)) { + status = ice_fltr_add_mac(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI); if (status) { dev_err(dev, "failed to add default unicast MAC filter %pM for VF %u, error %d\n", - &vf->hw_lan_addr.addr[0], vf->vf_id, + &vf->hw_lan_addr[0], vf->vf_id, status); return status; } vf->num_mac++; - ether_addr_copy(vf->dev_lan_addr.addr, vf->hw_lan_addr.addr); + ether_addr_copy(vf->dev_lan_addr, vf->hw_lan_addr); } return 0; @@ -1115,11 +1199,16 @@ void ice_vf_ctrl_vsi_release(struct ice_vf *vf) */ struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf) { - struct ice_port_info *pi = ice_vf_get_port_info(vf); + struct ice_vsi_cfg_params params = {}; struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; - vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf, NULL); + params.type = ICE_VSI_CTRL; + params.pi = ice_vf_get_port_info(vf); + params.vf = vf; + params.flags = ICE_VSI_FLAG_INIT; + + vsi = ice_vsi_setup(pf, ¶ms); if (!vsi) { dev_err(ice_pf_to_dev(pf), "Failed to create VF control VSI\n"); ice_vf_ctrl_invalidate_vsi(vf); @@ -1129,6 +1218,60 @@ struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf) } /** + * ice_vf_init_host_cfg - Initialize host admin configuration + * @vf: VF to initialize + * @vsi: the VSI created at initialization + * + * Initialize the VF host configuration. Called during VF creation to setup + * VLAN 0, add the VF VSI broadcast filter, and setup spoof checking. It + * should only be called during VF creation. + */ +int ice_vf_init_host_cfg(struct ice_vf *vf, struct ice_vsi *vsi) +{ + struct ice_vsi_vlan_ops *vlan_ops; + struct ice_pf *pf = vf->pf; + u8 broadcast[ETH_ALEN]; + struct device *dev; + int err; + + dev = ice_pf_to_dev(pf); + + err = ice_vsi_add_vlan_zero(vsi); + if (err) { + dev_warn(dev, "Failed to add VLAN 0 filter for VF %d\n", + vf->vf_id); + return err; + } + + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); + err = vlan_ops->ena_rx_filtering(vsi); + if (err) { + dev_warn(dev, "Failed to enable Rx VLAN filtering for VF %d\n", + vf->vf_id); + return err; + } + + eth_broadcast_addr(broadcast); + err = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI); + if (err) { + dev_err(dev, "Failed to add broadcast MAC filter for VF %d, status %d\n", + vf->vf_id, err); + return err; + } + + vf->num_mac = 1; + + err = ice_vsi_apply_spoofchk(vsi, vf->spoofchk); + if (err) { + dev_warn(dev, "Failed to initialize spoofchk setting for VF %d\n", + vf->vf_id); + return err; + } + + return 0; +} + +/** * ice_vf_invalidate_vsi - invalidate vsi_idx/vsi_num to remove VSI access * @vf: VF to remove access to VSI for */ @@ -1139,6 +1282,24 @@ void ice_vf_invalidate_vsi(struct ice_vf *vf) } /** + * ice_vf_vsi_release - Release the VF VSI and invalidate indexes + * @vf: pointer to the VF structure + * + * Release the VF associated with this VSI and then invalidate the VSI + * indexes. 
+ */ +void ice_vf_vsi_release(struct ice_vf *vf) +{ + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + + if (WARN_ON(!vsi)) + return; + + ice_vsi_release(vsi); + ice_vf_invalidate_vsi(vf); +} + +/** * ice_vf_set_initialized - VF is ready for VIRTCHNL communication * @vf: VF to set in initialized state * diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index 52bd9a3816bf..ef30f05b5d02 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -56,11 +56,13 @@ struct ice_mdd_vf_events { struct ice_vf_ops { enum ice_disq_rst_src reset_type; void (*free)(struct ice_vf *vf); + void (*clear_reset_state)(struct ice_vf *vf); void (*clear_mbx_register)(struct ice_vf *vf); void (*trigger_reset_register)(struct ice_vf *vf, bool is_vflr); bool (*poll_reset_status)(struct ice_vf *vf); void (*clear_reset_trigger)(struct ice_vf *vf); - int (*vsi_rebuild)(struct ice_vf *vf); + void (*irq_close)(struct ice_vf *vf); + int (*create_vsi)(struct ice_vf *vf); void (*post_vsi_rebuild)(struct ice_vf *vf); }; @@ -96,8 +98,8 @@ struct ice_vf { struct ice_sw *vf_sw_id; /* switch ID the VF VSIs connect to */ struct virtchnl_version_info vf_ver; u32 driver_caps; /* reported by VF driver */ - struct virtchnl_ether_addr dev_lan_addr; - struct virtchnl_ether_addr hw_lan_addr; + u8 dev_lan_addr[ETH_ALEN]; + u8 hw_lan_addr[ETH_ALEN]; struct ice_time_mac legacy_last_added_umac; DECLARE_BITMAP(txq_ena, ICE_MAX_RSS_QS_PER_VF); DECLARE_BITMAP(rxq_ena, ICE_MAX_RSS_QS_PER_VF); @@ -213,7 +215,7 @@ u16 ice_get_num_vfs(struct ice_pf *pf); struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf); bool ice_is_vf_disabled(struct ice_vf *vf); int ice_check_vf_ready_for_cfg(struct ice_vf *vf); -void ice_set_vf_state_qs_dis(struct ice_vf *vf); +void ice_set_vf_state_dis(struct ice_vf *vf); bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf); void ice_vf_get_promisc_masks(struct ice_vf *vf, struct ice_vsi *vsi, @@ -259,7 +261,7 @@ static inline int ice_check_vf_ready_for_cfg(struct ice_vf *vf) return -EOPNOTSUPP; } -static inline void ice_set_vf_state_qs_dis(struct ice_vf *vf) +static inline void ice_set_vf_state_dis(struct ice_vf *vf) { } diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h index 9c8ef2b01f0f..6f3293b793b5 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h @@ -23,6 +23,7 @@ #warning "Only include ice_vf_lib_private.h in CONFIG_PCI_IOV virtualization files" #endif +void ice_initialize_vf_entry(struct ice_vf *vf); void ice_dis_vf_qs(struct ice_vf *vf); int ice_check_vf_init(struct ice_vf *vf); enum virtchnl_status_code ice_err_to_virt_err(int err); @@ -35,7 +36,9 @@ void ice_vf_rebuild_host_cfg(struct ice_vf *vf); void ice_vf_ctrl_invalidate_vsi(struct ice_vf *vf); void ice_vf_ctrl_vsi_release(struct ice_vf *vf); struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf); +int ice_vf_init_host_cfg(struct ice_vf *vf, struct ice_vsi *vsi); void ice_vf_invalidate_vsi(struct ice_vf *vf); +void ice_vf_vsi_release(struct ice_vf *vf); void ice_vf_set_initialized(struct ice_vf *vf); #endif /* _ICE_VF_LIB_PRIVATE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index dab3cd5d300e..e24e3f5017ca 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -507,7 +507,7 @@ static int 
ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV; vfres->vsi_res[0].num_queue_pairs = vsi->num_txq; ether_addr_copy(vfres->vsi_res[0].default_mac_addr, - vf->hw_lan_addr.addr); + vf->hw_lan_addr); /* match guest capabilities */ vf->driver_caps = vfres->vf_cap_flags; @@ -1802,10 +1802,10 @@ ice_vfhw_mac_add(struct ice_vf *vf, struct virtchnl_ether_addr *vc_ether_addr) * was correctly specified over VIRTCHNL */ if ((ice_is_vc_addr_legacy(vc_ether_addr) && - is_zero_ether_addr(vf->hw_lan_addr.addr)) || + is_zero_ether_addr(vf->hw_lan_addr)) || ice_is_vc_addr_primary(vc_ether_addr)) { - ether_addr_copy(vf->dev_lan_addr.addr, mac_addr); - ether_addr_copy(vf->hw_lan_addr.addr, mac_addr); + ether_addr_copy(vf->dev_lan_addr, mac_addr); + ether_addr_copy(vf->hw_lan_addr, mac_addr); } /* hardware and device MACs are already set, but its possible that the @@ -1836,7 +1836,7 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, int ret; /* device MAC already added */ - if (ether_addr_equal(mac_addr, vf->dev_lan_addr.addr)) + if (ether_addr_equal(mac_addr, vf->dev_lan_addr)) return 0; if (is_unicast_ether_addr(mac_addr) && !ice_can_vf_change_mac(vf)) { @@ -1891,8 +1891,8 @@ ice_update_legacy_cached_mac(struct ice_vf *vf, ice_is_legacy_umac_expired(&vf->legacy_last_added_umac)) return; - ether_addr_copy(vf->dev_lan_addr.addr, vf->legacy_last_added_umac.addr); - ether_addr_copy(vf->hw_lan_addr.addr, vf->legacy_last_added_umac.addr); + ether_addr_copy(vf->dev_lan_addr, vf->legacy_last_added_umac.addr); + ether_addr_copy(vf->hw_lan_addr, vf->legacy_last_added_umac.addr); } /** @@ -1906,15 +1906,15 @@ ice_vfhw_mac_del(struct ice_vf *vf, struct virtchnl_ether_addr *vc_ether_addr) u8 *mac_addr = vc_ether_addr->addr; if (!is_valid_ether_addr(mac_addr) || - !ether_addr_equal(vf->dev_lan_addr.addr, mac_addr)) + !ether_addr_equal(vf->dev_lan_addr, mac_addr)) return; /* allow the device MAC to be repopulated in the add flow and don't - * clear the hardware MAC (i.e. hw_lan_addr.addr) here as that is meant + * clear the hardware MAC (i.e. 
hw_lan_addr) here as that is meant * to be persistent on VM reboot and across driver unload/load, which * won't work if we clear the hardware MAC here */ - eth_zero_addr(vf->dev_lan_addr.addr); + eth_zero_addr(vf->dev_lan_addr); ice_update_legacy_cached_mac(vf, vc_ether_addr); } @@ -1934,7 +1934,7 @@ ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, int status; if (!ice_can_vf_change_mac(vf) && - ether_addr_equal(vf->dev_lan_addr.addr, mac_addr)) + ether_addr_equal(vf->dev_lan_addr, mac_addr)) return 0; status = ice_fltr_remove_mac(vsi, mac_addr, ICE_FWD_TO_VSI); @@ -3733,7 +3733,7 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg) int result; if (!is_unicast_ether_addr(mac_addr) || - ether_addr_equal(mac_addr, vf->hw_lan_addr.addr)) + ether_addr_equal(mac_addr, vf->hw_lan_addr)) continue; if (vf->pf_set_mac) { diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index c6a58343d81d..e6ef6b303222 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -113,7 +113,7 @@ ice_vc_fdir_param_check(struct ice_vf *vf, u16 vsi_id) if (!ice_vc_isvalid_vsi_id(vf, vsi_id)) return -EINVAL; - if (!pf->vsi[vf->lan_vsi_idx]) + if (!ice_get_vf_vsi(vf)) return -EINVAL; return 0; @@ -494,7 +494,7 @@ ice_vc_fdir_rem_prof(struct ice_vf *vf, enum ice_fltr_ptype flow, int tun) vf_prof = fdir->fdir_prof[flow]; - vf_vsi = pf->vsi[vf->lan_vsi_idx]; + vf_vsi = ice_get_vf_vsi(vf); if (!vf_vsi) { dev_dbg(dev, "NULL vf %d vsi pointer\n", vf->vf_id); return; @@ -572,7 +572,7 @@ ice_vc_fdir_write_flow_prof(struct ice_vf *vf, enum ice_fltr_ptype flow, pf = vf->pf; dev = ice_pf_to_dev(pf); hw = &pf->hw; - vf_vsi = pf->vsi[vf->lan_vsi_idx]; + vf_vsi = ice_get_vf_vsi(vf); if (!vf_vsi) return -EINVAL; @@ -1205,7 +1205,7 @@ static int ice_vc_fdir_write_fltr(struct ice_vf *vf, pf = vf->pf; dev = ice_pf_to_dev(pf); hw = &pf->hw; - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (!vsi) { dev_dbg(dev, "Invalid vsi for VF %d\n", vf->vf_id); return -EINVAL; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index c56b991fa610..45fbd8346de7 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2810,6 +2810,22 @@ static int igb_offload_txtime(struct igb_adapter *adapter, return 0; } +static int igb_tc_query_caps(struct igb_adapter *adapter, + struct tc_query_caps_base *base) +{ + switch (base->type) { + case TC_SETUP_QDISC_TAPRIO: { + struct tc_taprio_caps *caps = base->caps; + + caps->broken_mqprio = true; + + return 0; + } + default: + return -EOPNOTSUPP; + } +} + static LIST_HEAD(igb_block_cb_list); static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, @@ -2818,6 +2834,8 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, struct igb_adapter *adapter = netdev_priv(dev); switch (type) { + case TC_QUERY_CAPS: + return igb_tc_query_caps(adapter, type_data); case TC_SETUP_QDISC_CBS: return igb_offload_cbs(adapter, type_data); case TC_SETUP_BLOCK: diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index cf7f6a5eea3d..4c626f756a8b 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6214,10 +6214,10 @@ static int igc_tc_query_caps(struct igc_adapter *adapter, case TC_SETUP_QDISC_TAPRIO: { struct tc_taprio_caps *caps = base->caps; - 
if (hw->mac.type != igc_i225) - return -EOPNOTSUPP; + caps->broken_mqprio = true; - caps->gate_mask_per_txq = true; + if (hw->mac.type == igc_i225) + caps->gate_mask_per_txq = true; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 9b44557e7271..66ec7932f008 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -817,9 +817,12 @@ static void comp_irqs_release(struct mlx5_core_dev *dev) static int comp_irqs_request(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; + const struct cpumask *prev = cpu_none_mask; + const struct cpumask *mask; int ncomp_eqs = table->num_comp_eqs; u16 *cpus; int ret; + int cpu; int i; ncomp_eqs = table->num_comp_eqs; @@ -838,8 +841,19 @@ static int comp_irqs_request(struct mlx5_core_dev *dev) ret = -ENOMEM; goto free_irqs; } - for (i = 0; i < ncomp_eqs; i++) - cpus[i] = cpumask_local_spread(i, dev->priv.numa_node); + + i = 0; + rcu_read_lock(); + for_each_numa_hop_mask(mask, dev->priv.numa_node) { + for_each_cpu_andnot(cpu, mask, prev) { + cpus[i] = cpu; + if (++i == ncomp_eqs) + goto spread_done; + } + prev = mask; + } +spread_done: + rcu_read_unlock(); ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs); kfree(cpus); if (ret < 0) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 42422a106433..22db0bb15c45 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1750,29 +1750,12 @@ static const struct devlink_ops mlxsw_devlink_ops = { static int mlxsw_core_params_register(struct mlxsw_core *mlxsw_core) { - int err; - - err = mlxsw_core_fw_params_register(mlxsw_core); - if (err) - return err; - - if (mlxsw_core->driver->params_register) { - err = mlxsw_core->driver->params_register(mlxsw_core); - if (err) - goto err_params_register; - } - return 0; - -err_params_register: - mlxsw_core_fw_params_unregister(mlxsw_core); - return err; + return mlxsw_core_fw_params_register(mlxsw_core); } static void mlxsw_core_params_unregister(struct mlxsw_core *mlxsw_core) { mlxsw_core_fw_params_unregister(mlxsw_core); - if (mlxsw_core->driver->params_register) - mlxsw_core->driver->params_unregister(mlxsw_core); } struct mlxsw_core_health_event { @@ -2201,6 +2184,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_devlink_alloc; } devl_lock(devlink); + devl_register(devlink); } mlxsw_core = devlink_priv(devlink); @@ -2284,10 +2268,8 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_driver_init; } - if (!reload) { + if (!reload) devl_unlock(devlink); - devlink_register(devlink); - } return 0; err_driver_init: @@ -2319,6 +2301,7 @@ err_register_resources: err_bus_init: mlxsw_core_irq_event_handler_fini(mlxsw_core); if (!reload) { + devl_unregister(devlink); devl_unlock(devlink); devlink_free(devlink); } @@ -2357,10 +2340,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, { struct devlink *devlink = priv_to_devlink(mlxsw_core); - if (!reload) { - devlink_unregister(devlink); + if (!reload) devl_lock(devlink); - } if (devlink_is_reload_failed(devlink)) { if (!reload) @@ -2389,6 +2370,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, mlxsw_core->bus->fini(mlxsw_core->bus_priv); mlxsw_core_irq_event_handler_fini(mlxsw_core); if (!reload) { + devl_unregister(devlink); 
devl_unlock(devlink); devlink_free(devlink); } @@ -2398,6 +2380,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, reload_fail_deinit: mlxsw_core_params_unregister(mlxsw_core); devl_resources_unregister(devlink); + devl_unregister(devlink); devl_unlock(devlink); devlink_free(devlink); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index a77cb0be7108..e5474d3e34db 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -421,8 +421,6 @@ struct mlxsw_driver { const struct mlxsw_config_profile *profile, u64 *p_single_size, u64 *p_double_size, u64 *p_linear_size); - int (*params_register)(struct mlxsw_core *mlxsw_core); - void (*params_unregister)(struct mlxsw_core *mlxsw_core); /* Notify a driver that a timestamped packet was transmitted. Driver * is responsible for freeing the passed-in SKB. diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index b0bdb9640ebf..a8f94b7544ee 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3861,62 +3861,6 @@ static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core, return 0; } -static int -mlxsw_sp_params_acl_region_rehash_intrvl_get(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct mlxsw_core *mlxsw_core = devlink_priv(devlink); - struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - - ctx->val.vu32 = mlxsw_sp_acl_region_rehash_intrvl_get(mlxsw_sp); - return 0; -} - -static int -mlxsw_sp_params_acl_region_rehash_intrvl_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct mlxsw_core *mlxsw_core = devlink_priv(devlink); - struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - - return mlxsw_sp_acl_region_rehash_intrvl_set(mlxsw_sp, ctx->val.vu32); -} - -static const struct devlink_param mlxsw_sp2_devlink_params[] = { - DEVLINK_PARAM_DRIVER(MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, - "acl_region_rehash_interval", - DEVLINK_PARAM_TYPE_U32, - BIT(DEVLINK_PARAM_CMODE_RUNTIME), - mlxsw_sp_params_acl_region_rehash_intrvl_get, - mlxsw_sp_params_acl_region_rehash_intrvl_set, - NULL), -}; - -static int mlxsw_sp2_params_register(struct mlxsw_core *mlxsw_core) -{ - struct devlink *devlink = priv_to_devlink(mlxsw_core); - union devlink_param_value value; - int err; - - err = devl_params_register(devlink, mlxsw_sp2_devlink_params, - ARRAY_SIZE(mlxsw_sp2_devlink_params)); - if (err) - return err; - - value.vu32 = 0; - devl_param_driverinit_value_set(devlink, - MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, - value); - return 0; -} - -static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core) -{ - devl_params_unregister(priv_to_devlink(mlxsw_core), - mlxsw_sp2_devlink_params, - ARRAY_SIZE(mlxsw_sp2_devlink_params)); -} - static void mlxsw_sp_ptp_transmitted(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, u16 local_port) { @@ -3994,8 +3938,6 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, - .params_register = mlxsw_sp2_params_register, - .params_unregister = mlxsw_sp2_params_unregister, .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, @@ -4033,8 +3975,6 @@ static struct 
mlxsw_driver mlxsw_sp3_driver = { .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, - .params_register = mlxsw_sp2_params_register, - .params_unregister = mlxsw_sp2_params_unregister, .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, @@ -4070,8 +4010,6 @@ static struct mlxsw_driver mlxsw_sp4_driver = { .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, - .params_register = mlxsw_sp2_params_register, - .params_unregister = mlxsw_sp2_params_unregister, .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp4_config_profile, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index bbc73324451d..4c22f8004514 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -973,6 +973,7 @@ enum mlxsw_sp_acl_profile { }; struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); +struct mlxsw_sp_acl_tcam *mlxsw_sp_acl_to_tcam(struct mlxsw_sp_acl *acl); int mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_flow_block *block, @@ -1096,8 +1097,6 @@ mlxsw_sp_acl_act_cookie_lookup(struct mlxsw_sp *mlxsw_sp, u32 cookie_index) int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp); -u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp); -int mlxsw_sp_acl_region_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, u32 val); struct mlxsw_sp_acl_mangle_action; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 6c5af018546f..0423ac262d89 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -40,6 +40,11 @@ struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl) return acl->afk; } +struct mlxsw_sp_acl_tcam *mlxsw_sp_acl_to_tcam(struct mlxsw_sp_acl *acl) +{ + return &acl->tcam; +} + struct mlxsw_sp_acl_ruleset_ht_key { struct mlxsw_sp_flow_block *block; u32 chain_index; @@ -1099,22 +1104,6 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) kfree(acl); } -u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp) -{ - struct mlxsw_sp_acl *acl = mlxsw_sp->acl; - - return mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(mlxsw_sp, - &acl->tcam); -} - -int mlxsw_sp_acl_region_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, u32 val) -{ - struct mlxsw_sp_acl *acl = mlxsw_sp->acl; - - return mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(mlxsw_sp, - &acl->tcam, val); -} - struct mlxsw_sp_acl_rulei_ops mlxsw_sp1_acl_rulei_ops = { .act_mangle_field = mlxsw_sp1_acl_rulei_act_mangle_field, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 3b9ba8fa247a..d50786b0a6ce 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -9,6 +9,7 @@ #include <linux/rhashtable.h> #include <linux/netdevice.h> #include <linux/mutex.h> +#include <net/devlink.h> #include <trace/events/mlxsw.h> #include "reg.h" @@ -29,67 +30,6 @@ size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp) #define MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_MIN 
3000 /* ms */ #define MLXSW_SP_ACL_TCAM_VREGION_REHASH_CREDITS 100 /* number of entries */ -int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam *tcam) -{ - const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - u64 max_tcam_regions; - u64 max_regions; - u64 max_groups; - int err; - - mutex_init(&tcam->lock); - tcam->vregion_rehash_intrvl = - MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_DFLT; - INIT_LIST_HEAD(&tcam->vregion_list); - - max_tcam_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, - ACL_MAX_TCAM_REGIONS); - max_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_REGIONS); - - /* Use 1:1 mapping between ACL region and TCAM region */ - if (max_tcam_regions < max_regions) - max_regions = max_tcam_regions; - - tcam->used_regions = bitmap_zalloc(max_regions, GFP_KERNEL); - if (!tcam->used_regions) - return -ENOMEM; - tcam->max_regions = max_regions; - - max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS); - tcam->used_groups = bitmap_zalloc(max_groups, GFP_KERNEL); - if (!tcam->used_groups) { - err = -ENOMEM; - goto err_alloc_used_groups; - } - tcam->max_groups = max_groups; - tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, - ACL_MAX_GROUP_SIZE); - - err = ops->init(mlxsw_sp, tcam->priv, tcam); - if (err) - goto err_tcam_init; - - return 0; - -err_tcam_init: - bitmap_free(tcam->used_groups); -err_alloc_used_groups: - bitmap_free(tcam->used_regions); - return err; -} - -void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam *tcam) -{ - const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - - mutex_destroy(&tcam->lock); - ops->fini(mlxsw_sp, tcam->priv); - bitmap_free(tcam->used_groups); - bitmap_free(tcam->used_regions); -} - int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, u32 *priority, bool fillup_priority) @@ -893,41 +833,6 @@ mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp, kfree(vregion); } -u32 mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam *tcam) -{ - const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - u32 vregion_rehash_intrvl; - - if (WARN_ON(!ops->region_rehash_hints_get)) - return 0; - vregion_rehash_intrvl = tcam->vregion_rehash_intrvl; - return vregion_rehash_intrvl; -} - -int mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam *tcam, - u32 val) -{ - const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_vregion *vregion; - - if (val < MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_MIN && val) - return -EINVAL; - if (WARN_ON(!ops->region_rehash_hints_get)) - return -EOPNOTSUPP; - tcam->vregion_rehash_intrvl = val; - mutex_lock(&tcam->lock); - list_for_each_entry(vregion, &tcam->vregion_list, tlist) { - if (val) - mlxsw_core_schedule_dw(&vregion->rehash.dw, 0); - else - cancel_delayed_work_sync(&vregion->rehash.dw); - } - mutex_unlock(&tcam->lock); - return 0; -} - static struct mlxsw_sp_acl_tcam_vregion * mlxsw_sp_acl_tcam_vregion_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_vgroup *vgroup, @@ -1542,6 +1447,153 @@ mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_tcam_vregion_rehash_end(mlxsw_sp, vregion, ctx); } +static int +mlxsw_sp_acl_tcam_region_rehash_intrvl_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_sp_acl_tcam *tcam; + struct 
mlxsw_sp *mlxsw_sp; + + mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + tcam = mlxsw_sp_acl_to_tcam(mlxsw_sp->acl); + ctx->val.vu32 = tcam->vregion_rehash_intrvl; + + return 0; +} + +static int +mlxsw_sp_acl_tcam_region_rehash_intrvl_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_sp_acl_tcam_vregion *vregion; + struct mlxsw_sp_acl_tcam *tcam; + struct mlxsw_sp *mlxsw_sp; + u32 val = ctx->val.vu32; + + if (val < MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_MIN && val) + return -EINVAL; + + mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + tcam = mlxsw_sp_acl_to_tcam(mlxsw_sp->acl); + tcam->vregion_rehash_intrvl = val; + mutex_lock(&tcam->lock); + list_for_each_entry(vregion, &tcam->vregion_list, tlist) { + if (val) + mlxsw_core_schedule_dw(&vregion->rehash.dw, 0); + else + cancel_delayed_work_sync(&vregion->rehash.dw); + } + mutex_unlock(&tcam->lock); + return 0; +} + +static const struct devlink_param mlxsw_sp_acl_tcam_rehash_params[] = { + DEVLINK_PARAM_DRIVER(MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, + "acl_region_rehash_interval", + DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlxsw_sp_acl_tcam_region_rehash_intrvl_get, + mlxsw_sp_acl_tcam_region_rehash_intrvl_set, + NULL), +}; + +static int mlxsw_sp_acl_tcam_rehash_params_register(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + if (!mlxsw_sp->acl_tcam_ops->region_rehash_hints_get) + return 0; + + return devl_params_register(devlink, mlxsw_sp_acl_tcam_rehash_params, + ARRAY_SIZE(mlxsw_sp_acl_tcam_rehash_params)); +} + +static void +mlxsw_sp_acl_tcam_rehash_params_unregister(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + if (!mlxsw_sp->acl_tcam_ops->region_rehash_hints_get) + return; + + devl_params_unregister(devlink, mlxsw_sp_acl_tcam_rehash_params, + ARRAY_SIZE(mlxsw_sp_acl_tcam_rehash_params)); +} + +int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + u64 max_tcam_regions; + u64 max_regions; + u64 max_groups; + int err; + + mutex_init(&tcam->lock); + tcam->vregion_rehash_intrvl = + MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_DFLT; + INIT_LIST_HEAD(&tcam->vregion_list); + + err = mlxsw_sp_acl_tcam_rehash_params_register(mlxsw_sp); + if (err) + goto err_rehash_params_register; + + max_tcam_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_MAX_TCAM_REGIONS); + max_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_REGIONS); + + /* Use 1:1 mapping between ACL region and TCAM region */ + if (max_tcam_regions < max_regions) + max_regions = max_tcam_regions; + + tcam->used_regions = bitmap_zalloc(max_regions, GFP_KERNEL); + if (!tcam->used_regions) { + err = -ENOMEM; + goto err_alloc_used_regions; + } + tcam->max_regions = max_regions; + + max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS); + tcam->used_groups = bitmap_zalloc(max_groups, GFP_KERNEL); + if (!tcam->used_groups) { + err = -ENOMEM; + goto err_alloc_used_groups; + } + tcam->max_groups = max_groups; + tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_MAX_GROUP_SIZE); + + err = ops->init(mlxsw_sp, tcam->priv, tcam); + if (err) + goto err_tcam_init; + + return 0; + +err_tcam_init: + bitmap_free(tcam->used_groups); +err_alloc_used_groups: + bitmap_free(tcam->used_regions); +err_alloc_used_regions: + 
mlxsw_sp_acl_tcam_rehash_params_unregister(mlxsw_sp); +err_rehash_params_register: + mutex_destroy(&tcam->lock); + return err; +} + +void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + ops->fini(mlxsw_sp, tcam->priv); + bitmap_free(tcam->used_groups); + bitmap_free(tcam->used_regions); + mlxsw_sp_acl_tcam_rehash_params_unregister(mlxsw_sp); + mutex_destroy(&tcam->lock); +} + static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { MLXSW_AFK_ELEMENT_SRC_SYS_PORT, MLXSW_AFK_ELEMENT_DMAC_32_47, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h index edbbc89e7a71..462bf448497d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -29,11 +29,6 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam); void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam); -u32 mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam *tcam); -int mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam *tcam, - u32 val); int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, u32 *priority, bool fillup_priority); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c index 88c655d6318f..1e464bb804ae 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c @@ -234,6 +234,24 @@ static int lan966x_tc_flower_del(struct lan966x_port *port, return err; } +static int lan966x_tc_flower_stats(struct lan966x_port *port, + struct flow_cls_offload *f, + struct vcap_admin *admin) +{ + struct vcap_counter count = {}; + int err; + + err = vcap_get_rule_count_by_cookie(port->lan966x->vcap_ctrl, + &count, f->cookie); + if (err) + return err; + + flow_stats_update(&f->stats, 0x0, count.value, 0, 0, + FLOW_ACTION_HW_STATS_IMMEDIATE); + + return err; +} + int lan966x_tc_flower(struct lan966x_port *port, struct flow_cls_offload *f, bool ingress) @@ -252,6 +270,8 @@ int lan966x_tc_flower(struct lan966x_port *port, return lan966x_tc_flower_add(port, f, admin, ingress); case FLOW_CLS_DESTROY: return lan966x_tc_flower_del(port, f, admin); + case FLOW_CLS_STATS: + return lan966x_tc_flower_stats(port, f, admin); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index e9d228d7a95d..807b86667bca 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1908,12 +1908,12 @@ nfp_net_get_eeprom(struct net_device *netdev, struct nfp_app *app = nfp_app_from_netdev(netdev); u8 buf[NFP_EEPROM_LEN] = {}; - if (eeprom->len == 0) - return -EINVAL; - if (nfp_net_get_port_mac_by_hwinfo(netdev, buf)) return -EOPNOTSUPP; + if (eeprom->len == 0) + return -EINVAL; + eeprom->magic = app->pdev->vendor | (app->pdev->device << 16); memcpy(bytes, buf + eeprom->offset, eeprom->len); @@ -1927,15 +1927,15 @@ nfp_net_set_eeprom(struct net_device *netdev, struct nfp_app *app = nfp_app_from_netdev(netdev); u8 buf[NFP_EEPROM_LEN] = {}; + if 
(nfp_net_get_port_mac_by_hwinfo(netdev, buf)) + return -EOPNOTSUPP; + if (eeprom->len == 0) return -EINVAL; if (eeprom->magic != (app->pdev->vendor | app->pdev->device << 16)) return -EINVAL; - if (nfp_net_get_port_mac_by_hwinfo(netdev, buf)) - return -EOPNOTSUPP; - memcpy(buf + eeprom->offset, bytes, eeprom->len); if (nfp_net_set_port_mac_by_hwinfo(netdev, buf)) return -EOPNOTSUPP; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index d5b80c31ab91..01677c28e407 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -268,6 +268,9 @@ struct kszphy_type { u16 interrupt_level_mask; u16 cable_diag_reg; unsigned long pair_mask; + u16 disable_dll_tx_bit; + u16 disable_dll_rx_bit; + u16 disable_dll_mask; bool has_broadcast_disable; bool has_nand_tree_disable; bool has_rmii_ref_clk_sel; @@ -364,6 +367,19 @@ static const struct kszphy_type ksz9021_type = { .interrupt_level_mask = BIT(14), }; +static const struct kszphy_type ksz9131_type = { + .interrupt_level_mask = BIT(14), + .disable_dll_tx_bit = BIT(12), + .disable_dll_rx_bit = BIT(12), + .disable_dll_mask = BIT_MASK(12), +}; + +static const struct kszphy_type lan8841_type = { + .disable_dll_tx_bit = BIT(14), + .disable_dll_rx_bit = BIT(14), + .disable_dll_mask = BIT_MASK(14), +}; + static int kszphy_extended_write(struct phy_device *phydev, u32 regnum, u16 val) { @@ -1172,19 +1188,18 @@ static int ksz9131_of_load_skew_values(struct phy_device *phydev, #define KSZ9131RN_MMD_COMMON_CTRL_REG 2 #define KSZ9131RN_RXC_DLL_CTRL 76 #define KSZ9131RN_TXC_DLL_CTRL 77 -#define KSZ9131RN_DLL_CTRL_BYPASS BIT_MASK(12) #define KSZ9131RN_DLL_ENABLE_DELAY 0 -#define KSZ9131RN_DLL_DISABLE_DELAY BIT(12) static int ksz9131_config_rgmii_delay(struct phy_device *phydev) { + const struct kszphy_type *type = phydev->drv->driver_data; u16 rxcdll_val, txcdll_val; int ret; switch (phydev->interface) { case PHY_INTERFACE_MODE_RGMII: - rxcdll_val = KSZ9131RN_DLL_DISABLE_DELAY; - txcdll_val = KSZ9131RN_DLL_DISABLE_DELAY; + rxcdll_val = type->disable_dll_rx_bit; + txcdll_val = type->disable_dll_tx_bit; break; case PHY_INTERFACE_MODE_RGMII_ID: rxcdll_val = KSZ9131RN_DLL_ENABLE_DELAY; @@ -1192,10 +1207,10 @@ static int ksz9131_config_rgmii_delay(struct phy_device *phydev) break; case PHY_INTERFACE_MODE_RGMII_RXID: rxcdll_val = KSZ9131RN_DLL_ENABLE_DELAY; - txcdll_val = KSZ9131RN_DLL_DISABLE_DELAY; + txcdll_val = type->disable_dll_tx_bit; break; case PHY_INTERFACE_MODE_RGMII_TXID: - rxcdll_val = KSZ9131RN_DLL_DISABLE_DELAY; + rxcdll_val = type->disable_dll_rx_bit; txcdll_val = KSZ9131RN_DLL_ENABLE_DELAY; break; default: @@ -1203,13 +1218,13 @@ static int ksz9131_config_rgmii_delay(struct phy_device *phydev) } ret = phy_modify_mmd(phydev, KSZ9131RN_MMD_COMMON_CTRL_REG, - KSZ9131RN_RXC_DLL_CTRL, KSZ9131RN_DLL_CTRL_BYPASS, + KSZ9131RN_RXC_DLL_CTRL, type->disable_dll_mask, rxcdll_val); if (ret < 0) return ret; return phy_modify_mmd(phydev, KSZ9131RN_MMD_COMMON_CTRL_REG, - KSZ9131RN_TXC_DLL_CTRL, KSZ9131RN_DLL_CTRL_BYPASS, + KSZ9131RN_TXC_DLL_CTRL, type->disable_dll_mask, txcdll_val); } @@ -3152,6 +3167,146 @@ static int lan8814_probe(struct phy_device *phydev) return 0; } +#define LAN8841_MMD_TIMER_REG 0 +#define LAN8841_MMD0_REGISTER_17 17 +#define LAN8841_MMD0_REGISTER_17_DROP_OPT(x) ((x) & 0x3) +#define LAN8841_MMD0_REGISTER_17_XMIT_TOG_TX_DIS BIT(3) +#define LAN8841_OPERATION_MODE_STRAP_OVERRIDE_LOW_REG 2 +#define LAN8841_OPERATION_MODE_STRAP_OVERRIDE_LOW_REG_MAGJACK BIT(14) +#define LAN8841_MMD_ANALOG_REG 28 +#define LAN8841_ANALOG_CONTROL_1 1 
+#define LAN8841_ANALOG_CONTROL_1_PLL_TRIM(x) (((x) & 0x3) << 5) +#define LAN8841_ANALOG_CONTROL_10 13 +#define LAN8841_ANALOG_CONTROL_10_PLL_DIV(x) ((x) & 0x3) +#define LAN8841_ANALOG_CONTROL_11 14 +#define LAN8841_ANALOG_CONTROL_11_LDO_REF(x) (((x) & 0x7) << 12) +#define LAN8841_TX_LOW_I_CH_C_D_POWER_MANAGMENT 69 +#define LAN8841_TX_LOW_I_CH_C_D_POWER_MANAGMENT_VAL 0xbffc +#define LAN8841_BTRX_POWER_DOWN 70 +#define LAN8841_BTRX_POWER_DOWN_QBIAS_CH_A BIT(0) +#define LAN8841_BTRX_POWER_DOWN_BTRX_CH_A BIT(1) +#define LAN8841_BTRX_POWER_DOWN_QBIAS_CH_B BIT(2) +#define LAN8841_BTRX_POWER_DOWN_BTRX_CH_B BIT(3) +#define LAN8841_BTRX_POWER_DOWN_BTRX_CH_C BIT(5) +#define LAN8841_BTRX_POWER_DOWN_BTRX_CH_D BIT(7) +#define LAN8841_ADC_CHANNEL_MASK 198 + +static int lan8841_config_init(struct phy_device *phydev) +{ + int ret; + + ret = ksz9131_config_init(phydev); + if (ret) + return ret; + + /* 100BT Clause 40 improvenent errata */ + phy_write_mmd(phydev, LAN8841_MMD_ANALOG_REG, + LAN8841_ANALOG_CONTROL_1, + LAN8841_ANALOG_CONTROL_1_PLL_TRIM(0x2)); + phy_write_mmd(phydev, LAN8841_MMD_ANALOG_REG, + LAN8841_ANALOG_CONTROL_10, + LAN8841_ANALOG_CONTROL_10_PLL_DIV(0x1)); + + /* 10M/100M Ethernet Signal Tuning Errata for Shorted-Center Tap + * Magnetics + */ + ret = phy_read_mmd(phydev, KSZ9131RN_MMD_COMMON_CTRL_REG, + LAN8841_OPERATION_MODE_STRAP_OVERRIDE_LOW_REG); + if (ret & LAN8841_OPERATION_MODE_STRAP_OVERRIDE_LOW_REG_MAGJACK) { + phy_write_mmd(phydev, LAN8841_MMD_ANALOG_REG, + LAN8841_TX_LOW_I_CH_C_D_POWER_MANAGMENT, + LAN8841_TX_LOW_I_CH_C_D_POWER_MANAGMENT_VAL); + phy_write_mmd(phydev, LAN8841_MMD_ANALOG_REG, + LAN8841_BTRX_POWER_DOWN, + LAN8841_BTRX_POWER_DOWN_QBIAS_CH_A | + LAN8841_BTRX_POWER_DOWN_BTRX_CH_A | + LAN8841_BTRX_POWER_DOWN_QBIAS_CH_B | + LAN8841_BTRX_POWER_DOWN_BTRX_CH_B | + LAN8841_BTRX_POWER_DOWN_BTRX_CH_C | + LAN8841_BTRX_POWER_DOWN_BTRX_CH_D); + } + + /* LDO Adjustment errata */ + phy_write_mmd(phydev, LAN8841_MMD_ANALOG_REG, + LAN8841_ANALOG_CONTROL_11, + LAN8841_ANALOG_CONTROL_11_LDO_REF(1)); + + /* 100BT RGMII latency tuning errata */ + phy_write_mmd(phydev, MDIO_MMD_PMAPMD, + LAN8841_ADC_CHANNEL_MASK, 0x0); + phy_write_mmd(phydev, LAN8841_MMD_TIMER_REG, + LAN8841_MMD0_REGISTER_17, + LAN8841_MMD0_REGISTER_17_DROP_OPT(2) | + LAN8841_MMD0_REGISTER_17_XMIT_TOG_TX_DIS); + + return 0; +} + +#define LAN8841_OUTPUT_CTRL 25 +#define LAN8841_OUTPUT_CTRL_INT_BUFFER BIT(14) + +static int lan8841_config_intr(struct phy_device *phydev) +{ + int err; + + phy_modify(phydev, LAN8841_OUTPUT_CTRL, + LAN8841_OUTPUT_CTRL_INT_BUFFER, 0); + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = phy_read(phydev, LAN8814_INTS); + if (err) + return err; + + err = phy_write(phydev, LAN8814_INTC, + LAN8814_INT_LINK); + } else { + err = phy_write(phydev, LAN8814_INTC, 0); + if (err) + return err; + + err = phy_read(phydev, LAN8814_INTS); + } + + return err; +} + +static irqreturn_t lan8841_handle_interrupt(struct phy_device *phydev) +{ + int irq_status; + + irq_status = phy_read(phydev, LAN8814_INTS); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (irq_status & LAN8814_INT_LINK) { + phy_trigger_machine(phydev); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + +#define LAN8841_OPERATION_MODE_STRAP_LOW_REGISTER 3 +#define LAN8841_OPERATION_MODE_STRAP_LOW_REGISTER_STRAP_RGMII_EN BIT(0) + +static int lan8841_probe(struct phy_device *phydev) +{ + int err; + + err = kszphy_probe(phydev); + if (err) + return err; + + if (phy_read_mmd(phydev, KSZ9131RN_MMD_COMMON_CTRL_REG, + 
LAN8841_OPERATION_MODE_STRAP_LOW_REGISTER) & + LAN8841_OPERATION_MODE_STRAP_LOW_REGISTER_STRAP_RGMII_EN) + phydev->interface = PHY_INTERFACE_MODE_RGMII_RXID; + + return 0; +} + static struct phy_driver ksphy_driver[] = { { .phy_id = PHY_ID_KS8737, @@ -3362,12 +3517,27 @@ static struct phy_driver ksphy_driver[] = { .config_intr = lan8804_config_intr, .handle_interrupt = lan8804_handle_interrupt, }, { + .phy_id = PHY_ID_LAN8841, + .phy_id_mask = MICREL_PHY_ID_MASK, + .name = "Microchip LAN8841 Gigabit PHY", + .driver_data = &lan8841_type, + .config_init = lan8841_config_init, + .probe = lan8841_probe, + .soft_reset = genphy_soft_reset, + .config_intr = lan8841_config_intr, + .handle_interrupt = lan8841_handle_interrupt, + .get_sset_count = kszphy_get_sset_count, + .get_strings = kszphy_get_strings, + .get_stats = kszphy_get_stats, + .suspend = genphy_suspend, + .resume = genphy_resume, +}, { .phy_id = PHY_ID_KSZ9131, .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Microchip KSZ9131 Gigabit PHY", /* PHY_GBIT_FEATURES */ .flags = PHY_POLL_CABLE_TEST, - .driver_data = &ksz9021_type, + .driver_data = &ksz9131_type, .probe = kszphy_probe, .config_init = ksz9131_config_init, .config_intr = kszphy_config_intr, @@ -3446,6 +3616,7 @@ static struct mdio_device_id __maybe_unused micrel_tbl[] = { { PHY_ID_KSZ886X, MICREL_PHY_ID_MASK }, { PHY_ID_LAN8814, MICREL_PHY_ID_MASK }, { PHY_ID_LAN8804, MICREL_PHY_ID_MASK }, + { PHY_ID_LAN8841, MICREL_PHY_ID_MASK }, { } }; diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c2aa0aa26b45..7b16aede7ac5 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -391,6 +391,26 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, nr_cpumask_bits, cpumask_check(cpu)); } +/** + * cpumask_nth_and_andnot - get the Nth cpu set in 1st and 2nd cpumask, and clear in 3rd. + * @srcp1: the cpumask pointer + * @srcp2: the cpumask pointer + * @srcp3: the cpumask pointer + * @cpu: the N'th cpu to find, starting from 0 + * + * Returns >= nr_cpu_ids if such cpu doesn't exist. + */ +static __always_inline +unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp1, + const struct cpumask *srcp2, + const struct cpumask *srcp3) +{ + return find_nth_and_andnot_bit(cpumask_bits(srcp1), + cpumask_bits(srcp2), + cpumask_bits(srcp3), + nr_cpumask_bits, cpumask_check(cpu)); +} + #define CPU_BITS_NONE \ { \ [0 ... 
BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ diff --git a/include/linux/find.h b/include/linux/find.h index ccaf61a0f5fd..4647864a5ffd 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -22,6 +22,9 @@ unsigned long __find_nth_and_bit(const unsigned long *addr1, const unsigned long unsigned long size, unsigned long n); unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n); +unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, + const unsigned long *addr3, unsigned long size, + unsigned long n); extern unsigned long _find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size); extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); @@ -255,6 +258,36 @@ unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned lon return __find_nth_andnot_bit(addr1, addr2, size, n); } +/** + * find_nth_and_andnot_bit - find N'th set bit in 2 memory regions, + * excluding those set in 3rd region + * @addr1: The 1st address to start the search at + * @addr2: The 2nd address to start the search at + * @addr3: The 3rd address to start the search at + * @size: The maximum number of bits to search + * @n: The number of set bit, which position is needed, counting from 0 + * + * Returns the bit number of the N'th set bit. + * If no such, returns @size. + */ +static __always_inline +unsigned long find_nth_and_andnot_bit(const unsigned long *addr1, + const unsigned long *addr2, + const unsigned long *addr3, + unsigned long size, unsigned long n) +{ + if (n >= size) + return size; + + if (small_const_nbits(size)) { + unsigned long val = *addr1 & *addr2 & (~*addr3) & GENMASK(size - 1, 0); + + return val ? fns(val, n) : size; + } + + return __find_nth_and_andnot_bit(addr1, addr2, addr3, size, n); +} + #ifndef find_first_and_bit /** * find_first_and_bit - find the first set bit in both memory regions diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 771e050883db..8bef1ab62bba 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -31,6 +31,7 @@ #define PHY_ID_KSZ9131 0x00221640 #define PHY_ID_LAN8814 0x00221660 #define PHY_ID_LAN8804 0x00221670 +#define PHY_ID_LAN8841 0x00221650 #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 diff --git a/include/linux/topology.h b/include/linux/topology.h index 4564faafd0e1..fea32377f7c7 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -245,5 +245,38 @@ static inline const struct cpumask *cpu_cpu_mask(int cpu) return cpumask_of_node(cpu_to_node(cpu)); } +#ifdef CONFIG_NUMA +int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node); +extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops); +#else +static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node) +{ + return cpumask_nth(cpu, cpus); +} + +static inline const struct cpumask * +sched_numa_hop_mask(unsigned int node, unsigned int hops) +{ + return ERR_PTR(-EOPNOTSUPP); +} +#endif /* CONFIG_NUMA */ + +/** + * for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance + * from a given node. + * @mask: the iteration variable. + * @node: the NUMA node to start the search from. + * + * Requires rcu_lock to be held. + * + * Yields cpu_online_mask for @node == NUMA_NO_NODE. 
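+ *
+ * A minimal usage sketch (illustrative only; do_something() and node are
+ * placeholders, not part of this change), mirroring the mlx5
+ * comp_irqs_request() hunk above:
+ *
+ *	const struct cpumask *mask, *prev = cpu_none_mask;
+ *	unsigned int cpu;
+ *
+ *	rcu_read_lock();
+ *	for_each_numa_hop_mask(mask, node) {
+ *		for_each_cpu_andnot(cpu, mask, prev)
+ *			do_something(cpu);	/* visits CPUs nearest-first */
+ *		prev = mask;
+ *	}
+ *	rcu_read_unlock();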
+ */ +#define for_each_numa_hop_mask(mask, node) \ + for (unsigned int __hops = 0; \ + mask = (node != NUMA_NO_NODE || __hops) ? \ + sched_numa_hop_mask(node, __hops) : \ + cpu_online_mask, \ + !IS_ERR_OR_NULL(mask); \ + __hops++) #endif /* _LINUX_TOPOLOGY_H */ diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index fd889fc4912b..2016839991a4 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -177,6 +177,11 @@ struct tc_mqprio_qopt_offload { struct tc_taprio_caps { bool supports_queue_max_sdu:1; bool gate_mask_per_txq:1; + /* Device expects lower TXQ numbers to have higher priority over higher + * TXQs, regardless of their TC mapping. DO NOT USE FOR NEW DRIVERS, + * INSTEAD ENFORCE A PROPER TC:TXQ MAPPING COMING FROM USER SPACE. + */ + bool broken_mqprio:1; }; struct tc_taprio_sched_entry { diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 8739c2a5a54e..1233affc106c 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -3,6 +3,8 @@ * Scheduler topology setup/handling methods */ +#include <linux/bsearch.h> + DEFINE_MUTEX(sched_domains_mutex); /* Protected by sched_domains_mutex: */ @@ -2067,6 +2069,94 @@ unlock: return found; } +struct __cmp_key { + const struct cpumask *cpus; + struct cpumask ***masks; + int node; + int cpu; + int w; +}; + +static int hop_cmp(const void *a, const void *b) +{ + struct cpumask **prev_hop = *((struct cpumask ***)b - 1); + struct cpumask **cur_hop = *(struct cpumask ***)b; + struct __cmp_key *k = (struct __cmp_key *)a; + + if (cpumask_weight_and(k->cpus, cur_hop[k->node]) <= k->cpu) + return 1; + + k->w = (b == k->masks) ? 0 : cpumask_weight_and(k->cpus, prev_hop[k->node]); + if (k->w <= k->cpu) + return 0; + + return -1; +} + +/* + * sched_numa_find_nth_cpu() - given the NUMA topology, find the Nth next cpu + * closest to @cpu from @cpumask. + * cpumask: cpumask to find a cpu from + * cpu: Nth cpu to find + * + * returns: cpu, or nr_cpu_ids when nothing found. + */ +int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node) +{ + struct __cmp_key k = { .cpus = cpus, .node = node, .cpu = cpu }; + struct cpumask ***hop_masks; + int hop, ret = nr_cpu_ids; + + rcu_read_lock(); + + k.masks = rcu_dereference(sched_domains_numa_masks); + if (!k.masks) + goto unlock; + + hop_masks = bsearch(&k, k.masks, sched_domains_numa_levels, sizeof(k.masks[0]), hop_cmp); + hop = hop_masks - k.masks; + + ret = hop ? + cpumask_nth_and_andnot(cpu - k.w, cpus, k.masks[hop][node], k.masks[hop-1][node]) : + cpumask_nth_and(cpu, cpus, k.masks[0][node]); +unlock: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(sched_numa_find_nth_cpu); + +/** + * sched_numa_hop_mask() - Get the cpumask of CPUs at most @hops hops away from + * @node + * @node: The node to count hops from. + * @hops: Include CPUs up to that many hops away. 0 means local node. + * + * Return: On success, a pointer to a cpumask of CPUs at most @hops away from + * @node, an error value otherwise. + * + * Requires rcu_lock to be held. Returned cpumask is only valid within that + * read-side section, copy it if required beyond that. + * + * Note that not all hops are equal in distance; see sched_init_numa() for how + * distances and masks are handled. + * Also note that this is a reflection of sched_domains_numa_masks, which may change + * during the lifetime of the system (offline nodes are taken out of the masks). 
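+ *
+ * A minimal calling sketch (illustrative; nr_cpus is a placeholder, and
+ * node/hops are whatever the caller already holds): the returned mask may
+ * only be dereferenced inside the same RCU read-side critical section:
+ *
+ *	const struct cpumask *mask;
+ *	unsigned int nr_cpus = 0;
+ *
+ *	rcu_read_lock();
+ *	mask = sched_numa_hop_mask(node, hops);
+ *	if (!IS_ERR(mask))
+ *		nr_cpus = cpumask_weight(mask);	/* use only within the section */
+ *	rcu_read_unlock();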
+ */ +const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops) +{ + struct cpumask ***masks; + + if (node >= nr_node_ids || hops >= sched_domains_numa_levels) + return ERR_PTR(-EINVAL); + + masks = rcu_dereference(sched_domains_numa_masks); + if (!masks) + return ERR_PTR(-EBUSY); + + return masks[hops][node]; +} +EXPORT_SYMBOL_GPL(sched_numa_hop_mask); + #endif /* CONFIG_NUMA */ static int __sdt_alloc(const struct cpumask *cpu_map) diff --git a/lib/cpumask.c b/lib/cpumask.c index c7c392514fd3..e7258836b60b 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -110,15 +110,33 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) #endif /** - * cpumask_local_spread - select the i'th cpu with local numa cpu's first + * cpumask_local_spread - select the i'th cpu based on NUMA distances * @i: index number * @node: local numa_node * - * This function selects an online CPU according to a numa aware policy; - * local cpus are returned first, followed by non-local ones, then it - * wraps around. + * Returns online CPU according to a numa aware policy; local cpus are returned + * first, followed by non-local ones, then it wraps around. * - * It's not very efficient, but useful for setup. + * For those who wants to enumerate all CPUs based on their NUMA distances, + * i.e. call this function in a loop, like: + * + * for (i = 0; i < num_online_cpus(); i++) { + * cpu = cpumask_local_spread(i, node); + * do_something(cpu); + * } + * + * There's a better alternative based on for_each()-like iterators: + * + * for_each_numa_hop_mask(mask, node) { + * for_each_cpu_andnot(cpu, mask, prev) + * do_something(cpu); + * prev = mask; + * } + * + * It's simpler and more verbose than above. Complexity of iterator-based + * enumeration is O(sched_domains_numa_levels * nr_cpu_ids), while + * cpumask_local_spread() when called for each cpu is + * O(sched_domains_numa_levels * nr_cpu_ids * log(nr_cpu_ids)). */ unsigned int cpumask_local_spread(unsigned int i, int node) { @@ -127,24 +145,12 @@ unsigned int cpumask_local_spread(unsigned int i, int node) /* Wrap: we always want a cpu. */ i %= num_online_cpus(); - if (node == NUMA_NO_NODE) { - cpu = cpumask_nth(i, cpu_online_mask); - if (cpu < nr_cpu_ids) - return cpu; - } else { - /* NUMA first. */ - cpu = cpumask_nth_and(i, cpu_online_mask, cpumask_of_node(node)); - if (cpu < nr_cpu_ids) - return cpu; - - i -= cpumask_weight_and(cpu_online_mask, cpumask_of_node(node)); - - /* Skip NUMA nodes, done above. */ - cpu = cpumask_nth_andnot(i, cpu_online_mask, cpumask_of_node(node)); - if (cpu < nr_cpu_ids) - return cpu; - } - BUG(); + cpu = (node == NUMA_NO_NODE) ? 
+ cpumask_nth(i, cpu_online_mask) : + sched_numa_find_nth_cpu(cpu_online_mask, i, node); + + WARN_ON(cpu >= nr_cpu_ids); + return cpu; } EXPORT_SYMBOL(cpumask_local_spread); diff --git a/lib/find_bit.c b/lib/find_bit.c index 18bc0a7ac8ee..c10920e66788 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -155,6 +155,15 @@ unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned l } EXPORT_SYMBOL(__find_nth_andnot_bit); +unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, + const unsigned long *addr2, + const unsigned long *addr3, + unsigned long size, unsigned long n) +{ + return FIND_NTH_BIT(addr1[idx] & addr2[idx] & ~addr3[idx], size, n); +} +EXPORT_SYMBOL(__find_nth_and_andnot_bit); + #ifndef find_next_and_bit unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start) diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c index 97d30ea98b00..9d6373603340 100644 --- a/net/devlink/leftover.c +++ b/net/devlink/leftover.c @@ -6626,18 +6626,22 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, err = devlink_fmsg_obj_nest_start(fmsg); if (err) - return err; + goto out; err = reporter->ops->diagnose(reporter, fmsg, info->extack); if (err) - return err; + goto out; err = devlink_fmsg_obj_nest_end(fmsg); if (err) - return err; + goto out; + + err = devlink_fmsg_snd(fmsg, info, + DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0); - return devlink_fmsg_snd(fmsg, info, - DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0); +out: + devlink_fmsg_free(fmsg); + return err; } static int diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c14018a8052c..e9780631b5b5 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1282,12 +1282,6 @@ static struct Qdisc *qdisc_create(struct net_device *dev, if (err) goto err_out3; - if (ops->init) { - err = ops->init(sch, tca[TCA_OPTIONS], extack); - if (err != 0) - goto err_out5; - } - if (tca[TCA_STAB]) { stab = qdisc_get_stab(tca[TCA_STAB], extack); if (IS_ERR(stab)) { @@ -1296,11 +1290,18 @@ static struct Qdisc *qdisc_create(struct net_device *dev, } rcu_assign_pointer(sch->stab, stab); } + + if (ops->init) { + err = ops->init(sch, tca[TCA_OPTIONS], extack); + if (err != 0) + goto err_out5; + } + if (tca[TCA_RATE]) { err = -EOPNOTSUPP; if (sch->flags & TCQ_F_MQROOT) { NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc"); - goto err_out4; + goto err_out5; } err = gen_new_estimator(&sch->bstats, @@ -1311,7 +1312,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to generate new estimator"); - goto err_out4; + goto err_out5; } } @@ -1321,6 +1322,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev, return sch; err_out5: + qdisc_put_stab(rtnl_dereference(sch->stab)); +err_out4: /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */ if (ops->destroy) ops->destroy(sch); @@ -1332,16 +1335,6 @@ err_out2: err_out: *errp = err; return NULL; - -err_out4: - /* - * Any broken qdiscs that would require a ops->reset() here? - * The qdisc was never in action so it shouldn't be necessary. 
- */ - qdisc_put_stab(rtnl_dereference(sch->stab)); - if (ops->destroy) - ops->destroy(sch); - goto err_out3; } static int qdisc_change(struct Qdisc *sch, struct nlattr **tca, diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 1c95785932b9..9781b47962bb 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -29,6 +29,8 @@ #include "sch_mqprio_lib.h" static LIST_HEAD(taprio_list); +static struct static_key_false taprio_have_broken_mqprio; +static struct static_key_false taprio_have_working_mqprio; #define TAPRIO_ALL_GATES_OPEN -1 @@ -37,15 +39,19 @@ static LIST_HEAD(taprio_list); #define TAPRIO_FLAGS_INVALID U32_MAX struct sched_entry { - struct list_head list; - - /* The instant that this entry "closes" and the next one - * should open, the qdisc will make some effort so that no - * packet leaves after this time. + /* Durations between this GCL entry and the GCL entry where the + * respective traffic class gate closes */ - ktime_t close_time; + u64 gate_duration[TC_MAX_QUEUE]; + atomic_t budget[TC_MAX_QUEUE]; + /* The qdisc makes some effort so that no packet leaves + * after this time + */ + ktime_t gate_close_time[TC_MAX_QUEUE]; + struct list_head list; + /* Used to calculate when to advance the schedule */ + ktime_t end_time; ktime_t next_txtime; - atomic_t budget; int index; u32 gate_mask; u32 interval; @@ -53,10 +59,16 @@ struct sched_entry { }; struct sched_gate_list { + /* Longest non-zero contiguous gate durations per traffic class, + * or 0 if a traffic class gate never opens during the schedule. + */ + u64 max_open_gate_duration[TC_MAX_QUEUE]; + u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */ + u32 max_sdu[TC_MAX_QUEUE]; /* for dump */ struct rcu_head rcu; struct list_head entries; size_t num_entries; - ktime_t cycle_close_time; + ktime_t cycle_end_time; s64 cycle_time; s64 cycle_time_extension; s64 base_time; @@ -69,6 +81,8 @@ struct taprio_sched { enum tk_offsets tk_offset; int clockid; bool offloaded; + bool detected_mqprio; + bool broken_mqprio; atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+ * speeds it's sub-nanoseconds per byte */ @@ -80,8 +94,8 @@ struct taprio_sched { struct sched_gate_list __rcu *admin_sched; struct hrtimer advance_timer; struct list_head taprio_list; - u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */ - u32 max_sdu[TC_MAX_QUEUE]; /* for dump and offloading */ + int cur_txq[TC_MAX_QUEUE]; + u32 max_sdu[TC_MAX_QUEUE]; /* save info from the user */ u32 txtime_delay; }; @@ -90,6 +104,57 @@ struct __tc_taprio_qopt_offload { struct tc_taprio_qopt_offload offload; }; +static void taprio_calculate_gate_durations(struct taprio_sched *q, + struct sched_gate_list *sched) +{ + struct net_device *dev = qdisc_dev(q->root); + int num_tc = netdev_get_num_tc(dev); + struct sched_entry *entry, *cur; + int tc; + + list_for_each_entry(entry, &sched->entries, list) { + u32 gates_still_open = entry->gate_mask; + + /* For each traffic class, calculate each open gate duration, + * starting at this schedule entry and ending at the schedule + * entry containing a gate close event for that TC. 
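+ *
+ * A hypothetical worked example (not taken from this patch): given a
+ * schedule with entries E0 {100us, gates TC0|TC1}, E1 {200us, TC0} and
+ * E2 {300us, TC1}, E0 gets gate_duration[TC0] = 300us (open across E0
+ * and E1, closed in E2) and gate_duration[TC1] = 100us (closed already
+ * in E1), while E2 gets gate_duration[TC1] = 400us because the walk
+ * wraps around to E0. A gate that stays open in every entry accumulates
+ * the sum of all intervals, i.e. the cycle time in the common case.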
+ */ + cur = entry; + + do { + if (!gates_still_open) + break; + + for (tc = 0; tc < num_tc; tc++) { + if (!(gates_still_open & BIT(tc))) + continue; + + if (cur->gate_mask & BIT(tc)) + entry->gate_duration[tc] += cur->interval; + else + gates_still_open &= ~BIT(tc); + } + + cur = list_next_entry_circular(cur, &sched->entries, list); + } while (cur != entry); + + /* Keep track of the maximum gate duration for each traffic + * class, taking care to not confuse a traffic class which is + * temporarily closed with one that is always closed. + */ + for (tc = 0; tc < num_tc; tc++) + if (entry->gate_duration[tc] && + sched->max_open_gate_duration[tc] < entry->gate_duration[tc]) + sched->max_open_gate_duration[tc] = entry->gate_duration[tc]; + } +} + +static bool taprio_entry_allows_tx(ktime_t skb_end_time, + struct sched_entry *entry, int tc) +{ + return ktime_before(skb_end_time, entry->gate_close_time[tc]); +} + static ktime_t sched_base_time(const struct sched_gate_list *sched) { if (!sched) @@ -182,6 +247,55 @@ static int length_to_duration(struct taprio_sched *q, int len) return div_u64(len * atomic64_read(&q->picos_per_byte), PSEC_PER_NSEC); } +static int duration_to_length(struct taprio_sched *q, u64 duration) +{ + return div_u64(duration * PSEC_PER_NSEC, atomic64_read(&q->picos_per_byte)); +} + +/* Sets sched->max_sdu[] and sched->max_frm_len[] to the minimum between the + * q->max_sdu[] requested by the user and the max_sdu dynamically determined by + * the maximum open gate durations at the given link speed. + */ +static void taprio_update_queue_max_sdu(struct taprio_sched *q, + struct sched_gate_list *sched, + struct qdisc_size_table *stab) +{ + struct net_device *dev = qdisc_dev(q->root); + int num_tc = netdev_get_num_tc(dev); + u32 max_sdu_from_user; + u32 max_sdu_dynamic; + u32 max_sdu; + int tc; + + for (tc = 0; tc < num_tc; tc++) { + max_sdu_from_user = q->max_sdu[tc] ?: U32_MAX; + + /* TC gate never closes => keep the queueMaxSDU + * selected by the user + */ + if (sched->max_open_gate_duration[tc] == sched->cycle_time) { + max_sdu_dynamic = U32_MAX; + } else { + u32 max_frm_len; + + max_frm_len = duration_to_length(q, sched->max_open_gate_duration[tc]); + if (stab) + max_frm_len -= stab->szopts.overhead; + max_sdu_dynamic = max_frm_len - dev->hard_header_len; + } + + max_sdu = min(max_sdu_dynamic, max_sdu_from_user); + + if (max_sdu != U32_MAX) { + sched->max_frm_len[tc] = max_sdu + dev->hard_header_len; + sched->max_sdu[tc] = max_sdu; + } else { + sched->max_frm_len[tc] = U32_MAX; /* never oversized */ + sched->max_sdu[tc] = 0; + } + } +} + /* Returns the entry corresponding to next available interval. If * validate_interval is set, it only validates whether the timestamp occurs * when the gate corresponding to the skb's traffic class is open. 
@@ -415,14 +529,33 @@ done: return txtime; } -static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch, - struct Qdisc *child, struct sk_buff **to_free) +/* Devices with full offload are expected to honor this in hardware */ +static bool taprio_skb_exceeds_queue_max_sdu(struct Qdisc *sch, + struct sk_buff *skb) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + struct sched_gate_list *sched; int prio = skb->priority; + bool exceeds = false; u8 tc; + tc = netdev_get_prio_tc_map(dev, prio); + + rcu_read_lock(); + sched = rcu_dereference(q->oper_sched); + if (sched && skb->len > sched->max_frm_len[tc]) + exceeds = true; + rcu_read_unlock(); + + return exceeds; +} + +static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch, + struct Qdisc *child, struct sk_buff **to_free) +{ + struct taprio_sched *q = qdisc_priv(sch); + /* sk_flags are only safe to use on full sockets. */ if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) { if (!is_valid_interval(skb, sch)) @@ -433,17 +566,53 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch, return qdisc_drop(skb, sch, to_free); } - /* Devices with full offload are expected to honor this in hardware */ - tc = netdev_get_prio_tc_map(dev, prio); - if (skb->len > q->max_frm_len[tc]) - return qdisc_drop(skb, sch, to_free); - qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return qdisc_enqueue(skb, child, to_free); } +static int taprio_enqueue_segmented(struct sk_buff *skb, struct Qdisc *sch, + struct Qdisc *child, + struct sk_buff **to_free) +{ + unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb); + netdev_features_t features = netif_skb_features(skb); + struct sk_buff *segs, *nskb; + int ret; + + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + if (IS_ERR_OR_NULL(segs)) + return qdisc_drop(skb, sch, to_free); + + skb_list_walk_safe(segs, segs, nskb) { + skb_mark_not_on_list(segs); + qdisc_skb_cb(segs)->pkt_len = segs->len; + slen += segs->len; + + /* FIXME: we should be segmenting to a smaller size + * rather than dropping these + */ + if (taprio_skb_exceeds_queue_max_sdu(sch, segs)) + ret = qdisc_drop(segs, sch, to_free); + else + ret = taprio_enqueue_one(segs, sch, child, to_free); + + if (ret != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(ret)) + qdisc_qstats_drop(sch); + } else { + numsegs++; + } + } + + if (numsegs > 1) + qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen); + consume_skb(skb); + + return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; +} + /* Will not be called in the full offload case, since the TX queues are * attached to the Qdisc created using qdisc_create_dflt() */ @@ -460,97 +629,190 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (unlikely(!child)) return qdisc_drop(skb, sch, to_free); - /* Large packets might not be transmitted when the transmission duration - * exceeds any configured interval. Therefore, segment the skb into - * smaller chunks. Drivers with full offload are expected to handle - * this in hardware. 
- */ - if (skb_is_gso(skb)) { - unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb); - netdev_features_t features = netif_skb_features(skb); - struct sk_buff *segs, *nskb; - int ret; - - segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); - if (IS_ERR_OR_NULL(segs)) - return qdisc_drop(skb, sch, to_free); + if (taprio_skb_exceeds_queue_max_sdu(sch, skb)) { + /* Large packets might not be transmitted when the transmission + * duration exceeds any configured interval. Therefore, segment + * the skb into smaller chunks. Drivers with full offload are + * expected to handle this in hardware. + */ + if (skb_is_gso(skb)) + return taprio_enqueue_segmented(skb, sch, child, + to_free); - skb_list_walk_safe(segs, segs, nskb) { - skb_mark_not_on_list(segs); - qdisc_skb_cb(segs)->pkt_len = segs->len; - slen += segs->len; + return qdisc_drop(skb, sch, to_free); + } - ret = taprio_enqueue_one(segs, sch, child, to_free); - if (ret != NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(ret)) - qdisc_qstats_drop(sch); - } else { - numsegs++; - } - } + return taprio_enqueue_one(skb, sch, child, to_free); +} + +static struct sk_buff *taprio_peek(struct Qdisc *sch) +{ + WARN_ONCE(1, "taprio only supports operating as root qdisc, peek() not implemented"); + return NULL; +} - if (numsegs > 1) - qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen); - consume_skb(skb); +static void taprio_set_budgets(struct taprio_sched *q, + struct sched_gate_list *sched, + struct sched_entry *entry) +{ + struct net_device *dev = qdisc_dev(q->root); + int num_tc = netdev_get_num_tc(dev); + int tc, budget; - return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; + for (tc = 0; tc < num_tc; tc++) { + /* Traffic classes which never close have infinite budget */ + if (entry->gate_duration[tc] == sched->cycle_time) + budget = INT_MAX; + else + budget = div64_u64((u64)entry->gate_duration[tc] * PSEC_PER_NSEC, + atomic64_read(&q->picos_per_byte)); + + atomic_set(&entry->budget[tc], budget); } +} - return taprio_enqueue_one(skb, sch, child, to_free); +/* When an skb is sent, it consumes from the budget of all traffic classes */ +static int taprio_update_budgets(struct sched_entry *entry, size_t len, + int tc_consumed, int num_tc) +{ + int tc, budget, new_budget = 0; + + for (tc = 0; tc < num_tc; tc++) { + budget = atomic_read(&entry->budget[tc]); + /* Don't consume from infinite budget */ + if (budget == INT_MAX) { + if (tc == tc_consumed) + new_budget = budget; + continue; + } + + if (tc == tc_consumed) + new_budget = atomic_sub_return(len, &entry->budget[tc]); + else + atomic_sub(len, &entry->budget[tc]); + } + + return new_budget; } -/* Will not be called in the full offload case, since the TX queues are - * attached to the Qdisc created using qdisc_create_dflt() - */ -static struct sk_buff *taprio_peek(struct Qdisc *sch) +static struct sk_buff *taprio_dequeue_from_txq(struct Qdisc *sch, int txq, + struct sched_entry *entry, + u32 gate_mask) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); - struct sched_entry *entry; + struct Qdisc *child = q->qdiscs[txq]; + int num_tc = netdev_get_num_tc(dev); struct sk_buff *skb; - u32 gate_mask; - int i; + ktime_t guard; + int prio; + int len; + u8 tc; - rcu_read_lock(); - entry = rcu_dereference(q->current_entry); - gate_mask = entry ? 
entry->gate_mask : TAPRIO_ALL_GATES_OPEN; - rcu_read_unlock(); + if (unlikely(!child)) + return NULL; - if (!gate_mask) + if (TXTIME_ASSIST_IS_ENABLED(q->flags)) + goto skip_peek_checks; + + skb = child->ops->peek(child); + if (!skb) return NULL; - for (i = 0; i < dev->num_tx_queues; i++) { - struct Qdisc *child = q->qdiscs[i]; - int prio; - u8 tc; + prio = skb->priority; + tc = netdev_get_prio_tc_map(dev, prio); - if (unlikely(!child)) - continue; + if (!(gate_mask & BIT(tc))) + return NULL; - skb = child->ops->peek(child); - if (!skb) - continue; + len = qdisc_pkt_len(skb); + guard = ktime_add_ns(taprio_get_time(q), length_to_duration(q, len)); - if (TXTIME_ASSIST_IS_ENABLED(q->flags)) - return skb; + /* In the case that there's no gate entry, there's no + * guard band ... + */ + if (gate_mask != TAPRIO_ALL_GATES_OPEN && + !taprio_entry_allows_tx(guard, entry, tc)) + return NULL; + + /* ... and no budget. */ + if (gate_mask != TAPRIO_ALL_GATES_OPEN && + taprio_update_budgets(entry, len, tc, num_tc) < 0) + return NULL; + +skip_peek_checks: + skb = child->ops->dequeue(child); + if (unlikely(!skb)) + return NULL; + + qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); + sch->q.qlen--; + + return skb; +} + +static void taprio_next_tc_txq(struct net_device *dev, int tc, int *txq) +{ + int offset = dev->tc_to_txq[tc].offset; + int count = dev->tc_to_txq[tc].count; + + (*txq)++; + if (*txq == offset + count) + *txq = offset; +} + +/* Prioritize higher traffic classes, and select among TXQs belonging to the + * same TC using round robin + */ +static struct sk_buff *taprio_dequeue_tc_priority(struct Qdisc *sch, + struct sched_entry *entry, + u32 gate_mask) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + int num_tc = netdev_get_num_tc(dev); + struct sk_buff *skb; + int tc; - prio = skb->priority; - tc = netdev_get_prio_tc_map(dev, prio); + for (tc = num_tc - 1; tc >= 0; tc--) { + int first_txq = q->cur_txq[tc]; if (!(gate_mask & BIT(tc))) continue; - return skb; + do { + skb = taprio_dequeue_from_txq(sch, q->cur_txq[tc], + entry, gate_mask); + + taprio_next_tc_txq(dev, tc, &q->cur_txq[tc]); + + if (skb) + return skb; + } while (q->cur_txq[tc] != first_txq); } return NULL; } -static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) +/* Broken way of prioritizing smaller TXQ indices and ignoring the traffic + * class other than to determine whether the gate is open or not + */ +static struct sk_buff *taprio_dequeue_txq_priority(struct Qdisc *sch, + struct sched_entry *entry, + u32 gate_mask) { - atomic_set(&entry->budget, - div64_u64((u64)entry->interval * PSEC_PER_NSEC, - atomic64_read(&q->picos_per_byte))); + struct net_device *dev = qdisc_dev(sch); + struct sk_buff *skb; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + skb = taprio_dequeue_from_txq(sch, i, entry, gate_mask); + if (skb) + return skb; + } + + return NULL; } /* Will not be called in the full offload case, since the TX queues are @@ -559,11 +821,9 @@ static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) static struct sk_buff *taprio_dequeue(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); - struct net_device *dev = qdisc_dev(sch); struct sk_buff *skb = NULL; struct sched_entry *entry; u32 gate_mask; - int i; rcu_read_lock(); entry = rcu_dereference(q->current_entry); @@ -573,69 +833,23 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch) * "AdminGateStates" */ gate_mask = entry ? 
entry->gate_mask : TAPRIO_ALL_GATES_OPEN; - if (!gate_mask) goto done; - for (i = 0; i < dev->num_tx_queues; i++) { - struct Qdisc *child = q->qdiscs[i]; - ktime_t guard; - int prio; - int len; - u8 tc; - - if (unlikely(!child)) - continue; - - if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { - skb = child->ops->dequeue(child); - if (!skb) - continue; - goto skb_found; - } - - skb = child->ops->peek(child); - if (!skb) - continue; - - prio = skb->priority; - tc = netdev_get_prio_tc_map(dev, prio); - - if (!(gate_mask & BIT(tc))) { - skb = NULL; - continue; - } - - len = qdisc_pkt_len(skb); - guard = ktime_add_ns(taprio_get_time(q), - length_to_duration(q, len)); - - /* In the case that there's no gate entry, there's no - * guard band ... - */ - if (gate_mask != TAPRIO_ALL_GATES_OPEN && - ktime_after(guard, entry->close_time)) { - skb = NULL; - continue; - } - - /* ... and no budget. */ - if (gate_mask != TAPRIO_ALL_GATES_OPEN && - atomic_sub_return(len, &entry->budget) < 0) { - skb = NULL; - continue; - } - - skb = child->ops->dequeue(child); - if (unlikely(!skb)) - goto done; - -skb_found: - qdisc_bstats_update(sch, skb); - qdisc_qstats_backlog_dec(sch, skb); - sch->q.qlen--; - - goto done; + if (static_branch_unlikely(&taprio_have_broken_mqprio) && + !static_branch_likely(&taprio_have_working_mqprio)) { + /* Single NIC kind which is broken */ + skb = taprio_dequeue_txq_priority(sch, entry, gate_mask); + } else if (static_branch_likely(&taprio_have_working_mqprio) && + !static_branch_unlikely(&taprio_have_broken_mqprio)) { + /* Single NIC kind which prioritizes properly */ + skb = taprio_dequeue_tc_priority(sch, entry, gate_mask); + } else { + /* Mixed NIC kinds present in system, need dynamic testing */ + if (q->broken_mqprio) + skb = taprio_dequeue_txq_priority(sch, entry, gate_mask); + else + skb = taprio_dequeue_tc_priority(sch, entry, gate_mask); } done: @@ -650,7 +864,7 @@ static bool should_restart_cycle(const struct sched_gate_list *oper, if (list_is_last(&entry->list, &oper->entries)) return true; - if (ktime_compare(entry->close_time, oper->cycle_close_time) == 0) + if (ktime_compare(entry->end_time, oper->cycle_end_time) == 0) return true; return false; @@ -658,7 +872,7 @@ static bool should_restart_cycle(const struct sched_gate_list *oper, static bool should_change_schedules(const struct sched_gate_list *admin, const struct sched_gate_list *oper, - ktime_t close_time) + ktime_t end_time) { ktime_t next_base_time, extension_time; @@ -667,18 +881,18 @@ static bool should_change_schedules(const struct sched_gate_list *admin, next_base_time = sched_base_time(admin); - /* This is the simple case, the close_time would fall after + /* This is the simple case, the end_time would fall after * the next schedule base_time. */ - if (ktime_compare(next_base_time, close_time) <= 0) + if (ktime_compare(next_base_time, end_time) <= 0) return true; - /* This is the cycle_time_extension case, if the close_time + /* This is the cycle_time_extension case, if the end_time * plus the amount that can be extended would fall after the * next schedule base_time, we can extend the current schedule * for that amount. */ - extension_time = ktime_add_ns(close_time, oper->cycle_time_extension); + extension_time = ktime_add_ns(end_time, oper->cycle_time_extension); /* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about * how precisely the extension should be made. 
@@ -694,10 +908,13 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer)
 {
 	struct taprio_sched *q = container_of(timer, struct taprio_sched,
 					      advance_timer);
+	struct net_device *dev = qdisc_dev(q->root);
 	struct sched_gate_list *oper, *admin;
+	int num_tc = netdev_get_num_tc(dev);
 	struct sched_entry *entry, *next;
 	struct Qdisc *sch = q->root;
-	ktime_t close_time;
+	ktime_t end_time;
+	int tc;
 
 	spin_lock(&q->current_entry_lock);
 	entry = rcu_dereference_protected(q->current_entry,
@@ -716,41 +933,49 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer)
 	 * entry of all schedules are pre-calculated during the
 	 * schedule initialization.
 	 */
-	if (unlikely(!entry || entry->close_time == oper->base_time)) {
+	if (unlikely(!entry || entry->end_time == oper->base_time)) {
 		next = list_first_entry(&oper->entries, struct sched_entry,
 					list);
-		close_time = next->close_time;
+		end_time = next->end_time;
 		goto first_run;
 	}
 
 	if (should_restart_cycle(oper, entry)) {
 		next = list_first_entry(&oper->entries, struct sched_entry,
 					list);
-		oper->cycle_close_time = ktime_add_ns(oper->cycle_close_time,
-						      oper->cycle_time);
+		oper->cycle_end_time = ktime_add_ns(oper->cycle_end_time,
+						    oper->cycle_time);
 	} else {
 		next = list_next_entry(entry, list);
 	}
 
-	close_time = ktime_add_ns(entry->close_time, next->interval);
-	close_time = min_t(ktime_t, close_time, oper->cycle_close_time);
+	end_time = ktime_add_ns(entry->end_time, next->interval);
+	end_time = min_t(ktime_t, end_time, oper->cycle_end_time);
+
+	for (tc = 0; tc < num_tc; tc++) {
+		if (next->gate_duration[tc] == oper->cycle_time)
+			next->gate_close_time[tc] = KTIME_MAX;
+		else
+			next->gate_close_time[tc] = ktime_add_ns(entry->end_time,
								 next->gate_duration[tc]);
+	}
 
-	if (should_change_schedules(admin, oper, close_time)) {
+	if (should_change_schedules(admin, oper, end_time)) {
 		/* Set things so the next time this runs, the new
 		 * schedule runs.
 		 */
-		close_time = sched_base_time(admin);
+		end_time = sched_base_time(admin);
 		switch_schedules(q, &admin, &oper);
 	}
 
-	next->close_time = close_time;
-	taprio_set_budget(q, next);
+	next->end_time = end_time;
+	taprio_set_budgets(q, oper, next);
 
 first_run:
 	rcu_assign_pointer(q->current_entry, next);
 	spin_unlock(&q->current_entry_lock);
 
-	hrtimer_set_expires(&q->advance_timer, close_time);
+	hrtimer_set_expires(&q->advance_timer, end_time);
 
 	rcu_read_lock();
 	__netif_schedule(sch);
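A small sketch of the bookkeeping advance_sched() now performs when moving to the next entry, assuming u64 nanoseconds in place of ktime_t: the entry end_time is clamped to the cycle end, and a TC whose gate_duration equals the cycle time gets an effectively infinite gate_close_time. All numeric values below are invented.

/* Illustrative userspace arithmetic only, not the kernel code path. */
#include <stdio.h>
#include <stdint.h>

#define NUM_TC	3
#define NS_MAX	UINT64_MAX	/* stands in for KTIME_MAX */

int main(void)
{
	uint64_t cycle_time = 1000000;		/* ns */
	uint64_t cycle_end_time = 5000000;	/* ns, absolute */
	uint64_t cur_end_time = 4200000;	/* end of the current entry */
	uint64_t next_interval = 600000;	/* next entry's interval */
	/* how long each TC's gate stays open starting at the next entry */
	uint64_t gate_duration[NUM_TC] = { 600000, 1000000, 300000 };
	uint64_t gate_close_time[NUM_TC];
	uint64_t end_time;

	/* end_time = min(cur_end_time + interval, cycle_end_time) */
	end_time = cur_end_time + next_interval;
	if (end_time > cycle_end_time)
		end_time = cycle_end_time;

	for (int tc = 0; tc < NUM_TC; tc++) {
		if (gate_duration[tc] == cycle_time)
			gate_close_time[tc] = NS_MAX;	/* never closes */
		else
			gate_close_time[tc] = cur_end_time + gate_duration[tc];
	}

	printf("next entry end_time: %llu\n", (unsigned long long)end_time);
	for (int tc = 0; tc < NUM_TC; tc++)
		printf("TC%d gate closes at: %llu\n", tc,
		       (unsigned long long)gate_close_time[tc]);
	return 0;
}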
@@ -918,6 +1143,8 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
 		new->cycle_time = cycle;
 	}
 
+	taprio_calculate_gate_durations(q, new);
+
 	return 0;
 }
 
@@ -986,11 +1213,14 @@ static int taprio_get_start_time(struct Qdisc *sch,
 	return 0;
 }
 
-static void setup_first_close_time(struct taprio_sched *q,
-				   struct sched_gate_list *sched, ktime_t base)
+static void setup_first_end_time(struct taprio_sched *q,
+				 struct sched_gate_list *sched, ktime_t base)
 {
+	struct net_device *dev = qdisc_dev(q->root);
+	int num_tc = netdev_get_num_tc(dev);
 	struct sched_entry *first;
 	ktime_t cycle;
+	int tc;
 
 	first = list_first_entry(&sched->entries,
 				 struct sched_entry, list);
@@ -998,10 +1228,18 @@ static void setup_first_close_time(struct taprio_sched *q,
 	cycle = sched->cycle_time;
 
 	/* FIXME: find a better place to do this */
-	sched->cycle_close_time = ktime_add_ns(base, cycle);
+	sched->cycle_end_time = ktime_add_ns(base, cycle);
+
+	first->end_time = ktime_add_ns(base, first->interval);
+	taprio_set_budgets(q, sched, first);
+
+	for (tc = 0; tc < num_tc; tc++) {
+		if (first->gate_duration[tc] == sched->cycle_time)
+			first->gate_close_time[tc] = KTIME_MAX;
+		else
+			first->gate_close_time[tc] = ktime_add_ns(base, first->gate_duration[tc]);
+	}
 
-	first->close_time = ktime_add_ns(base, first->interval);
-	taprio_set_budget(q, first);
 	rcu_assign_pointer(q->current_entry, NULL);
 }
 
@@ -1055,6 +1293,8 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
 			       void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct sched_gate_list *oper, *admin;
+	struct qdisc_size_table *stab;
 	struct taprio_sched *q;
 
 	ASSERT_RTNL();
@@ -1067,6 +1307,17 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
 			continue;
 
 		taprio_set_picos_per_byte(dev, q);
+
+		stab = rtnl_dereference(q->root->stab);
+
+		oper = rtnl_dereference(q->oper_sched);
+		if (oper)
+			taprio_update_queue_max_sdu(q, oper, stab);
+
+		admin = rtnl_dereference(q->admin_sched);
+		if (admin)
+			taprio_update_queue_max_sdu(q, admin, stab);
+
 		break;
 	}
 
@@ -1197,6 +1448,34 @@ static void taprio_sched_to_offload(struct net_device *dev,
 	offload->num_entries = i;
 }
 
+static void taprio_detect_broken_mqprio(struct taprio_sched *q)
+{
+	struct net_device *dev = qdisc_dev(q->root);
+	struct tc_taprio_caps caps;
+
+	qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO,
+				 &caps, sizeof(caps));
+
+	q->broken_mqprio = caps.broken_mqprio;
+	if (q->broken_mqprio)
+		static_branch_inc(&taprio_have_broken_mqprio);
+	else
+		static_branch_inc(&taprio_have_working_mqprio);
+
+	q->detected_mqprio = true;
+}
+
+static void taprio_cleanup_broken_mqprio(struct taprio_sched *q)
+{
+	if (!q->detected_mqprio)
+		return;
+
+	if (q->broken_mqprio)
+		static_branch_dec(&taprio_have_broken_mqprio);
+	else
+		static_branch_dec(&taprio_have_working_mqprio);
+}
+
 static int taprio_enable_offload(struct net_device *dev,
 				 struct taprio_sched *q,
 				 struct sched_gate_list *sched,
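The static keys above act as system-wide reference counts; the userspace model below uses plain integers to show how the detect/cleanup helpers and the three-way dispatch in taprio_dequeue() are intended to combine (all-broken, all-working, or mixed NICs). Struct and function names here are illustrative, not the kernel's.

/* Illustrative model only: ints replace static keys, strings replace the
 * two dequeue routines.
 */
#include <stdio.h>
#include <stdbool.h>

static int have_broken_mqprio;	/* models taprio_have_broken_mqprio */
static int have_working_mqprio;	/* models taprio_have_working_mqprio */

struct taprio_instance {
	bool broken_mqprio;
	bool detected_mqprio;
};

static void detect_broken_mqprio(struct taprio_instance *q, bool hw_is_broken)
{
	q->broken_mqprio = hw_is_broken;
	if (q->broken_mqprio)
		have_broken_mqprio++;
	else
		have_working_mqprio++;
	q->detected_mqprio = true;
}

static void cleanup_broken_mqprio(struct taprio_instance *q)
{
	if (!q->detected_mqprio)
		return;
	if (q->broken_mqprio)
		have_broken_mqprio--;
	else
		have_working_mqprio--;
}

static const char *pick_dequeue(const struct taprio_instance *q)
{
	if (have_broken_mqprio && !have_working_mqprio)
		return "txq_priority";	/* only broken NICs present */
	if (have_working_mqprio && !have_broken_mqprio)
		return "tc_priority";	/* only working NICs present */
	/* mixed system: decide per instance */
	return q->broken_mqprio ? "txq_priority" : "tc_priority";
}

int main(void)
{
	struct taprio_instance a = { 0 }, b = { 0 };

	detect_broken_mqprio(&a, true);
	printf("only a: %s\n", pick_dequeue(&a));	/* txq_priority */

	detect_broken_mqprio(&b, false);
	printf("mixed, a: %s\n", pick_dequeue(&a));	/* txq_priority */
	printf("mixed, b: %s\n", pick_dequeue(&b));	/* tc_priority */

	cleanup_broken_mqprio(&a);
	cleanup_broken_mqprio(&b);
	return 0;
}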
@@ -1425,7 +1704,6 @@ static int taprio_parse_tc_entries(struct Qdisc *sch,
 				   struct netlink_ext_ack *extack)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
-	struct net_device *dev = qdisc_dev(sch);
 	u32 max_sdu[TC_QOPT_MAX_QUEUE];
 	unsigned long seen_tcs = 0;
 	struct nlattr *n;
@@ -1439,18 +1717,14 @@ static int taprio_parse_tc_entries(struct Qdisc *sch,
 		if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY)
 			continue;
 
-		err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs, extack);
+		err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs,
+					    extack);
 		if (err)
 			goto out;
 	}
 
-	for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
+	for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
 		q->max_sdu[tc] = max_sdu[tc];
-		if (max_sdu[tc])
-			q->max_frm_len[tc] = max_sdu[tc] + dev->hard_header_len;
-		else
-			q->max_frm_len[tc] = U32_MAX; /* never oversized */
-	}
 
 out:
 	return err;
@@ -1506,6 +1780,7 @@ static int taprio_new_flags(const struct nlattr *attr, u32 old,
 static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 			 struct netlink_ext_ack *extack)
 {
+	struct qdisc_size_table *stab = rtnl_dereference(sch->stab);
 	struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
 	struct sched_gate_list *oper, *admin, *new_admin;
 	struct taprio_sched *q = qdisc_priv(sch);
@@ -1573,15 +1848,18 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 		goto free_sched;
 
 	taprio_set_picos_per_byte(dev, q);
+	taprio_update_queue_max_sdu(q, new_admin, stab);
 
 	if (mqprio) {
 		err = netdev_set_num_tc(dev, mqprio->num_tc);
 		if (err)
 			goto free_sched;
 
-		for (i = 0; i < mqprio->num_tc; i++)
+		for (i = 0; i < mqprio->num_tc; i++) {
 			netdev_set_tc_queue(dev, i, mqprio->count[i],
 					    mqprio->offset[i]);
+			q->cur_txq[i] = mqprio->offset[i];
+		}
 
 		/* Always use supplied priority mappings */
 		for (i = 0; i <= TC_BITMASK; i++)
@@ -1636,7 +1914,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 		if (admin)
 			call_rcu(&admin->rcu, taprio_free_sched_cb);
 	} else {
-		setup_first_close_time(q, new_admin, start);
+		setup_first_end_time(q, new_admin, start);
 
 		/* Protects against advance_sched() */
 		spin_lock_irqsave(&q->current_entry_lock, flags);
@@ -1656,6 +1934,10 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 	new_admin = NULL;
 	err = 0;
 
+	if (!stab)
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Size table not specified, frame length estimations may be inaccurate");
+
 unlock:
 	spin_unlock_bh(qdisc_lock(sch));
 
@@ -1716,6 +1998,8 @@ static void taprio_destroy(struct Qdisc *sch)
 
 	if (admin)
 		call_rcu(&admin->rcu, taprio_free_sched_cb);
+
+	taprio_cleanup_broken_mqprio(q);
 }
 
 static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
@@ -1780,6 +2064,8 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
 		q->qdiscs[i] = qdisc;
 	}
 
+	taprio_detect_broken_mqprio(q);
+
 	return taprio_change(sch, opt, extack);
 }
 
@@ -1920,7 +2206,8 @@ error_nest:
 	return -1;
 }
 
-static int taprio_dump_tc_entries(struct taprio_sched *q, struct sk_buff *skb)
+static int taprio_dump_tc_entries(struct sk_buff *skb,
+				  struct sched_gate_list *sched)
 {
 	struct nlattr *n;
 	int tc;
@@ -1934,7 +2221,7 @@ static int taprio_dump_tc_entries(struct taprio_sched *q, struct sk_buff *skb)
 			goto nla_put_failure;
 
 		if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_MAX_SDU,
-				q->max_sdu[tc]))
+				sched->max_sdu[tc]))
 			goto nla_put_failure;
 
 		nla_nest_end(skb, n);
@@ -1978,7 +2265,7 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 	    nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
 		goto options_error;
 
-	if (taprio_dump_tc_entries(q, skb))
+	if (oper && taprio_dump_tc_entries(skb, oper))
 		goto options_error;
 
 	if (oper && dump_schedule(skb, oper))
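For context on the max_frm_len lines removed above, this sketch reproduces the old per-TC arithmetic only: a max-sdu of 0 meant "never oversized" (U32_MAX), otherwise the L2 header length was added on top. The replacement taprio_update_queue_max_sdu() helper is not visible in this diff, so nothing here describes its behavior; ETH_HLEN stands in for dev->hard_header_len.

/* Illustrative userspace sketch of the removed computation. */
#include <stdio.h>
#include <stdint.h>

#define ETH_HLEN		14
#define TC_QOPT_MAX_QUEUE	16

int main(void)
{
	uint32_t max_sdu[TC_QOPT_MAX_QUEUE] = { [0] = 200, [1] = 0, [2] = 1500 };
	uint32_t max_frm_len[TC_QOPT_MAX_QUEUE];

	for (int tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
		if (max_sdu[tc])
			max_frm_len[tc] = max_sdu[tc] + ETH_HLEN;
		else
			max_frm_len[tc] = UINT32_MAX;	/* never oversized */
	}

	for (int tc = 0; tc < 3; tc++)
		printf("TC%d: max frame length %u\n", tc, max_frm_len[tc]);
	return 0;
}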