Diffstat (limited to 'drivers/net/ethernet/intel/ice')
31 files changed, 2074 insertions, 938 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index cc5b85afd437..001500afc4a6 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -684,8 +684,8 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring) * ice_xsk_pool - get XSK buffer pool bound to a ring * @ring: Rx ring to use * - * Returns a pointer to xdp_umem structure if there is a buffer pool present, - * NULL otherwise. + * Returns a pointer to xsk_buff_pool structure if there is a buffer pool + * present, NULL otherwise. */ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) { @@ -699,23 +699,33 @@ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) } /** - * ice_tx_xsk_pool - get XSK buffer pool bound to a ring - * @ring: Tx ring to use + * ice_tx_xsk_pool - assign XSK buff pool to XDP ring + * @vsi: pointer to VSI + * @qid: index of a queue to look at XSK buff pool presence * - * Returns a pointer to xdp_umem structure if there is a buffer pool present, - * NULL otherwise. Tx equivalent of ice_xsk_pool. + * Sets XSK buff pool pointer on XDP ring. + * + * XDP ring is picked from Rx ring, whereas Rx ring is picked based on provided + * queue id. Reason for doing so is that queue vectors might have assigned more + * than one XDP ring, e.g. when user reduced the queue count on netdev; Rx ring + * carries a pointer to one of these XDP rings for its own purposes, such as + * handling XDP_TX action, therefore we can piggyback here on the + * rx_ring->xdp_ring assignment that was done during XDP rings initialization. */ -static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring) +static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) { - struct ice_vsi *vsi = ring->vsi; - u16 qid; + struct ice_tx_ring *ring; - qid = ring->q_index - vsi->alloc_txq; + ring = vsi->rx_rings[qid]->xdp_ring; + if (!ring) + return; - if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) - return NULL; + if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) { + ring->xsk_pool = NULL; + return; + } - return xsk_get_pool_from_qid(vsi->netdev, qid); + ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid); } /** @@ -854,6 +864,7 @@ ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, struct ice_q_stats stats, u64 *pkts, u64 *bytes); int ice_up(struct ice_vsi *vsi); int ice_down(struct ice_vsi *vsi); +int ice_down_up(struct ice_vsi *vsi); int ice_vsi_cfg(struct ice_vsi *vsi); struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi); int ice_vsi_determine_xdp_res(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 9939238573a4..1bdc70aa979d 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1423,6 +1423,56 @@ struct ice_aqc_set_port_id_led { u8 rsvd[13]; }; +/* Get Port Options (indirect, 0x06EA) */ +struct ice_aqc_get_port_options { + u8 lport_num; + u8 lport_num_valid; + u8 port_options_count; +#define ICE_AQC_PORT_OPT_COUNT_M GENMASK(3, 0) +#define ICE_AQC_PORT_OPT_MAX 16 + + u8 innermost_phy_index; + u8 port_options; +#define ICE_AQC_PORT_OPT_ACTIVE_M GENMASK(3, 0) +#define ICE_AQC_PORT_OPT_VALID BIT(7) + + u8 pending_port_option_status; +#define ICE_AQC_PENDING_PORT_OPT_IDX_M GENMASK(3, 0) +#define ICE_AQC_PENDING_PORT_OPT_VALID BIT(7) + + u8 rsvd[2]; + __le32 addr_high; + __le32 
addr_low; +}; + +struct ice_aqc_get_port_options_elem { + u8 pmd; +#define ICE_AQC_PORT_OPT_PMD_COUNT_M GENMASK(3, 0) + + u8 max_lane_speed; +#define ICE_AQC_PORT_OPT_MAX_LANE_M GENMASK(3, 0) +#define ICE_AQC_PORT_OPT_MAX_LANE_100M 0 +#define ICE_AQC_PORT_OPT_MAX_LANE_1G 1 +#define ICE_AQC_PORT_OPT_MAX_LANE_2500M 2 +#define ICE_AQC_PORT_OPT_MAX_LANE_5G 3 +#define ICE_AQC_PORT_OPT_MAX_LANE_10G 4 +#define ICE_AQC_PORT_OPT_MAX_LANE_25G 5 +#define ICE_AQC_PORT_OPT_MAX_LANE_50G 6 +#define ICE_AQC_PORT_OPT_MAX_LANE_100G 7 + + u8 global_scid[2]; + u8 phy_scid[2]; + u8 pf2port_cid[2]; +}; + +/* Set Port Option (direct, 0x06EB) */ +struct ice_aqc_set_port_option { + u8 lport_num; + u8 lport_num_valid; + u8 selected_port_option; + u8 rsvd[13]; +}; + /* Set/Get GPIO (direct, 0x06EC/0x06ED) */ struct ice_aqc_gpio { __le16 gpio_ctrl_handle; @@ -1489,6 +1539,12 @@ struct ice_aqc_nvm { #define ICE_AQC_NVM_PERST_FLAG 1 #define ICE_AQC_NVM_EMPR_FLAG 2 #define ICE_AQC_NVM_EMPR_ENA BIT(0) /* Write Activate reply only */ + /* For Write Activate, several flags are sent as part of a separate + * flags2 field using a separate byte. For simplicity of the software + * interface, we pass the flags as a 16 bit value so these flags are + * all offset by 8 bits + */ +#define ICE_AQC_NVM_ACTIV_REQ_EMPR BIT(8) /* NVM Write Activate only */ __le16 module_typeid; __le16 length; #define ICE_AQC_NVM_ERASE_LEN 0xFFFF @@ -2082,6 +2138,8 @@ struct ice_aq_desc { struct ice_aqc_gpio read_write_gpio; struct ice_aqc_sff_eeprom read_write_sff_param; struct ice_aqc_set_port_id_led set_port_id_led; + struct ice_aqc_get_port_options get_port_options; + struct ice_aqc_set_port_option set_port_option; struct ice_aqc_get_sw_cfg get_sw_conf; struct ice_aqc_set_port_params set_port_params; struct ice_aqc_sw_rules sw_rules; @@ -2243,6 +2301,8 @@ enum ice_adminq_opc { ice_aqc_opc_read_i2c = 0x06E2, ice_aqc_opc_write_i2c = 0x06E3, ice_aqc_opc_set_port_id_led = 0x06E9, + ice_aqc_opc_get_port_options = 0x06EA, + ice_aqc_opc_set_port_option = 0x06EB, ice_aqc_opc_set_gpio = 0x06EC, ice_aqc_opc_get_gpio = 0x06ED, ice_aqc_opc_sff_eeprom = 0x06EE, diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 136d7911adb4..9e36f01dfa4f 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -7,18 +7,6 @@ #include "ice_dcb_lib.h" #include "ice_sriov.h" -static bool ice_alloc_rx_buf_zc(struct ice_rx_ring *rx_ring) -{ - rx_ring->xdp_buf = kcalloc(rx_ring->count, sizeof(*rx_ring->xdp_buf), GFP_KERNEL); - return !!rx_ring->xdp_buf; -} - -static bool ice_alloc_rx_buf(struct ice_rx_ring *rx_ring) -{ - rx_ring->rx_buf = kcalloc(rx_ring->count, sizeof(*rx_ring->rx_buf), GFP_KERNEL); - return !!rx_ring->rx_buf; -} - /** * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI * @qs_cfg: gathered variables needed for PF->VSI queues assignment @@ -142,8 +130,7 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) * handler here (i.e. resume, reset/rebuild, etc.) */ if (vsi->netdev) - netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll, - NAPI_POLL_WEIGHT); + netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll); out: /* tie q_vector and VSI together */ @@ -417,7 +404,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) /* Strip the Ethernet CRC bytes before the packet is posted to host * memory. 
*/ - rlan_ctx.crcstrip = 1; + rlan_ctx.crcstrip = !(ring->flags & ICE_RX_FLAGS_CRC_STRIP_DIS); /* L2TSEL flag defines the reported L2 Tags in the receive descriptor * and it needs to remain 1 for non-DVM capable configurations to not @@ -519,11 +506,8 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, ring->q_vector->napi.napi_id); - kfree(ring->rx_buf); ring->xsk_pool = ice_xsk_pool(ring); if (ring->xsk_pool) { - if (!ice_alloc_rx_buf_zc(ring)) - return -ENOMEM; xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); ring->rx_buf_len = @@ -538,8 +522,6 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->q_index); } else { - if (!ice_alloc_rx_buf(ring)) - return -ENOMEM; if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) /* coverity[check_return] */ xdp_rxq_info_reg(&ring->xdp_rxq, diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 27d0cbbd29da..039342a0ed15 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -8,6 +8,108 @@ #define ICE_PF_RESET_WAIT_COUNT 300 +static const char * const ice_link_mode_str_low[] = { + [0] = "100BASE_TX", + [1] = "100M_SGMII", + [2] = "1000BASE_T", + [3] = "1000BASE_SX", + [4] = "1000BASE_LX", + [5] = "1000BASE_KX", + [6] = "1G_SGMII", + [7] = "2500BASE_T", + [8] = "2500BASE_X", + [9] = "2500BASE_KX", + [10] = "5GBASE_T", + [11] = "5GBASE_KR", + [12] = "10GBASE_T", + [13] = "10G_SFI_DA", + [14] = "10GBASE_SR", + [15] = "10GBASE_LR", + [16] = "10GBASE_KR_CR1", + [17] = "10G_SFI_AOC_ACC", + [18] = "10G_SFI_C2C", + [19] = "25GBASE_T", + [20] = "25GBASE_CR", + [21] = "25GBASE_CR_S", + [22] = "25GBASE_CR1", + [23] = "25GBASE_SR", + [24] = "25GBASE_LR", + [25] = "25GBASE_KR", + [26] = "25GBASE_KR_S", + [27] = "25GBASE_KR1", + [28] = "25G_AUI_AOC_ACC", + [29] = "25G_AUI_C2C", + [30] = "40GBASE_CR4", + [31] = "40GBASE_SR4", + [32] = "40GBASE_LR4", + [33] = "40GBASE_KR4", + [34] = "40G_XLAUI_AOC_ACC", + [35] = "40G_XLAUI", + [36] = "50GBASE_CR2", + [37] = "50GBASE_SR2", + [38] = "50GBASE_LR2", + [39] = "50GBASE_KR2", + [40] = "50G_LAUI2_AOC_ACC", + [41] = "50G_LAUI2", + [42] = "50G_AUI2_AOC_ACC", + [43] = "50G_AUI2", + [44] = "50GBASE_CP", + [45] = "50GBASE_SR", + [46] = "50GBASE_FR", + [47] = "50GBASE_LR", + [48] = "50GBASE_KR_PAM4", + [49] = "50G_AUI1_AOC_ACC", + [50] = "50G_AUI1", + [51] = "100GBASE_CR4", + [52] = "100GBASE_SR4", + [53] = "100GBASE_LR4", + [54] = "100GBASE_KR4", + [55] = "100G_CAUI4_AOC_ACC", + [56] = "100G_CAUI4", + [57] = "100G_AUI4_AOC_ACC", + [58] = "100G_AUI4", + [59] = "100GBASE_CR_PAM4", + [60] = "100GBASE_KR_PAM4", + [61] = "100GBASE_CP2", + [62] = "100GBASE_SR2", + [63] = "100GBASE_DR", +}; + +static const char * const ice_link_mode_str_high[] = { + [0] = "100GBASE_KR2_PAM4", + [1] = "100G_CAUI2_AOC_ACC", + [2] = "100G_CAUI2", + [3] = "100G_AUI2_AOC_ACC", + [4] = "100G_AUI2", +}; + +/** + * ice_dump_phy_type - helper function to dump phy_type + * @hw: pointer to the HW structure + * @low: 64 bit value for phy_type_low + * @high: 64 bit value for phy_type_high + * @prefix: prefix string to differentiate multiple dumps + */ +static void +ice_dump_phy_type(struct ice_hw *hw, u64 low, u64 high, const char *prefix) +{ + ice_debug(hw, ICE_DBG_PHY, "%s: phy_type_low: 0x%016llx\n", prefix, low); + + for (u32 i = 0; i < BITS_PER_TYPE(typeof(low)); i++) { + if (low & BIT_ULL(i)) + ice_debug(hw, ICE_DBG_PHY, "%s: 
bit(%d): %s\n", + prefix, i, ice_link_mode_str_low[i]); + } + + ice_debug(hw, ICE_DBG_PHY, "%s: phy_type_high: 0x%016llx\n", prefix, high); + + for (u32 i = 0; i < BITS_PER_TYPE(typeof(high)); i++) { + if (high & BIT_ULL(i)) + ice_debug(hw, ICE_DBG_PHY, "%s: bit(%d): %s\n", + prefix, i, ice_link_mode_str_high[i]); + } +} + /** * ice_set_mac_type - Sets MAC type * @hw: pointer to the HW structure @@ -80,9 +182,23 @@ bool ice_is_e810t(struct ice_hw *hw) { switch (hw->device_id) { case ICE_DEV_ID_E810C_SFP: - if (hw->subsystem_device_id == ICE_SUBDEV_ID_E810T || - hw->subsystem_device_id == ICE_SUBDEV_ID_E810T2) + switch (hw->subsystem_device_id) { + case ICE_SUBDEV_ID_E810T: + case ICE_SUBDEV_ID_E810T2: + case ICE_SUBDEV_ID_E810T3: + case ICE_SUBDEV_ID_E810T4: + case ICE_SUBDEV_ID_E810T6: + case ICE_SUBDEV_ID_E810T7: + return true; + } + break; + case ICE_DEV_ID_E810C_QSFP: + switch (hw->subsystem_device_id) { + case ICE_SUBDEV_ID_E810T2: + case ICE_SUBDEV_ID_E810T3: + case ICE_SUBDEV_ID_E810T5: return true; + } break; default: break; @@ -183,6 +299,7 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode, struct ice_aqc_get_phy_caps *cmd; u16 pcaps_size = sizeof(*pcaps); struct ice_aq_desc desc; + const char *prefix; struct ice_hw *hw; int status; @@ -204,29 +321,48 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode, cmd->param0 |= cpu_to_le16(report_mode); status = ice_aq_send_cmd(hw, &desc, pcaps, pcaps_size, cd); - ice_debug(hw, ICE_DBG_LINK, "get phy caps - report_mode = 0x%x\n", - report_mode); - ice_debug(hw, ICE_DBG_LINK, " phy_type_low = 0x%llx\n", - (unsigned long long)le64_to_cpu(pcaps->phy_type_low)); - ice_debug(hw, ICE_DBG_LINK, " phy_type_high = 0x%llx\n", - (unsigned long long)le64_to_cpu(pcaps->phy_type_high)); - ice_debug(hw, ICE_DBG_LINK, " caps = 0x%x\n", pcaps->caps); - ice_debug(hw, ICE_DBG_LINK, " low_power_ctrl_an = 0x%x\n", + ice_debug(hw, ICE_DBG_LINK, "get phy caps dump\n"); + + switch (report_mode) { + case ICE_AQC_REPORT_TOPO_CAP_MEDIA: + prefix = "phy_caps_media"; + break; + case ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA: + prefix = "phy_caps_no_media"; + break; + case ICE_AQC_REPORT_ACTIVE_CFG: + prefix = "phy_caps_active"; + break; + case ICE_AQC_REPORT_DFLT_CFG: + prefix = "phy_caps_default"; + break; + default: + prefix = "phy_caps_invalid"; + } + + ice_dump_phy_type(hw, le64_to_cpu(pcaps->phy_type_low), + le64_to_cpu(pcaps->phy_type_high), prefix); + + ice_debug(hw, ICE_DBG_LINK, "%s: report_mode = 0x%x\n", + prefix, report_mode); + ice_debug(hw, ICE_DBG_LINK, "%s: caps = 0x%x\n", prefix, pcaps->caps); + ice_debug(hw, ICE_DBG_LINK, "%s: low_power_ctrl_an = 0x%x\n", prefix, pcaps->low_power_ctrl_an); - ice_debug(hw, ICE_DBG_LINK, " eee_cap = 0x%x\n", pcaps->eee_cap); - ice_debug(hw, ICE_DBG_LINK, " eeer_value = 0x%x\n", + ice_debug(hw, ICE_DBG_LINK, "%s: eee_cap = 0x%x\n", prefix, + pcaps->eee_cap); + ice_debug(hw, ICE_DBG_LINK, "%s: eeer_value = 0x%x\n", prefix, pcaps->eeer_value); - ice_debug(hw, ICE_DBG_LINK, " link_fec_options = 0x%x\n", + ice_debug(hw, ICE_DBG_LINK, "%s: link_fec_options = 0x%x\n", prefix, pcaps->link_fec_options); - ice_debug(hw, ICE_DBG_LINK, " module_compliance_enforcement = 0x%x\n", - pcaps->module_compliance_enforcement); - ice_debug(hw, ICE_DBG_LINK, " extended_compliance_code = 0x%x\n", - pcaps->extended_compliance_code); - ice_debug(hw, ICE_DBG_LINK, " module_type[0] = 0x%x\n", + ice_debug(hw, ICE_DBG_LINK, "%s: module_compliance_enforcement = 0x%x\n", + prefix, 
pcaps->module_compliance_enforcement); + ice_debug(hw, ICE_DBG_LINK, "%s: extended_compliance_code = 0x%x\n", + prefix, pcaps->extended_compliance_code); + ice_debug(hw, ICE_DBG_LINK, "%s: module_type[0] = 0x%x\n", prefix, pcaps->module_type[0]); - ice_debug(hw, ICE_DBG_LINK, " module_type[1] = 0x%x\n", + ice_debug(hw, ICE_DBG_LINK, "%s: module_type[1] = 0x%x\n", prefix, pcaps->module_type[1]); - ice_debug(hw, ICE_DBG_LINK, " module_type[2] = 0x%x\n", + ice_debug(hw, ICE_DBG_LINK, "%s: module_type[2] = 0x%x\n", prefix, pcaps->module_type[2]); if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP_MEDIA) { @@ -2397,6 +2533,8 @@ ice_parse_1588_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p, info->tmr1_owned = ((number & ICE_TS_TMR1_OWND_M) != 0); info->tmr1_ena = ((number & ICE_TS_TMR1_ENA_M) != 0); + info->ts_ll_read = ((number & ICE_TS_LL_TX_TS_READ_M) != 0); + info->ena_ports = logical_id; info->tmr_own_map = phys_id; @@ -2414,6 +2552,8 @@ ice_parse_1588_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p, info->tmr1_owned); ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr1_ena = %u\n", info->tmr1_ena); + ice_debug(hw, ICE_DBG_INIT, "dev caps: ts_ll_read = %u\n", + info->ts_ll_read); ice_debug(hw, ICE_DBG_INIT, "dev caps: ieee_1588 ena_ports = %u\n", info->ena_ports); ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr_own_map = %u\n", @@ -2776,6 +2916,26 @@ ice_aq_set_port_params(struct ice_port_info *pi, bool double_vlan, } /** + * ice_is_100m_speed_supported + * @hw: pointer to the HW struct + * + * returns true if 100M speeds are supported by the device, + * false otherwise. + */ +bool ice_is_100m_speed_supported(struct ice_hw *hw) +{ + switch (hw->device_id) { + case ICE_DEV_ID_E822C_SGMII: + case ICE_DEV_ID_E822L_SGMII: + case ICE_DEV_ID_E823L_1GBE: + case ICE_DEV_ID_E823C_SGMII: + return true; + default: + return false; + } +} + +/** * ice_get_link_speed_based_on_phy_type - returns link speed * @phy_type_low: lower part of phy_type * @phy_type_high: higher part of phy_type @@ -3535,6 +3695,121 @@ ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode, } /** + * ice_aq_get_port_options + * @hw: pointer to the HW struct + * @options: buffer for the resultant port options + * @option_count: input - size of the buffer in port options structures, + * output - number of returned port options + * @lport: logical port to call the command with (optional) + * @lport_valid: when false, FW uses port owned by the PF instead of lport, + * when PF owns more than 1 port it must be true + * @active_option_idx: index of active port option in returned buffer + * @active_option_valid: active option in returned buffer is valid + * @pending_option_idx: index of pending port option in returned buffer + * @pending_option_valid: pending option in returned buffer is valid + * + * Calls Get Port Options AQC (0x06ea) and verifies result. 
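 *
 * Illustrative usage (hypothetical caller, not part of this patch),
 * querying all options for the PF-owned port:
 *
 *	struct ice_aqc_get_port_options_elem opts[ICE_AQC_PORT_OPT_MAX];
 *	u8 cnt = ICE_AQC_PORT_OPT_MAX, active_idx, pending_idx;
 *	bool active_valid, pending_valid;
 *
 *	err = ice_aq_get_port_options(hw, opts, &cnt, 0, true,
 *				      &active_idx, &active_valid,
 *				      &pending_idx, &pending_valid);
 *	if (!err && active_valid)
 *		dev_dbg(dev, "active option: %u of %u\n", active_idx, cnt);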
+ */ +int +ice_aq_get_port_options(struct ice_hw *hw, + struct ice_aqc_get_port_options_elem *options, + u8 *option_count, u8 lport, bool lport_valid, + u8 *active_option_idx, bool *active_option_valid, + u8 *pending_option_idx, bool *pending_option_valid) +{ + struct ice_aqc_get_port_options *cmd; + struct ice_aq_desc desc; + int status; + u8 i; + + /* options buffer shall be able to hold max returned options */ + if (*option_count < ICE_AQC_PORT_OPT_COUNT_M) + return -EINVAL; + + cmd = &desc.params.get_port_options; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_port_options); + + if (lport_valid) + cmd->lport_num = lport; + cmd->lport_num_valid = lport_valid; + + status = ice_aq_send_cmd(hw, &desc, options, + *option_count * sizeof(*options), NULL); + if (status) + return status; + + /* verify direct FW response & set output parameters */ + *option_count = FIELD_GET(ICE_AQC_PORT_OPT_COUNT_M, + cmd->port_options_count); + ice_debug(hw, ICE_DBG_PHY, "options: %x\n", *option_count); + *active_option_valid = FIELD_GET(ICE_AQC_PORT_OPT_VALID, + cmd->port_options); + if (*active_option_valid) { + *active_option_idx = FIELD_GET(ICE_AQC_PORT_OPT_ACTIVE_M, + cmd->port_options); + if (*active_option_idx > (*option_count - 1)) + return -EIO; + ice_debug(hw, ICE_DBG_PHY, "active idx: %x\n", + *active_option_idx); + } + + *pending_option_valid = FIELD_GET(ICE_AQC_PENDING_PORT_OPT_VALID, + cmd->pending_port_option_status); + if (*pending_option_valid) { + *pending_option_idx = FIELD_GET(ICE_AQC_PENDING_PORT_OPT_IDX_M, + cmd->pending_port_option_status); + if (*pending_option_idx > (*option_count - 1)) + return -EIO; + ice_debug(hw, ICE_DBG_PHY, "pending idx: %x\n", + *pending_option_idx); + } + + /* mask output options fields */ + for (i = 0; i < *option_count; i++) { + options[i].pmd = FIELD_GET(ICE_AQC_PORT_OPT_PMD_COUNT_M, + options[i].pmd); + options[i].max_lane_speed = FIELD_GET(ICE_AQC_PORT_OPT_MAX_LANE_M, + options[i].max_lane_speed); + ice_debug(hw, ICE_DBG_PHY, "pmds: %x max speed: %x\n", + options[i].pmd, options[i].max_lane_speed); + } + + return 0; +} + +/** + * ice_aq_set_port_option + * @hw: pointer to the HW struct + * @lport: logical port to call the command with + * @lport_valid: when false, FW uses port owned by the PF instead of lport, + * when PF owns more than 1 port it must be true + * @new_option: new port option to be written + * + * Calls Set Port Options AQC (0x06eb). 
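 *
 * Illustrative usage (not part of this patch): selecting option index 2
 * on the PF-owned port would be ice_aq_set_port_option(hw, 0, true, 2),
 * followed by an NVM write activate and a reboot to apply it.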
+ */ +int +ice_aq_set_port_option(struct ice_hw *hw, u8 lport, u8 lport_valid, + u8 new_option) +{ + struct ice_aqc_set_port_option *cmd; + struct ice_aq_desc desc; + + if (new_option > ICE_AQC_PORT_OPT_COUNT_M) + return -EINVAL; + + cmd = &desc.params.set_port_option; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_option); + + if (lport_valid) + cmd->lport_num = lport; + + cmd->lport_num_valid = lport_valid; + cmd->selected_port_option = new_option; + + return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); +} + +/** * ice_aq_sff_eeprom * @hw: pointer to the HW struct * @lport: bits [7:0] = logical port, bit [8] = logical port valid @@ -5029,20 +5304,22 @@ ice_aq_get_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, } /** - * ice_fw_supports_link_override + * ice_is_fw_api_min_ver * @hw: pointer to the hardware structure + * @maj: major version + * @min: minor version + * @patch: patch version * - * Checks if the firmware supports link override + * Checks if the firmware API is minimum version */ -bool ice_fw_supports_link_override(struct ice_hw *hw) +static bool ice_is_fw_api_min_ver(struct ice_hw *hw, u8 maj, u8 min, u8 patch) { - if (hw->api_maj_ver == ICE_FW_API_LINK_OVERRIDE_MAJ) { - if (hw->api_min_ver > ICE_FW_API_LINK_OVERRIDE_MIN) + if (hw->api_maj_ver == maj) { + if (hw->api_min_ver > min) return true; - if (hw->api_min_ver == ICE_FW_API_LINK_OVERRIDE_MIN && - hw->api_patch >= ICE_FW_API_LINK_OVERRIDE_PATCH) + if (hw->api_min_ver == min && hw->api_patch >= patch) return true; - } else if (hw->api_maj_ver > ICE_FW_API_LINK_OVERRIDE_MAJ) { + } else if (hw->api_maj_ver > maj) { return true; } @@ -5050,6 +5327,19 @@ bool ice_fw_supports_link_override(struct ice_hw *hw) } /** + * ice_fw_supports_link_override + * @hw: pointer to the hardware structure + * + * Checks if the firmware supports link override + */ +bool ice_fw_supports_link_override(struct ice_hw *hw) +{ + return ice_is_fw_api_min_ver(hw, ICE_FW_API_LINK_OVERRIDE_MAJ, + ICE_FW_API_LINK_OVERRIDE_MIN, + ICE_FW_API_LINK_OVERRIDE_PATCH); +} + +/** * ice_get_link_default_override * @ldo: pointer to the link default override struct * @pi: pointer to the port info struct @@ -5179,16 +5469,9 @@ bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw) if (hw->mac_type != ICE_MAC_E810) return false; - if (hw->api_maj_ver == ICE_FW_API_LLDP_FLTR_MAJ) { - if (hw->api_min_ver > ICE_FW_API_LLDP_FLTR_MIN) - return true; - if (hw->api_min_ver == ICE_FW_API_LLDP_FLTR_MIN && - hw->api_patch >= ICE_FW_API_LLDP_FLTR_PATCH) - return true; - } else if (hw->api_maj_ver > ICE_FW_API_LLDP_FLTR_MAJ) { - return true; - } - return false; + return ice_is_fw_api_min_ver(hw, ICE_FW_API_LLDP_FLTR_MAJ, + ICE_FW_API_LLDP_FLTR_MIN, + ICE_FW_API_LLDP_FLTR_PATCH); } /** @@ -5225,14 +5508,7 @@ ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add) */ bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw) { - if (hw->api_maj_ver == ICE_FW_API_REPORT_DFLT_CFG_MAJ) { - if (hw->api_min_ver > ICE_FW_API_REPORT_DFLT_CFG_MIN) - return true; - if (hw->api_min_ver == ICE_FW_API_REPORT_DFLT_CFG_MIN && - hw->api_patch >= ICE_FW_API_REPORT_DFLT_CFG_PATCH) - return true; - } else if (hw->api_maj_ver > ICE_FW_API_REPORT_DFLT_CFG_MAJ) { - return true; - } - return false; + return ice_is_fw_api_min_ver(hw, ICE_FW_API_REPORT_DFLT_CFG_MAJ, + ICE_FW_API_REPORT_DFLT_CFG_MIN, + ICE_FW_API_REPORT_DFLT_CFG_PATCH); } diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 61b7c60db689..8b6712b92e84 
100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -151,6 +151,15 @@ int ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode, struct ice_sq_cd *cd); int +ice_aq_get_port_options(struct ice_hw *hw, + struct ice_aqc_get_port_options_elem *options, + u8 *option_count, u8 lport, bool lport_valid, + u8 *active_option_idx, bool *active_option_valid, + u8 *pending_option_idx, bool *pending_option_valid); +int +ice_aq_set_port_option(struct ice_hw *hw, u8 lport, u8 lport_valid, + u8 new_option); +int ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length, bool write, struct ice_sq_cd *cd); @@ -204,6 +213,7 @@ ice_aq_set_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, bool value, int ice_aq_get_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, bool *value, struct ice_sq_cd *cd); +bool ice_is_100m_speed_supported(struct ice_hw *hw); int ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size, struct ice_sq_cd *cd); diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h index b41bc3dc1745..6d560d1c74a4 100644 --- a/drivers/net/ethernet/intel/ice/ice_devids.h +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -24,6 +24,11 @@ #define ICE_DEV_ID_E810C_SFP 0x1593 #define ICE_SUBDEV_ID_E810T 0x000E #define ICE_SUBDEV_ID_E810T2 0x000F +#define ICE_SUBDEV_ID_E810T3 0x0010 +#define ICE_SUBDEV_ID_E810T4 0x0011 +#define ICE_SUBDEV_ID_E810T5 0x0012 +#define ICE_SUBDEV_ID_E810T6 0x02E9 +#define ICE_SUBDEV_ID_E810T7 0x02EA /* Intel(R) Ethernet Controller E810-XXV for backplane */ #define ICE_DEV_ID_E810_XXV_BACKPLANE 0x1599 /* Intel(R) Ethernet Controller E810-XXV for QSFP */ diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 3337314a7b35..e6ec20079ced 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -9,6 +9,8 @@ #include "ice_eswitch.h" #include "ice_fw_update.h" +static int ice_active_port_option = -1; + /* context for devlink info version reporting */ struct ice_info_ctx { char buf[128]; @@ -466,12 +468,259 @@ ice_devlink_reload_empr_finish(struct devlink *devlink, return 0; } +/** + * ice_devlink_port_opt_speed_str - convert speed to a string + * @speed: speed value + */ +static const char *ice_devlink_port_opt_speed_str(u8 speed) +{ + switch (speed & ICE_AQC_PORT_OPT_MAX_LANE_M) { + case ICE_AQC_PORT_OPT_MAX_LANE_100M: + return "0.1"; + case ICE_AQC_PORT_OPT_MAX_LANE_1G: + return "1"; + case ICE_AQC_PORT_OPT_MAX_LANE_2500M: + return "2.5"; + case ICE_AQC_PORT_OPT_MAX_LANE_5G: + return "5"; + case ICE_AQC_PORT_OPT_MAX_LANE_10G: + return "10"; + case ICE_AQC_PORT_OPT_MAX_LANE_25G: + return "25"; + case ICE_AQC_PORT_OPT_MAX_LANE_50G: + return "50"; + case ICE_AQC_PORT_OPT_MAX_LANE_100G: + return "100"; + } + + return "-"; +} + +#define ICE_PORT_OPT_DESC_LEN 50 +/** + * ice_devlink_port_options_print - Print available port split options + * @pf: the PF to print split port options + * + * Prints a table with available port split options and max port speeds + */ +static void ice_devlink_port_options_print(struct ice_pf *pf) +{ + u8 i, j, options_count, cnt, speed, pending_idx, active_idx; + struct ice_aqc_get_port_options_elem *options, *opt; + struct device *dev = ice_pf_to_dev(pf); + bool active_valid, pending_valid; + char desc[ICE_PORT_OPT_DESC_LEN]; + const char *str; 
+ int status; + + options = kcalloc(ICE_AQC_PORT_OPT_MAX * ICE_MAX_PORT_PER_PCI_DEV, + sizeof(*options), GFP_KERNEL); + if (!options) + return; + + for (i = 0; i < ICE_MAX_PORT_PER_PCI_DEV; i++) { + opt = options + i * ICE_AQC_PORT_OPT_MAX; + options_count = ICE_AQC_PORT_OPT_MAX; + active_valid = 0; + + status = ice_aq_get_port_options(&pf->hw, opt, &options_count, + i, true, &active_idx, + &active_valid, &pending_idx, + &pending_valid); + if (status) { + dev_dbg(dev, "Couldn't read port option for port %d, err %d\n", + i, status); + goto err; + } + } + + dev_dbg(dev, "Available port split options and max port speeds (Gbps):\n"); + dev_dbg(dev, "Status Split Quad 0 Quad 1\n"); + dev_dbg(dev, " count L0 L1 L2 L3 L4 L5 L6 L7\n"); + + for (i = 0; i < options_count; i++) { + cnt = 0; + + if (i == ice_active_port_option) + str = "Active"; + else if ((i == pending_idx) && pending_valid) + str = "Pending"; + else + str = ""; + + cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, + "%-8s", str); + + cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, + "%-6u", options[i].pmd); + + for (j = 0; j < ICE_MAX_PORT_PER_PCI_DEV; ++j) { + speed = options[i + j * ICE_AQC_PORT_OPT_MAX].max_lane_speed; + str = ice_devlink_port_opt_speed_str(speed); + cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt, + "%3s ", str); + } + + dev_dbg(dev, "%s\n", desc); + } + +err: + kfree(options); +} + +/** + * ice_devlink_aq_set_port_option - Send set port option admin queue command + * @pf: the PF to print split port options + * @option_idx: selected port option + * @extack: extended netdev ack structure + * + * Sends set port option admin queue command with selected port option and + * calls NVM write activate. + */ +static int +ice_devlink_aq_set_port_option(struct ice_pf *pf, u8 option_idx, + struct netlink_ext_ack *extack) +{ + struct device *dev = ice_pf_to_dev(pf); + int status; + + status = ice_aq_set_port_option(&pf->hw, 0, true, option_idx); + if (status) { + dev_dbg(dev, "ice_aq_set_port_option, err %d aq_err %d\n", + status, pf->hw.adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Port split request failed"); + return -EIO; + } + + status = ice_acquire_nvm(&pf->hw, ICE_RES_WRITE); + if (status) { + dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", + status, pf->hw.adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); + return -EIO; + } + + status = ice_nvm_write_activate(&pf->hw, ICE_AQC_NVM_ACTIV_REQ_EMPR, NULL); + if (status) { + dev_dbg(dev, "ice_nvm_write_activate failed, err %d aq_err %d\n", + status, pf->hw.adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Port split request failed to save data"); + ice_release_nvm(&pf->hw); + return -EIO; + } + + ice_release_nvm(&pf->hw); + + NL_SET_ERR_MSG_MOD(extack, "Reboot required to finish port split"); + return 0; +} + +/** + * ice_devlink_port_split - .port_split devlink handler + * @devlink: devlink instance structure + * @port: devlink port structure + * @count: number of ports to split to + * @extack: extended netdev ack structure + * + * Callback for the devlink .port_split operation. + * + * Unfortunately, the devlink expression of available options is limited + * to just a number, so search for an FW port option which supports + * the specified number. As there could be multiple FW port options with + * the same port split count, allow switching between them. When the same + * port split count request is issued again, switch to the next FW port + * option with the same port split count. 
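 *
 * For example (illustrative): if FW options 1 and 3 both report a pmd
 * count of 4 and option 1 is active, a repeated "split to 4" request
 * starts the search at index 2 and lands on option 3.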
+ * + * Return: zero on success or an error code on failure. + */ +static int +ice_devlink_port_split(struct devlink *devlink, struct devlink_port *port, + unsigned int count, struct netlink_ext_ack *extack) +{ + struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX]; + u8 i, j, active_idx, pending_idx, new_option; + struct ice_pf *pf = devlink_priv(devlink); + u8 option_count = ICE_AQC_PORT_OPT_MAX; + struct device *dev = ice_pf_to_dev(pf); + bool active_valid, pending_valid; + int status; + + status = ice_aq_get_port_options(&pf->hw, options, &option_count, + 0, true, &active_idx, &active_valid, + &pending_idx, &pending_valid); + if (status) { + dev_dbg(dev, "Couldn't read port split options, err = %d\n", + status); + NL_SET_ERR_MSG_MOD(extack, "Failed to get available port split options"); + return -EIO; + } + + new_option = ICE_AQC_PORT_OPT_MAX; + active_idx = pending_valid ? pending_idx : active_idx; + for (i = 1; i <= option_count; i++) { + /* In order to allow switching between FW port options with + * the same port split count, search for a new option starting + * from the active/pending option (with array wrap around). + */ + j = (active_idx + i) % option_count; + + if (count == options[j].pmd) { + new_option = j; + break; + } + } + + if (new_option == active_idx) { + dev_dbg(dev, "request to split: count: %u is already set and there are no other options\n", + count); + NL_SET_ERR_MSG_MOD(extack, "Requested split count is already set"); + ice_devlink_port_options_print(pf); + return -EINVAL; + } + + if (new_option == ICE_AQC_PORT_OPT_MAX) { + dev_dbg(dev, "request to split: count: %u not found\n", count); + NL_SET_ERR_MSG_MOD(extack, "Port split requested unsupported port config"); + ice_devlink_port_options_print(pf); + return -EINVAL; + } + + status = ice_devlink_aq_set_port_option(pf, new_option, extack); + if (status) + return status; + + ice_devlink_port_options_print(pf); + + return 0; +} + +/** + * ice_devlink_port_unsplit - .port_unsplit devlink handler + * @devlink: devlink instance structure + * @port: devlink port structure + * @extack: extended netdev ack structure + * + * Callback for the devlink .port_unsplit operation. + * Calls ice_devlink_port_split with split count set to 1. + * There could be no FW option available with split count 1. + * + * Return: zero on success or an error code on failure. 
+ */ +static int +ice_devlink_port_unsplit(struct devlink *devlink, struct devlink_port *port, + struct netlink_ext_ack *extack) +{ + return ice_devlink_port_split(devlink, port, 1, extack); +} + static const struct devlink_ops ice_devlink_ops = { .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK, .reload_actions = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), /* The ice driver currently does not support driver reinit */ .reload_down = ice_devlink_reload_empr_start, .reload_up = ice_devlink_reload_empr_finish, + .port_split = ice_devlink_port_split, + .port_unsplit = ice_devlink_port_unsplit, .eswitch_mode_get = ice_eswitch_mode_get, .eswitch_mode_set = ice_eswitch_mode_set, .info_get = ice_devlink_info_get, @@ -695,6 +944,39 @@ void ice_devlink_unregister_params(struct ice_pf *pf) } /** + * ice_devlink_set_port_split_options - Set port split options + * @pf: the PF to set port split options + * @attrs: devlink attributes + * + * Sets devlink port split options based on available FW port options + */ +static void +ice_devlink_set_port_split_options(struct ice_pf *pf, + struct devlink_port_attrs *attrs) +{ + struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX]; + u8 i, active_idx, pending_idx, option_count = ICE_AQC_PORT_OPT_MAX; + bool active_valid, pending_valid; + int status; + + status = ice_aq_get_port_options(&pf->hw, options, &option_count, + 0, true, &active_idx, &active_valid, + &pending_idx, &pending_valid); + if (status) { + dev_dbg(ice_pf_to_dev(pf), "Couldn't read port split options, err = %d\n", + status); + return; + } + + /* find the biggest available port split count */ + for (i = 0; i < option_count; i++) + attrs->lanes = max_t(int, attrs->lanes, options[i].pmd); + + attrs->splittable = attrs->lanes ? 1 : 0; + ice_active_port_option = active_idx; +} + +/** * ice_devlink_create_pf_port - Create a devlink port for this PF * @pf: the PF to create a devlink port for * @@ -722,6 +1004,12 @@ int ice_devlink_create_pf_port(struct ice_pf *pf) attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; attrs.phys.port_number = pf->hw.bus.func; + /* As FW supports only port split options for whole device, + * set port split options only for first PF. 
+ */ + if (pf->hw.pf_id == 0) + ice_devlink_set_port_split_options(pf, &attrs); + ice_devlink_set_switch_id(pf, &attrs.switch_id); devlink_port_attrs_set(devlink_port, &attrs); diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index e35371e61e07..f9f15acae90a 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -292,8 +292,8 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf) if (max_vsi_num < vsi->vsi_num) max_vsi_num = vsi->vsi_num; - netif_napi_add(vf->repr->netdev, &vf->repr->q_vector->napi, ice_napi_poll, - NAPI_POLL_WEIGHT); + netif_napi_add(vf->repr->netdev, &vf->repr->q_vector->napi, + ice_napi_poll); netif_keep_dst(vf->repr->netdev); } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index a6fff8ebaf9d..b7be84bbe72d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -136,6 +136,11 @@ static const struct ice_stats ice_gstrings_pf_stats[] = { ICE_PF_STAT("mac_remote_faults.nic", stats.mac_remote_faults), ICE_PF_STAT("fdir_sb_match.nic", stats.fd_sb_match), ICE_PF_STAT("fdir_sb_status.nic", stats.fd_sb_status), + ICE_PF_STAT("tx_hwtstamp_skipped", ptp.tx_hwtstamp_skipped), + ICE_PF_STAT("tx_hwtstamp_timeouts", ptp.tx_hwtstamp_timeouts), + ICE_PF_STAT("tx_hwtstamp_flushed", ptp.tx_hwtstamp_flushed), + ICE_PF_STAT("tx_hwtstamp_discarded", ptp.tx_hwtstamp_discarded), + ICE_PF_STAT("late_cached_phc_updates", ptp.late_cached_phc_updates), }; static const u32 ice_regs_dump_list[] = { @@ -1284,10 +1289,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) } if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) { /* down and up VSI so that changes of Rx cfg are reflected. */ - if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { - ice_down(vsi); - ice_up(vsi); - } + ice_down_up(vsi); } /* don't allow modification of this flag when a single VF is in * promiscuous mode because it's not supported @@ -1468,20 +1470,22 @@ ice_get_ethtool_stats(struct net_device *netdev, /** * ice_mask_min_supported_speeds + * @hw: pointer to the HW structure * @phy_types_high: PHY type high * @phy_types_low: PHY type low to apply minimum supported speeds mask * * Apply minimum supported speeds mask to PHY type low. These are the speeds * for ethtool supported link mode. 
*/ -static -void ice_mask_min_supported_speeds(u64 phy_types_high, u64 *phy_types_low) +static void +ice_mask_min_supported_speeds(struct ice_hw *hw, + u64 phy_types_high, u64 *phy_types_low) { /* if QSFP connection with 100G speed, minimum supported speed is 25G */ if (*phy_types_low & ICE_PHY_TYPE_LOW_MASK_100G || phy_types_high & ICE_PHY_TYPE_HIGH_MASK_100G) *phy_types_low &= ~ICE_PHY_TYPE_LOW_MASK_MIN_25G; - else + else if (!ice_is_100m_speed_supported(hw)) *phy_types_low &= ~ICE_PHY_TYPE_LOW_MASK_MIN_1G; } @@ -1531,7 +1535,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low = le64_to_cpu(pf->nvm_phy_type_lo); phy_types_high = le64_to_cpu(pf->nvm_phy_type_hi); - ice_mask_min_supported_speeds(phy_types_high, &phy_types_low); + ice_mask_min_supported_speeds(&pf->hw, phy_types_high, + &phy_types_low); /* determine advertised modes based on link override only * if it's supported and if the FW doesn't abstract the * driver from having to account for link overrides @@ -2826,6 +2831,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, tx_rings[i].count = new_tx_cnt; tx_rings[i].desc = NULL; tx_rings[i].tx_buf = NULL; + tx_rings[i].tx_tstamps = &pf->ptp.port.tx; err = ice_setup_tx_ring(&tx_rings[i]); if (err) { while (i--) @@ -2884,6 +2890,7 @@ process_rx: /* clone ring and setup updated count */ rx_rings[i] = *vsi->rx_rings[i]; rx_rings[i].count = new_rx_cnt; + rx_rings[i].cached_phctime = pf->ptp.cached_phc_time; rx_rings[i].desc = NULL; rx_rings[i].rx_buf = NULL; /* this is to allow wr32 to have something to write to diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index c9f7393b783d..ee5b36941ba3 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -61,13 +61,13 @@ static void ice_lag_set_backup(struct ice_lag *lag) */ static void ice_display_lag_info(struct ice_lag *lag) { - const char *name, *peer, *upper, *role, *bonded, *master; + const char *name, *peer, *upper, *role, *bonded, *primary; struct device *dev = &lag->pf->pdev->dev; name = lag->netdev ? netdev_name(lag->netdev) : "unset"; peer = lag->peer_netdev ? netdev_name(lag->peer_netdev) : "unset"; upper = lag->upper_netdev ? netdev_name(lag->upper_netdev) : "unset"; - master = lag->master ? "TRUE" : "FALSE"; + primary = lag->primary ? "TRUE" : "FALSE"; bonded = lag->bonded ? 
"BONDED" : "UNBONDED"; switch (lag->role) { @@ -87,8 +87,8 @@ static void ice_display_lag_info(struct ice_lag *lag) role = "ERROR"; } - dev_dbg(dev, "%s %s, peer:%s, upper:%s, role:%s, master:%s\n", name, - bonded, peer, upper, role, master); + dev_dbg(dev, "%s %s, peer:%s, upper:%s, role:%s, primary:%s\n", name, + bonded, peer, upper, role, primary); } /** @@ -119,7 +119,7 @@ static void ice_lag_info_event(struct ice_lag *lag, void *ptr) } if (strcmp(bonding_info->slave.slave_name, lag_netdev_name)) { - netdev_dbg(lag->netdev, "Bonding event recv, but slave info not for us\n"); + netdev_dbg(lag->netdev, "Bonding event recv, but secondary info not for us\n"); goto lag_out; } @@ -164,8 +164,8 @@ ice_lag_link(struct ice_lag *lag, struct netdev_notifier_changeupper_info *info) lag->bonded = true; lag->role = ICE_LAG_UNSET; - /* if this is the first element in an LAG mark as master */ - lag->master = !!(peers == 1); + /* if this is the first element in an LAG mark as primary */ + lag->primary = !!(peers == 1); } /** @@ -264,7 +264,7 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) netdev_dbg(netdev, "bonding %s\n", info->linking ? "LINK" : "UNLINK"); if (!netif_is_lag_master(info->upper_dev)) { - netdev_dbg(netdev, "changeupper rcvd, but not master. bail\n"); + netdev_dbg(netdev, "changeupper rcvd, but not primary. bail\n"); return; } diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h index c2e3688dd8fd..51b5cf467ce2 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.h +++ b/drivers/net/ethernet/intel/ice/ice_lag.h @@ -24,7 +24,7 @@ struct ice_lag { struct net_device *upper_netdev; /* upper bonding netdev */ struct notifier_block notif_block; u8 bonded:1; /* currently bonded */ - u8 master:1; /* this is a master */ + u8 primary:1; /* this is primary */ u8 handler:1; /* did we register a rx_netdev_handler */ /* each thing blocking bonding will increment this value by one. * If this value is zero, then bonding is allowed. 
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 733c455f6574..938ba8c215cb 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -914,7 +914,7 @@ static void ice_set_dflt_vsi_ctx(struct ice_hw *hw, struct ice_vsi_ctx *ctxt) */ static int ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) { - u16 offset = 0, qmap = 0, tx_count = 0, pow = 0; + u16 offset = 0, qmap = 0, tx_count = 0, rx_count = 0, pow = 0; u16 num_txq_per_tc, num_rxq_per_tc; u16 qcount_tx = vsi->alloc_txq; u16 qcount_rx = vsi->alloc_rxq; @@ -981,23 +981,25 @@ static int ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) * at least 1) */ if (offset) - vsi->num_rxq = offset; + rx_count = offset; else - vsi->num_rxq = num_rxq_per_tc; + rx_count = num_rxq_per_tc; - if (vsi->num_rxq > vsi->alloc_rxq) { + if (rx_count > vsi->alloc_rxq) { dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Rx queues (%u), than were allocated (%u)!\n", - vsi->num_rxq, vsi->alloc_rxq); + rx_count, vsi->alloc_rxq); return -EINVAL; } - vsi->num_txq = tx_count; - if (vsi->num_txq > vsi->alloc_txq) { + if (tx_count > vsi->alloc_txq) { dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Tx queues (%u), than were allocated (%u)!\n", - vsi->num_txq, vsi->alloc_txq); + tx_count, vsi->alloc_txq); return -EINVAL; } + vsi->num_txq = tx_count; + vsi->num_rxq = rx_count; + if (vsi->type == ICE_VSI_VF && vsi->num_txq != vsi->num_rxq) { dev_dbg(ice_pf_to_dev(vsi->back), "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n"); /* since there is a chance that num_rxq could have been changed @@ -1522,6 +1524,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->netdev = vsi->netdev; ring->dev = dev; ring->count = vsi->num_rx_desc; + ring->cached_phctime = pf->ptp.cached_phc_time; WRITE_ONCE(vsi->rx_rings[i], ring); } @@ -1562,6 +1565,22 @@ void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) } /** + * ice_vsi_cfg_crc_strip - Configure CRC stripping for a VSI + * @vsi: VSI to be configured + * @disable: set to true to have FCS / CRC in the frame data + */ +void ice_vsi_cfg_crc_strip(struct ice_vsi *vsi, bool disable) +{ + int i; + + ice_for_each_rxq(vsi, i) + if (disable) + vsi->rx_rings[i]->flags |= ICE_RX_FLAGS_CRC_STRIP_DIS; + else + vsi->rx_rings[i]->flags &= ~ICE_RX_FLAGS_CRC_STRIP_DIS; +} + +/** * ice_vsi_cfg_rss_lut_key - Configure RSS params for a VSI * @vsi: VSI to be configured */ @@ -1986,8 +2005,8 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) if (ret) return ret; - ice_for_each_xdp_txq(vsi, i) - vsi->xdp_rings[i]->xsk_pool = ice_tx_xsk_pool(vsi->xdp_rings[i]); + ice_for_each_rxq(vsi, i) + ice_tx_xsk_pool(vsi, i); return ret; } @@ -2969,9 +2988,6 @@ int ice_vsi_release(struct ice_vsi *vsi) clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); } - if (vsi->type == ICE_VSI_PF) - ice_devlink_destroy_pf_port(pf); - if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) ice_rss_clean(vsi); @@ -3029,6 +3045,9 @@ int ice_vsi_release(struct ice_vsi *vsi) } } + if (vsi->type == ICE_VSI_PF) + ice_devlink_destroy_pf_port(pf); + if (vsi->type == ICE_VSI_VF && vsi->agg_node && vsi->agg_node->valid) vsi->agg_node->num_vsis--; @@ -3276,6 +3295,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) */ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) ice_vsi_cfg_rss_lut_key(vsi); + + /* disable or enable CRC stripping */ + if (vsi->netdev) + ice_vsi_cfg_crc_strip(vsi, !!(vsi->netdev->features & + 
NETIF_F_RXFCS)); + break; case ICE_VSI_VF: ret = ice_vsi_alloc_q_vectors(vsi); @@ -3490,6 +3515,7 @@ ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt, u16 pow, offset = 0, qcount_tx = 0, qcount_rx = 0, qmap; u16 tc0_offset = vsi->mqprio_qopt.qopt.offset[0]; int tc0_qcount = vsi->mqprio_qopt.qopt.count[0]; + u16 new_txq, new_rxq; u8 netdev_tc = 0; int i; @@ -3530,21 +3556,24 @@ ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt, } } - /* Set actual Tx/Rx queue pairs */ - vsi->num_txq = offset + qcount_tx; - if (vsi->num_txq > vsi->alloc_txq) { + new_txq = offset + qcount_tx; + if (new_txq > vsi->alloc_txq) { dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Tx queues (%u), than were allocated (%u)!\n", - vsi->num_txq, vsi->alloc_txq); + new_txq, vsi->alloc_txq); return -EINVAL; } - vsi->num_rxq = offset + qcount_rx; - if (vsi->num_rxq > vsi->alloc_rxq) { + new_rxq = offset + qcount_rx; + if (new_rxq > vsi->alloc_rxq) { dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Rx queues (%u), than were allocated (%u)!\n", - vsi->num_rxq, vsi->alloc_rxq); + new_rxq, vsi->alloc_rxq); return -EINVAL; } + /* Set actual Tx/Rx queue pairs */ + vsi->num_txq = new_txq; + vsi->num_rxq = new_rxq; + /* Setup queue TC[0].qmap for given VSI context */ ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]); @@ -3576,6 +3605,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_pf *pf = vsi->back; + struct ice_tc_cfg old_tc_cfg; struct ice_vsi_ctx *ctx; struct device *dev; int i, ret = 0; @@ -3600,6 +3630,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) max_txqs[i] = vsi->num_txq; } + memcpy(&old_tc_cfg, &vsi->tc_cfg, sizeof(old_tc_cfg)); vsi->tc_cfg.ena_tc = ena_tc; vsi->tc_cfg.numtc = num_tc; @@ -3616,8 +3647,10 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) else ret = ice_vsi_setup_q_map(vsi, ctx); - if (ret) + if (ret) { + memcpy(&vsi->tc_cfg, &old_tc_cfg, sizeof(vsi->tc_cfg)); goto out; + } /* must to indicate which section of VSI context are being modified */ ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 8712b1d2ceec..ec4bf0c89857 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -89,6 +89,8 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi); void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena); +void ice_vsi_cfg_crc_strip(struct ice_vsi *vsi, bool disable); + void ice_update_tx_ring_stats(struct ice_tx_ring *ring, u64 pkts, u64 bytes); void ice_update_rx_ring_stats(struct ice_rx_ring *ring, u64 pkts, u64 bytes); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 4ecaf40cf946..0f6718719453 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2399,8 +2399,6 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset) return -EBUSY; } - ice_unplug_aux_dev(pf); - switch (reset) { case ICE_RESET_PFR: set_bit(ICE_PFR_REQ, pf->state); @@ -2581,7 +2579,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) if (ice_setup_tx_ring(xdp_ring)) goto free_xdp_rings; ice_set_ring_xdp(xdp_ring); - xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); spin_lock_init(&xdp_ring->tx_lock); for (j = 0; j < xdp_ring->count; j++) { tx_desc = 
ICE_TX_DESC(xdp_ring, j); @@ -2589,13 +2586,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) } } - ice_for_each_rxq(vsi, i) { - if (static_key_enabled(&ice_xdp_locking_key)) - vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq]; - else - vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i]; - } - return 0; free_xdp_rings: @@ -2685,6 +2675,23 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) xdp_rings_rem -= xdp_rings_per_v; } + ice_for_each_rxq(vsi, i) { + if (static_key_enabled(&ice_xdp_locking_key)) { + vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq]; + } else { + struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector; + struct ice_tx_ring *ring; + + ice_for_each_tx_ring(ring, q_vector->tx) { + if (ice_ring_is_xdp(ring)) { + vsi->rx_rings[i]->xdp_ring = ring; + break; + } + } + } + ice_tx_xsk_pool(vsi, i); + } + /* omit the scheduler update if in reset path; XDP queues will be * taken into account at the end of ice_vsi_rebuild, where * ice_cfg_vsi_lan is being called @@ -2889,10 +2896,18 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); } + /* reallocate Rx queues that are used for zero-copy */ + xdp_ring_err = ice_realloc_zc_buf(vsi, true); + if (xdp_ring_err) + NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed"); } else if (ice_is_xdp_ena_vsi(vsi) && !prog) { xdp_ring_err = ice_destroy_xdp_rings(vsi); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed"); + /* reallocate Rx queues that were used for zero-copy */ + xdp_ring_err = ice_realloc_zc_buf(vsi, false); + if (xdp_ring_err) + NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed"); } else { /* safe to call even when prog == vsi->xdp_prog as * dev_xdp_install in net/core/dev.c incremented prog's @@ -3078,7 +3093,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & PFINT_OICR_TSYN_TX_M) { ena_mask &= ~PFINT_OICR_TSYN_TX_M; - ice_ptp_process_ts(pf); + if (!hw->reset_ongoing) + ret = IRQ_WAKE_THREAD; } if (oicr & PFINT_OICR_TSYN_EVNT_M) { @@ -3113,7 +3129,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) ice_service_task_schedule(pf); } } - ret = IRQ_HANDLED; + if (!ret) + ret = IRQ_HANDLED; ice_service_task_schedule(pf); ice_irq_dynamic_ena(hw, NULL, NULL); @@ -3122,6 +3139,24 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) } /** + * ice_misc_intr_thread_fn - misc interrupt thread function + * @irq: interrupt number + * @data: pointer to a q_vector + */ +static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) +{ + irqreturn_t ret = IRQ_HANDLED; + struct ice_pf *pf = data; + bool irq_handled; + + irq_handled = ice_ptp_process_ts(pf); + if (!irq_handled) + ret = IRQ_WAKE_THREAD; + + return ret; +} + +/** * ice_dis_ctrlq_interrupts - disable control queue interrupts * @hw: pointer to HW structure */ @@ -3233,10 +3268,12 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) pf->num_avail_sw_msix -= 1; pf->oicr_idx = (u16)oicr_idx; - err = devm_request_irq(dev, pf->msix_entries[pf->oicr_idx].vector, - ice_misc_intr, 0, pf->int_name, pf); + err = devm_request_threaded_irq(dev, + pf->msix_entries[pf->oicr_idx].vector, + ice_misc_intr, ice_misc_intr_thread_fn, + 0, pf->int_name, pf); if (err) { - dev_err(dev, "devm_request_irq for %s failed: %d\n", + dev_err(dev, "devm_request_threaded_irq for %s failed: %d\n", pf->int_name, 
err); ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); pf->num_avail_sw_msix += 1; @@ -3273,7 +3310,7 @@ static void ice_napi_add(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, v_idx) netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi, - ice_napi_poll, NAPI_POLL_WEIGHT); + ice_napi_poll); } /** @@ -3376,6 +3413,11 @@ static void ice_set_netdev_features(struct net_device *netdev) if (is_dvm_ena) netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX; + + /* Leave CRC / FCS stripping enabled by default, but allow the value to + * be changed at runtime + */ + netdev->hw_features |= NETIF_F_RXFCS; } /** @@ -3896,7 +3938,7 @@ static int ice_init_pf(struct ice_pf *pf) pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL); if (!pf->avail_rxqs) { - devm_kfree(ice_pf_to_dev(pf), pf->avail_txqs); + bitmap_free(pf->avail_txqs); pf->avail_txqs = NULL; return -ENOMEM; } @@ -3908,87 +3950,134 @@ static int ice_init_pf(struct ice_pf *pf) } /** + * ice_reduce_msix_usage - Reduce usage of MSI-X vectors + * @pf: board private structure + * @v_remain: number of remaining MSI-X vectors to be distributed + * + * Reduce the usage of MSI-X vectors when entire request cannot be fulfilled. + * pf->num_lan_msix and pf->num_rdma_msix values are set based on number of + * remaining vectors. + */ +static void ice_reduce_msix_usage(struct ice_pf *pf, int v_remain) +{ + int v_rdma; + + if (!ice_is_rdma_ena(pf)) { + pf->num_lan_msix = v_remain; + return; + } + + /* RDMA needs at least 1 interrupt in addition to AEQ MSIX */ + v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1; + + if (v_remain < ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_RDMA_MSIX) { + dev_warn(ice_pf_to_dev(pf), "Not enough MSI-X vectors to support RDMA.\n"); + clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); + + pf->num_rdma_msix = 0; + pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; + } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) || + (v_remain - v_rdma < v_rdma)) { + /* Support minimum RDMA and give remaining vectors to LAN MSIX */ + pf->num_rdma_msix = ICE_MIN_RDMA_MSIX; + pf->num_lan_msix = v_remain - ICE_MIN_RDMA_MSIX; + } else { + /* Split remaining MSIX with RDMA after accounting for AEQ MSIX + */ + pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 + + ICE_RDMA_NUM_AEQ_MSIX; + pf->num_lan_msix = v_remain - pf->num_rdma_msix; + } +} + +/** * ice_ena_msix_range - Request a range of MSIX vectors from the OS * @pf: board private structure * - * compute the number of MSIX vectors required (v_budget) and request from - * the OS. Return the number of vectors reserved or negative on failure + * Compute the number of MSIX vectors wanted and request from the OS. Adjust + * device usage if there are not enough vectors. Return the number of vectors + * reserved or negative on failure. 
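 *
 * Illustrative arithmetic (not part of this patch): with 8 online CPUs
 * and both flow director and RDMA enabled, the request is
 *
 *	v_other  = ICE_MIN_LAN_OICR_MSIX + ICE_FDIR_MSIX + ICE_ESWITCH_MSIX
 *	v_wanted = v_other + 8 (LAN) + 8 + ICE_RDMA_NUM_AEQ_MSIX (RDMA)
 *
 * and if the device exposes fewer vectors than v_wanted, the remainder
 * hw_num_msix - v_other is handed to ice_reduce_msix_usage() to be
 * re-split between LAN and RDMA.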
*/ static int ice_ena_msix_range(struct ice_pf *pf) { - int num_cpus, v_left, v_actual, v_other, v_budget = 0; + int num_cpus, hw_num_msix, v_other, v_wanted, v_actual; struct device *dev = ice_pf_to_dev(pf); - int needed, err, i; + int err, i; - v_left = pf->hw.func_caps.common_cap.num_msix_vectors; + hw_num_msix = pf->hw.func_caps.common_cap.num_msix_vectors; num_cpus = num_online_cpus(); - /* reserve for LAN miscellaneous handler */ - needed = ICE_MIN_LAN_OICR_MSIX; - if (v_left < needed) - goto no_hw_vecs_left_err; - v_budget += needed; - v_left -= needed; + /* LAN miscellaneous handler */ + v_other = ICE_MIN_LAN_OICR_MSIX; - /* reserve for flow director */ - if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) { - needed = ICE_FDIR_MSIX; - if (v_left < needed) - goto no_hw_vecs_left_err; - v_budget += needed; - v_left -= needed; - } - - /* reserve for switchdev */ - needed = ICE_ESWITCH_MSIX; - if (v_left < needed) - goto no_hw_vecs_left_err; - v_budget += needed; - v_left -= needed; - - /* total used for non-traffic vectors */ - v_other = v_budget; - - /* reserve vectors for LAN traffic */ - needed = num_cpus; - if (v_left < needed) - goto no_hw_vecs_left_err; - pf->num_lan_msix = needed; - v_budget += needed; - v_left -= needed; - - /* reserve vectors for RDMA auxiliary driver */ + /* Flow Director */ + if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) + v_other += ICE_FDIR_MSIX; + + /* switchdev */ + v_other += ICE_ESWITCH_MSIX; + + v_wanted = v_other; + + /* LAN traffic */ + pf->num_lan_msix = num_cpus; + v_wanted += pf->num_lan_msix; + + /* RDMA auxiliary driver */ if (ice_is_rdma_ena(pf)) { - needed = num_cpus + ICE_RDMA_NUM_AEQ_MSIX; - if (v_left < needed) - goto no_hw_vecs_left_err; - pf->num_rdma_msix = needed; - v_budget += needed; - v_left -= needed; + pf->num_rdma_msix = num_cpus + ICE_RDMA_NUM_AEQ_MSIX; + v_wanted += pf->num_rdma_msix; + } + + if (v_wanted > hw_num_msix) { + int v_remain; + + dev_warn(dev, "not enough device MSI-X vectors. wanted = %d, available = %d\n", + v_wanted, hw_num_msix); + + if (hw_num_msix < ICE_MIN_MSIX) { + err = -ERANGE; + goto exit_err; + } + + v_remain = hw_num_msix - v_other; + if (v_remain < ICE_MIN_LAN_TXRX_MSIX) { + v_other = ICE_MIN_MSIX - ICE_MIN_LAN_TXRX_MSIX; + v_remain = ICE_MIN_LAN_TXRX_MSIX; + } + + ice_reduce_msix_usage(pf, v_remain); + v_wanted = pf->num_lan_msix + pf->num_rdma_msix + v_other; + + dev_notice(dev, "Reducing request to %d MSI-X vectors for LAN traffic.\n", + pf->num_lan_msix); + if (ice_is_rdma_ena(pf)) + dev_notice(dev, "Reducing request to %d MSI-X vectors for RDMA.\n", + pf->num_rdma_msix); } - pf->msix_entries = devm_kcalloc(dev, v_budget, + pf->msix_entries = devm_kcalloc(dev, v_wanted, sizeof(*pf->msix_entries), GFP_KERNEL); if (!pf->msix_entries) { err = -ENOMEM; goto exit_err; } - for (i = 0; i < v_budget; i++) + for (i = 0; i < v_wanted; i++) pf->msix_entries[i].entry = i; /* actually reserve the vectors */ v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries, - ICE_MIN_MSIX, v_budget); + ICE_MIN_MSIX, v_wanted); if (v_actual < 0) { dev_err(dev, "unable to reserve MSI-X vectors\n"); err = v_actual; goto msix_err; } - if (v_actual < v_budget) { + if (v_actual < v_wanted) { dev_warn(dev, "not enough OS MSI-X vectors. 
requested = %d, obtained = %d\n", - v_budget, v_actual); + v_wanted, v_actual); if (v_actual < ICE_MIN_MSIX) { /* error if we can't get minimum vectors */ @@ -3997,38 +4086,11 @@ static int ice_ena_msix_range(struct ice_pf *pf) goto msix_err; } else { int v_remain = v_actual - v_other; - int v_rdma = 0, v_min_rdma = 0; - if (ice_is_rdma_ena(pf)) { - /* Need at least 1 interrupt in addition to - * AEQ MSIX - */ - v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1; - v_min_rdma = ICE_MIN_RDMA_MSIX; - } + if (v_remain < ICE_MIN_LAN_TXRX_MSIX) + v_remain = ICE_MIN_LAN_TXRX_MSIX; - if (v_actual == ICE_MIN_MSIX || - v_remain < ICE_MIN_LAN_TXRX_MSIX + v_min_rdma) { - dev_warn(dev, "Not enough MSI-X vectors to support RDMA.\n"); - clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); - - pf->num_rdma_msix = 0; - pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; - } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) || - (v_remain - v_rdma < v_rdma)) { - /* Support minimum RDMA and give remaining - * vectors to LAN MSIX - */ - pf->num_rdma_msix = v_min_rdma; - pf->num_lan_msix = v_remain - v_min_rdma; - } else { - /* Split remaining MSIX with RDMA after - * accounting for AEQ MSIX - */ - pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 + - ICE_RDMA_NUM_AEQ_MSIX; - pf->num_lan_msix = v_remain - pf->num_rdma_msix; - } + ice_reduce_msix_usage(pf, v_remain); dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n", pf->num_lan_msix); @@ -4043,12 +4105,7 @@ static int ice_ena_msix_range(struct ice_pf *pf) msix_err: devm_kfree(dev, pf->msix_entries); - goto exit_err; -no_hw_vecs_left_err: - dev_err(dev, "not enough device MSI-X vectors. requested = %d, available = %d\n", - needed, v_left); - err = -ERANGE; exit_err: pf->num_rdma_msix = 0; pf->num_lan_msix = 0; @@ -4542,6 +4599,10 @@ static int ice_register_netdev(struct ice_pf *pf) if (!vsi || !vsi->netdev) return -EIO; + err = ice_devlink_create_pf_port(pf); + if (err) + goto err_devlink_create; + err = register_netdev(vsi->netdev); if (err) goto err_register_netdev; @@ -4549,17 +4610,13 @@ static int ice_register_netdev(struct ice_pf *pf) set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); netif_carrier_off(vsi->netdev); netif_tx_stop_all_queues(vsi->netdev); - err = ice_devlink_create_pf_port(pf); - if (err) - goto err_devlink_create; devlink_port_type_eth_set(&pf->devlink_port, vsi->netdev); return 0; -err_devlink_create: - unregister_netdev(vsi->netdev); - clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); err_register_netdev: + ice_devlink_destroy_pf_port(pf); +err_devlink_create: free_netdev(vsi->netdev); vsi->netdev = NULL; clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); @@ -4667,8 +4724,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) ice_set_safe_mode_caps(hw); } - hw->ucast_shared = true; - err = ice_init_pf(pf); if (err) { dev_err(dev, "ice_init_pf failed: %d\n", err); @@ -5727,6 +5782,9 @@ ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[], NETIF_F_HW_VLAN_STAG_RX | \ NETIF_F_HW_VLAN_STAG_TX) +#define NETIF_VLAN_STRIPPING_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \ + NETIF_F_HW_VLAN_STAG_RX) + #define NETIF_VLAN_FILTERING_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \ NETIF_F_HW_VLAN_STAG_FILTER) @@ -5813,6 +5871,14 @@ ice_fix_features(struct net_device *netdev, netdev_features_t features) NETIF_F_HW_VLAN_STAG_TX); } + if (!(netdev->features & NETIF_F_RXFCS) && + (features & NETIF_F_RXFCS) && + (features & NETIF_VLAN_STRIPPING_FEATURES) && + !ice_vsi_has_non_zero_vlans(np->vsi)) { + netdev_warn(netdev, "Disabling VLAN 
stripping as FCS/CRC stripping is also disabled and there is no VLAN configured\n"); + features &= ~NETIF_VLAN_STRIPPING_FEATURES; + } + return features; } @@ -5906,6 +5972,13 @@ ice_set_vlan_features(struct net_device *netdev, netdev_features_t features) current_vlan_features = netdev->features & NETIF_VLAN_OFFLOAD_FEATURES; requested_vlan_features = features & NETIF_VLAN_OFFLOAD_FEATURES; if (current_vlan_features ^ requested_vlan_features) { + if ((features & NETIF_F_RXFCS) && + (features & NETIF_VLAN_STRIPPING_FEATURES)) { + dev_err(ice_pf_to_dev(vsi->back), + "To enable VLAN stripping, you must first enable FCS/CRC stripping\n"); + return -EIO; + } + err = ice_set_vlan_offload_features(vsi, features); if (err) return err; @@ -5987,6 +6060,23 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) if (ret) return ret; + /* Turn on receive of FCS aka CRC, and after setting this + * flag the packet data will have the 4 byte CRC appended + */ + if (changed & NETIF_F_RXFCS) { + if ((features & NETIF_F_RXFCS) && + (features & NETIF_VLAN_STRIPPING_FEATURES)) { + dev_err(ice_pf_to_dev(vsi->back), + "To disable FCS/CRC stripping, you must first disable VLAN stripping\n"); + return -EIO; + } + + ice_vsi_cfg_crc_strip(vsi, !!(features & NETIF_F_RXFCS)); + ret = ice_down_up(vsi); + if (ret) + return ret; + } + if (changed & NETIF_F_NTUPLE) { bool ena = !!(features & NETIF_F_NTUPLE); @@ -6634,7 +6724,7 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) */ int ice_down(struct ice_vsi *vsi) { - int i, tx_err, rx_err, link_err = 0, vlan_err = 0; + int i, tx_err, rx_err, vlan_err = 0; WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state)); @@ -6668,20 +6758,13 @@ int ice_down(struct ice_vsi *vsi) ice_napi_disable_all(vsi); - if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) { - link_err = ice_force_phys_link_state(vsi, false); - if (link_err) - netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n", - vsi->vsi_num, link_err); - } - ice_for_each_txq(vsi, i) ice_clean_tx_ring(vsi->tx_rings[i]); ice_for_each_rxq(vsi, i) ice_clean_rx_ring(vsi->rx_rings[i]); - if (tx_err || rx_err || link_err || vlan_err) { + if (tx_err || rx_err || vlan_err) { netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n", vsi->vsi_num, vsi->vsw->sw_id); return -EIO; @@ -6691,6 +6774,31 @@ int ice_down(struct ice_vsi *vsi) } /** + * ice_down_up - shutdown the VSI connection and bring it up + * @vsi: the VSI to be reconnected + */ +int ice_down_up(struct ice_vsi *vsi) +{ + int ret; + + /* if DOWN already set, nothing to do */ + if (test_and_set_bit(ICE_VSI_DOWN, vsi->state)) + return 0; + + ret = ice_down(vsi); + if (ret) + return ret; + + ret = ice_up(vsi); + if (ret) { + netdev_err(vsi->netdev, "reallocating resources failed during netdev features change, may need to reload driver\n"); + return ret; + } + + return 0; +} + +/** * ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources * @vsi: VSI having resources allocated * @@ -6843,6 +6951,8 @@ int ice_vsi_open(struct ice_vsi *vsi) if (err) goto err_setup_rx; + ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc); + if (vsi->type == ICE_VSI_PF) { /* Notify the stack of the actual queue counts. 
*/ err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq); @@ -8875,6 +8985,16 @@ int ice_stop(struct net_device *netdev) return -EBUSY; } + if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) { + int link_err = ice_force_phys_link_state(vsi, false); + + if (link_err) { + netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n", + vsi->vsi_num, link_err); + return -EIO; + } + } + ice_vsi_close(vsi); return 0; diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 13cdb5ea594d..c262dc886e6a 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -1114,14 +1114,18 @@ int ice_nvm_validate_checksum(struct ice_hw *hw) * Update the control word with the required banks' validity bits * and dumps the Shadow RAM to flash (0x0707) * - * cmd_flags controls which banks to activate, and the preservation level to - * use when activating the NVM bank. + * cmd_flags controls which banks to activate, the preservation level to use + * when activating the NVM bank, and whether an EMP reset is required for + * activation. + * + * Note that the 16bit cmd_flags value is split between two separate 1 byte + * flag values in the descriptor. * * On successful return of the firmware command, the response_flags variable * is updated with the flags reported by firmware indicating certain status, * such as whether EMP reset is enabled. */ -int ice_nvm_write_activate(struct ice_hw *hw, u8 cmd_flags, u8 *response_flags) +int ice_nvm_write_activate(struct ice_hw *hw, u16 cmd_flags, u8 *response_flags) { struct ice_aqc_nvm *cmd; struct ice_aq_desc desc; @@ -1130,7 +1134,8 @@ int ice_nvm_write_activate(struct ice_hw *hw, u8 cmd_flags, u8 *response_flags) cmd = &desc.params.nvm; ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_write_activate); - cmd->cmd_flags = cmd_flags; + cmd->cmd_flags = (u8)(cmd_flags & 0xFF); + cmd->offset_high = (u8)((cmd_flags >> 8) & 0xFF); err = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); if (!err && response_flags) diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h index 856d1ad4398b..774c2317967d 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.h +++ b/drivers/net/ethernet/intel/ice/ice_nvm.h @@ -34,7 +34,7 @@ ice_aq_update_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, int ice_aq_erase_nvm(struct ice_hw *hw, u16 module_typeid, struct ice_sq_cd *cd); int ice_nvm_validate_checksum(struct ice_hw *hw); -int ice_nvm_write_activate(struct ice_hw *hw, u8 cmd_flags, u8 *response_flags); +int ice_nvm_write_activate(struct ice_hw *hw, u16 cmd_flags, u8 *response_flags); int ice_aq_nvm_update_empr(struct ice_hw *hw); int ice_nvm_set_pkg_data(struct ice_hw *hw, bool del_pkg_data_flag, u8 *data, diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index 560efc7654c7..02a4e1cf624e 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -44,6 +44,7 @@ enum ice_protocol_type { ICE_GTP, ICE_GTP_NO_PAY, ICE_PPPOE, + ICE_L2TPV3, ICE_VLAN_EX, ICE_VLAN_IN, ICE_VXLAN_GPE, @@ -111,6 +112,7 @@ enum ice_prot_id { #define ICE_UDP_ILOS_HW 53 #define ICE_GRE_OF_HW 64 #define ICE_PPPOE_HW 103 +#define ICE_L2TPV3_HW 104 #define ICE_UDP_OF_HW 52 /* UDP Tunnels */ #define ICE_META_DATA_ID_HW 255 /* this is used for tunnel and VLAN type */ @@ -217,6 +219,11 @@ struct ice_pppoe_hdr { __be16 ppp_prot_id; /* 
control and data only */ }; +struct ice_l2tpv3_sess_hdr { + __be32 session_id; + __be64 cookie; +}; + struct ice_nvgre_hdr { __be16 flags; __be16 protocol; @@ -235,6 +242,7 @@ union ice_prot_hdr { struct ice_nvgre_hdr nvgre_hdr; struct ice_udp_gtp_hdr gtp_hdr; struct ice_pppoe_hdr pppoe_hdr; + struct ice_l2tpv3_sess_hdr l2tpv3_sess_hdr; }; /* This is mapping table entry that maps every word within a given protocol diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 72b663108a4a..011b727ab190 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -491,56 +491,6 @@ ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts) } /** - * ice_ptp_update_cached_phctime - Update the cached PHC time values - * @pf: Board specific private structure - * - * This function updates the system time values which are cached in the PF - * structure and the Rx rings. - * - * This function must be called periodically to ensure that the cached value - * is never more than 2 seconds old. It must also be called whenever the PHC - * time has been changed. - * - * Return: - * * 0 - OK, successfully updated - * * -EAGAIN - PF was busy, need to reschedule the update - */ -static int ice_ptp_update_cached_phctime(struct ice_pf *pf) -{ - u64 systime; - int i; - - if (test_and_set_bit(ICE_CFG_BUSY, pf->state)) - return -EAGAIN; - - /* Read the current PHC time */ - systime = ice_ptp_read_src_clk_reg(pf, NULL); - - /* Update the cached PHC time stored in the PF structure */ - WRITE_ONCE(pf->ptp.cached_phc_time, systime); - - ice_for_each_vsi(pf, i) { - struct ice_vsi *vsi = pf->vsi[i]; - int j; - - if (!vsi) - continue; - - if (vsi->type != ICE_VSI_PF) - continue; - - ice_for_each_rxq(vsi, j) { - if (!vsi->rx_rings[j]) - continue; - WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime); - } - } - clear_bit(ICE_CFG_BUSY, pf->state); - - return 0; -} - -/** * ice_ptp_extend_32b_ts - Convert a 32b nanoseconds timestamp to 64b * @cached_phc_time: recently cached copy of PHC time * @in_tstamp: Ingress/egress 32b nanoseconds timestamp value @@ -636,12 +586,400 @@ static u64 ice_ptp_extend_32b_ts(u64 cached_phc_time, u32 in_tstamp) static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp) { const u64 mask = GENMASK_ULL(31, 0); + unsigned long discard_time; + + /* Discard the hardware timestamp if the cached PHC time is too old */ + discard_time = pf->ptp.cached_phc_jiffies + msecs_to_jiffies(2000); + if (time_is_before_jiffies(discard_time)) { + pf->ptp.tx_hwtstamp_discarded++; + return 0; + } return ice_ptp_extend_32b_ts(pf->ptp.cached_phc_time, (in_tstamp >> 8) & mask); } /** + * ice_ptp_tx_tstamp - Process Tx timestamps for a port + * @tx: the PTP Tx timestamp tracker + * + * Process timestamps captured by the PHY associated with this port. To do + * this, loop over each index with a waiting skb. + * + * If a given index has a valid timestamp, perform the following steps: + * + * 1) copy the timestamp out of the PHY register + * 2) clear the timestamp valid bit in the PHY register + * 3) unlock the index by clearing the associated in_use bit + * 4) extend the 40b timestamp value to get a 64bit timestamp + * 5) send that timestamp to the stack + * + * After looping, if we still have waiting SKBs, return true. This may cause us + * to effectively poll even when not strictly necessary. We do this because + * it's possible a new timestamp was requested around the same time as the + * interrupt.
+ * In some cases hardware might not interrupt us again when the timestamp is + * captured. + * + * Note that we only take the tracking lock when clearing the bit and when + * checking if we need to re-queue this task. The only place where bits can be + * set is the hard xmit routine where an SKB has a request flag set. The only + * places where we clear bits are this work function, or the periodic cleanup + * thread. If the cleanup thread clears a bit we're processing we catch it + * when we lock to clear the bit and then grab the SKB pointer. If a Tx thread + * starts a new timestamp, we might not begin processing it right away but we + * will notice it at the end when we re-queue the task. If a Tx thread starts + * a new timestamp just after this function exits without re-queuing, + * the interrupt when the timestamp finishes should trigger. Avoiding holding + * the lock for the entire function is important in order to ensure that Tx + * threads do not get blocked while waiting for the lock. + */ +static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) +{ + struct ice_ptp_port *ptp_port; + bool ts_handled = true; + struct ice_pf *pf; + u8 idx; + + if (!tx->init) + return false; + + ptp_port = container_of(tx, struct ice_ptp_port, tx); + pf = ptp_port_to_pf(ptp_port); + + for_each_set_bit(idx, tx->in_use, tx->len) { + struct skb_shared_hwtstamps shhwtstamps = {}; + u8 phy_idx = idx + tx->quad_offset; + u64 raw_tstamp, tstamp; + struct sk_buff *skb; + int err; + + ice_trace(tx_tstamp_fw_req, tx->tstamps[idx].skb, idx); + + err = ice_read_phy_tstamp(&pf->hw, tx->quad, phy_idx, + &raw_tstamp); + if (err) + continue; + + ice_trace(tx_tstamp_fw_done, tx->tstamps[idx].skb, idx); + + /* Check if the timestamp is invalid or stale */ + if (!(raw_tstamp & ICE_PTP_TS_VALID) || + raw_tstamp == tx->tstamps[idx].cached_tstamp) + continue; + + /* The timestamp is valid, so we'll go ahead and clear this + * index and then send the timestamp up to the stack. + */ + spin_lock(&tx->lock); + tx->tstamps[idx].cached_tstamp = raw_tstamp; + clear_bit(idx, tx->in_use); + skb = tx->tstamps[idx].skb; + tx->tstamps[idx].skb = NULL; + spin_unlock(&tx->lock); + + /* it's (unlikely but) possible we raced with the cleanup + * thread for discarding old timestamp requests. + */ + if (!skb) + continue; + + /* Extend the timestamp using cached PHC time */ + tstamp = ice_ptp_extend_40b_ts(pf, raw_tstamp); + if (tstamp) { + shhwtstamps.hwtstamp = ns_to_ktime(tstamp); + ice_trace(tx_tstamp_complete, skb, idx); + } + + skb_tstamp_tx(skb, &shhwtstamps); + dev_kfree_skb_any(skb); + } + + /* Check if we still have work to do. If so, re-queue this task to + * poll for remaining timestamps. + */ + spin_lock(&tx->lock); + if (!bitmap_empty(tx->in_use, tx->len)) + ts_handled = false; + spin_unlock(&tx->lock); + + return ts_handled; +} + +/** + * ice_ptp_alloc_tx_tracker - Initialize tracking for Tx timestamps + * @tx: Tx tracking structure to initialize + * + * Assumes that the length has already been initialized. Do not call directly, + * use the ice_ptp_init_tx_e822 or ice_ptp_init_tx_e810 instead. 
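+ * + * Concretely, callers fill tx->len first (ice_ptp_init_tx_e822() and + * ice_ptp_init_tx_e810() below also set the quad/quad_offset fields before + * calling this function).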
+ */ +static int +ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx) +{ + tx->tstamps = kcalloc(tx->len, sizeof(*tx->tstamps), GFP_KERNEL); + if (!tx->tstamps) + return -ENOMEM; + + tx->in_use = bitmap_zalloc(tx->len, GFP_KERNEL); + if (!tx->in_use) { + kfree(tx->tstamps); + tx->tstamps = NULL; + return -ENOMEM; + } + + spin_lock_init(&tx->lock); + + tx->init = 1; + + return 0; +} + +/** + * ice_ptp_flush_tx_tracker - Flush any remaining timestamps from the tracker + * @pf: Board private structure + * @tx: the tracker to flush + */ +static void +ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) +{ + u8 idx; + + for (idx = 0; idx < tx->len; idx++) { + u8 phy_idx = idx + tx->quad_offset; + + spin_lock(&tx->lock); + if (tx->tstamps[idx].skb) { + dev_kfree_skb_any(tx->tstamps[idx].skb); + tx->tstamps[idx].skb = NULL; + pf->ptp.tx_hwtstamp_flushed++; + } + clear_bit(idx, tx->in_use); + spin_unlock(&tx->lock); + + /* Clear any potential residual timestamp in the PHY block */ + if (!pf->hw.reset_ongoing) + ice_clear_phy_tstamp(&pf->hw, tx->quad, phy_idx); + } +} + +/** + * ice_ptp_release_tx_tracker - Release allocated memory for Tx tracker + * @pf: Board private structure + * @tx: Tx tracking structure to release + * + * Free memory associated with the Tx timestamp tracker. + */ +static void +ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) +{ + tx->init = 0; + + ice_ptp_flush_tx_tracker(pf, tx); + + kfree(tx->tstamps); + tx->tstamps = NULL; + + bitmap_free(tx->in_use); + tx->in_use = NULL; + + tx->len = 0; +} + +/** + * ice_ptp_init_tx_e822 - Initialize tracking for Tx timestamps + * @pf: Board private structure + * @tx: the Tx tracking structure to initialize + * @port: the port this structure tracks + * + * Initialize the Tx timestamp tracker for this port. For generic MAC devices, + * the timestamp block is shared for all ports in the same quad. To avoid + * ports using the same timestamp index, logically break the block of + * registers into chunks based on the port number. + */ +static int +ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) +{ + tx->quad = port / ICE_PORTS_PER_QUAD; + tx->quad_offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT; + tx->len = INDEX_PER_PORT; + + return ice_ptp_alloc_tx_tracker(tx); +} + +/** + * ice_ptp_init_tx_e810 - Initialize tracking for Tx timestamps + * @pf: Board private structure + * @tx: the Tx tracking structure to initialize + * + * Initialize the Tx timestamp tracker for this PF. For E810 devices, each + * port has its own block of timestamps, independent of the other ports. + */ +static int +ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx) +{ + tx->quad = pf->hw.port_info->lport; + tx->quad_offset = 0; + tx->len = INDEX_PER_QUAD; + + return ice_ptp_alloc_tx_tracker(tx); +} + +/** + * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped + * @pf: pointer to the PF struct + * @tx: PTP Tx tracker to clean up + * + * Loop through the Tx timestamp requests and see if any of them have been + * waiting for a long time. Discard any SKBs that have been waiting for more + * than 2 seconds. This is long enough to be reasonably sure that the + * timestamp will never be captured. This might happen if the packet gets + * discarded before it reaches the PHY timestamping block. 
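+ * + * For example, a request whose start was recorded at jiffies J is skipped + * while time_is_after_jiffies(J + 2 * HZ) holds, and is reclaimed (register + * read, in_use bit cleared, skb freed) on the first cleanup pass after that + * window expires.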
+ */ +static void ice_ptp_tx_tstamp_cleanup(struct ice_pf *pf, struct ice_ptp_tx *tx) +{ + struct ice_hw *hw = &pf->hw; + u8 idx; + + if (!tx->init) + return; + + for_each_set_bit(idx, tx->in_use, tx->len) { + struct sk_buff *skb; + u64 raw_tstamp; + + /* Check if this SKB has been waiting for too long */ + if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ)) + continue; + + /* Read tstamp to be able to use this register again */ + ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset, + &raw_tstamp); + + spin_lock(&tx->lock); + skb = tx->tstamps[idx].skb; + tx->tstamps[idx].skb = NULL; + clear_bit(idx, tx->in_use); + spin_unlock(&tx->lock); + + /* Count the number of Tx timestamps which have timed out */ + pf->ptp.tx_hwtstamp_timeouts++; + + /* Free the SKB after we've cleared the bit */ + dev_kfree_skb_any(skb); + } +} + +/** + * ice_ptp_update_cached_phctime - Update the cached PHC time values + * @pf: Board specific private structure + * + * This function updates the system time values which are cached in the PF + * structure and the Rx rings. + * + * This function must be called periodically to ensure that the cached value + * is never more than 2 seconds old. + * + * Note that the cached copy in the PF PTP structure is always updated, even + * if we can't update the copy in the Rx rings. + * + * Return: + * * 0 - OK, successfully updated + * * -EAGAIN - PF was busy, need to reschedule the update + */ +static int ice_ptp_update_cached_phctime(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + unsigned long update_before; + u64 systime; + int i; + + update_before = pf->ptp.cached_phc_jiffies + msecs_to_jiffies(2000); + if (pf->ptp.cached_phc_time && + time_is_before_jiffies(update_before)) { + unsigned long time_taken = jiffies - pf->ptp.cached_phc_jiffies; + + dev_warn(dev, "%u msecs passed between updates to cached PHC time\n", + jiffies_to_msecs(time_taken)); + pf->ptp.late_cached_phc_updates++; + } + + /* Read the current PHC time */ + systime = ice_ptp_read_src_clk_reg(pf, NULL); + + /* Update the cached PHC time stored in the PF structure */ + WRITE_ONCE(pf->ptp.cached_phc_time, systime); + WRITE_ONCE(pf->ptp.cached_phc_jiffies, jiffies); + + if (test_and_set_bit(ICE_CFG_BUSY, pf->state)) + return -EAGAIN; + + ice_for_each_vsi(pf, i) { + struct ice_vsi *vsi = pf->vsi[i]; + int j; + + if (!vsi) + continue; + + if (vsi->type != ICE_VSI_PF) + continue; + + ice_for_each_rxq(vsi, j) { + if (!vsi->rx_rings[j]) + continue; + WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime); + } + } + clear_bit(ICE_CFG_BUSY, pf->state); + + return 0; +} + +/** + * ice_ptp_reset_cached_phctime - Reset cached PHC time after an update + * @pf: Board specific private structure + * + * This function must be called when the cached PHC time is no longer valid, + * such as after a time adjustment. It discards any outstanding Tx timestamps, + * and updates the cached PHC time for both the PF and Rx rings. If updating + * the PHC time cannot be done immediately, a warning message is logged and + * the work item is scheduled. + * + * These steps are required in order to ensure that we do not accidentally + * report a timestamp extended by the wrong PHC cached copy. Note that we + * do not directly update the cached timestamp here because it is possible + * this might produce an error when ICE_CFG_BUSY is set. If this occurred, we + * would have to try again. During that time window, timestamps might be + * requested and returned with an invalid extension.
Thus, on failure to + * immediately update the cached PHC time we would need to zero the value + * anyways. For this reason, we just zero the value immediately and queue the + * update work item. + */ +static void ice_ptp_reset_cached_phctime(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + int err; + + /* Update the cached PHC time immediately if possible, otherwise + * schedule the work item to execute soon. + */ + err = ice_ptp_update_cached_phctime(pf); + if (err) { + /* If another thread is updating the Rx rings, we won't + * properly reset them here. This could lead to reporting of + * invalid timestamps, but there isn't much we can do. + */ + dev_warn(dev, "%s: ICE_CFG_BUSY, unable to immediately update cached PHC time\n", + __func__); + + /* Queue the work item to update the Rx rings when possible */ + kthread_queue_delayed_work(pf->ptp.kworker, &pf->ptp.work, + msecs_to_jiffies(10)); + } + + /* Flush any outstanding Tx timestamps */ + ice_ptp_flush_tx_tracker(pf, &pf->ptp.port.tx); +} + +/** * ice_ptp_read_time - Read the time from the device * @pf: Board private structure * @ts: timespec structure to hold the current time value @@ -900,6 +1238,9 @@ static void ice_ptp_wait_for_offset_valid(struct kthread_work *work) hw = &pf->hw; dev = ice_pf_to_dev(pf); + if (ice_is_reset_in_progress(pf->state)) + return; + if (ice_ptp_check_offset_valid(port)) { /* Offsets not ready yet, try again later */ kthread_queue_delayed_work(pf->ptp.kworker, @@ -1509,7 +1850,7 @@ ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts) ice_ptp_unlock(hw); if (!err) - ice_ptp_update_cached_phctime(pf); + ice_ptp_reset_cached_phctime(pf); /* Reenable periodic outputs */ ice_ptp_enable_all_clkout(pf); @@ -1588,7 +1929,7 @@ static int ice_ptp_adjtime(struct ptp_clock_info *info, s64 delta) return err; } - ice_ptp_update_cached_phctime(pf); + ice_ptp_reset_cached_phctime(pf); return 0; } @@ -1796,26 +2137,31 @@ void ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { + struct skb_shared_hwtstamps *hwtstamps; + u64 ts_ns, cached_time; u32 ts_high; - u64 ts_ns; - /* Populate timesync data into skb */ - if (rx_desc->wb.time_stamp_low & ICE_PTP_TS_VALID) { - struct skb_shared_hwtstamps *hwtstamps; + if (!(rx_desc->wb.time_stamp_low & ICE_PTP_TS_VALID)) + return; - /* Use ice_ptp_extend_32b_ts directly, using the ring-specific - * cached PHC value, rather than accessing the PF. This also - * allows us to simply pass the upper 32bits of nanoseconds - * directly. Calling ice_ptp_extend_40b_ts is unnecessary as - * it would just discard these bits itself. - */ - ts_high = le32_to_cpu(rx_desc->wb.flex_ts.ts_high); - ts_ns = ice_ptp_extend_32b_ts(rx_ring->cached_phctime, ts_high); + cached_time = READ_ONCE(rx_ring->cached_phctime); - hwtstamps = skb_hwtstamps(skb); - memset(hwtstamps, 0, sizeof(*hwtstamps)); - hwtstamps->hwtstamp = ns_to_ktime(ts_ns); - } + /* Do not report a timestamp if we don't have a cached PHC time */ + if (!cached_time) + return; + + /* Use ice_ptp_extend_32b_ts directly, using the ring-specific cached + * PHC value, rather than accessing the PF. This also allows us to + * simply pass the upper 32bits of nanoseconds directly. Calling + * ice_ptp_extend_40b_ts is unnecessary as it would just discard these + * bits itself. 
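+ * + * Sketch of the extension math (see ice_ptp_extend_32b_ts() for the exact + * rollover handling): the upper 32 bits of the 64-bit nanosecond value come + * from the cached PHC time and the lower 32 bits from the hardware value, + * corrected by one 2^32 ns (~4 s) period when the low bits wrapped between + * the cache update and packet reception.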
+ */ + ts_high = le32_to_cpu(rx_desc->wb.flex_ts.ts_high); + ts_ns = ice_ptp_extend_32b_ts(cached_time, ts_high); + + hwtstamps = skb_hwtstamps(skb); + memset(hwtstamps, 0, sizeof(*hwtstamps)); + hwtstamps->hwtstamp = ns_to_ktime(ts_ns); } /** @@ -1871,49 +2217,26 @@ ice_ptp_setup_sma_pins_e810t(struct ice_pf *pf, struct ptp_clock_info *info) } /** - * ice_ptp_setup_pins_e810t - Setup PTP pins in sysfs + * ice_ptp_setup_pins_e810 - Setup PTP pins in sysfs * @pf: pointer to the PF instance * @info: PTP clock capabilities */ static void -ice_ptp_setup_pins_e810t(struct ice_pf *pf, struct ptp_clock_info *info) +ice_ptp_setup_pins_e810(struct ice_pf *pf, struct ptp_clock_info *info) { - /* Check if SMA controller is in the netlist */ - if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL) && - !ice_is_pca9575_present(&pf->hw)) - ice_clear_feature_support(pf, ICE_F_SMA_CTRL); - - if (!ice_is_feature_supported(pf, ICE_F_SMA_CTRL)) { - info->n_ext_ts = N_EXT_TS_E810_NO_SMA; - info->n_per_out = N_PER_OUT_E810T_NO_SMA; - return; - } + info->n_per_out = N_PER_OUT_E810; - info->n_per_out = N_PER_OUT_E810T; + if (ice_is_feature_supported(pf, ICE_F_PTP_EXTTS)) + info->n_ext_ts = N_EXT_TS_E810; - if (ice_is_feature_supported(pf, ICE_F_PTP_EXTTS)) { + if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL)) { info->n_ext_ts = N_EXT_TS_E810; info->n_pins = NUM_PTP_PINS_E810T; info->verify = ice_verify_pin_e810t; - } - /* Complete setup of the SMA pins */ - ice_ptp_setup_sma_pins_e810t(pf, info); -} - -/** - * ice_ptp_setup_pins_e810 - Setup PTP pins in sysfs - * @pf: pointer to the PF instance - * @info: PTP clock capabilities - */ -static void ice_ptp_setup_pins_e810(struct ice_pf *pf, struct ptp_clock_info *info) -{ - info->n_per_out = N_PER_OUT_E810; - - if (!ice_is_feature_supported(pf, ICE_F_PTP_EXTTS)) - return; - - info->n_ext_ts = N_EXT_TS_E810; + /* Complete setup of the SMA pins */ + ice_ptp_setup_sma_pins_e810t(pf, info); + } } /** @@ -1950,11 +2273,7 @@ static void ice_ptp_set_funcs_e810(struct ice_pf *pf, struct ptp_clock_info *info) { info->enable = ice_ptp_gpio_enable_e810; - - if (ice_is_e810t(&pf->hw)) - ice_ptp_setup_pins_e810t(pf, info); - else - ice_ptp_setup_pins_e810(pf, info); + ice_ptp_setup_pins_e810(pf, info); } /** @@ -2016,112 +2335,6 @@ static long ice_ptp_create_clock(struct ice_pf *pf) } /** - * ice_ptp_tx_tstamp_work - Process Tx timestamps for a port - * @work: pointer to the kthread_work struct - * - * Process timestamps captured by the PHY associated with this port. To do - * this, loop over each index with a waiting skb. - * - * If a given index has a valid timestamp, perform the following steps: - * - * 1) copy the timestamp out of the PHY register - * 4) clear the timestamp valid bit in the PHY register - * 5) unlock the index by clearing the associated in_use bit. - * 2) extend the 40b timestamp value to get a 64bit timestamp - * 3) send that timestamp to the stack - * - * After looping, if we still have waiting SKBs, then re-queue the work. This - * may cause us effectively poll even when not strictly necessary. We do this - * because it's possible a new timestamp was requested around the same time as - * the interrupt. In some cases hardware might not interrupt us again when the - * timestamp is captured. - * - * Note that we only take the tracking lock when clearing the bit and when - * checking if we need to re-queue this task. The only place where bits can be - * set is the hard xmit routine where an SKB has a request flag set. 
The only - * places where we clear bits are this work function, or the periodic cleanup - * thread. If the cleanup thread clears a bit we're processing we catch it - * when we lock to clear the bit and then grab the SKB pointer. If a Tx thread - * starts a new timestamp, we might not begin processing it right away but we - * will notice it at the end when we re-queue the work item. If a Tx thread - * starts a new timestamp just after this function exits without re-queuing, - * the interrupt when the timestamp finishes should trigger. Avoiding holding - * the lock for the entire function is important in order to ensure that Tx - * threads do not get blocked while waiting for the lock. - */ -static void ice_ptp_tx_tstamp_work(struct kthread_work *work) -{ - struct ice_ptp_port *ptp_port; - struct ice_ptp_tx *tx; - struct ice_pf *pf; - struct ice_hw *hw; - u8 idx; - - tx = container_of(work, struct ice_ptp_tx, work); - if (!tx->init) - return; - - ptp_port = container_of(tx, struct ice_ptp_port, tx); - pf = ptp_port_to_pf(ptp_port); - hw = &pf->hw; - - for_each_set_bit(idx, tx->in_use, tx->len) { - struct skb_shared_hwtstamps shhwtstamps = {}; - u8 phy_idx = idx + tx->quad_offset; - u64 raw_tstamp, tstamp; - struct sk_buff *skb; - int err; - - ice_trace(tx_tstamp_fw_req, tx->tstamps[idx].skb, idx); - - err = ice_read_phy_tstamp(hw, tx->quad, phy_idx, - &raw_tstamp); - if (err) - continue; - - ice_trace(tx_tstamp_fw_done, tx->tstamps[idx].skb, idx); - - /* Check if the timestamp is invalid or stale */ - if (!(raw_tstamp & ICE_PTP_TS_VALID) || - raw_tstamp == tx->tstamps[idx].cached_tstamp) - continue; - - /* The timestamp is valid, so we'll go ahead and clear this - * index and then send the timestamp up to the stack. - */ - spin_lock(&tx->lock); - tx->tstamps[idx].cached_tstamp = raw_tstamp; - clear_bit(idx, tx->in_use); - skb = tx->tstamps[idx].skb; - tx->tstamps[idx].skb = NULL; - spin_unlock(&tx->lock); - - /* it's (unlikely but) possible we raced with the cleanup - * thread for discarding old timestamp requests. - */ - if (!skb) - continue; - - /* Extend the timestamp using cached PHC time */ - tstamp = ice_ptp_extend_40b_ts(pf, raw_tstamp); - shhwtstamps.hwtstamp = ns_to_ktime(tstamp); - - ice_trace(tx_tstamp_complete, skb, idx); - - skb_tstamp_tx(skb, &shhwtstamps); - dev_kfree_skb_any(skb); - } - - /* Check if we still have work to do. If so, re-queue this task to - * poll for remaining timestamps. - */ - spin_lock(&tx->lock); - if (!bitmap_empty(tx->in_use, tx->len)) - kthread_queue_work(pf->ptp.kworker, &tx->work); - spin_unlock(&tx->lock); -} - -/** * ice_ptp_request_ts - Request an available Tx timestamp index * @tx: the PTP Tx timestamp tracker to request from * @skb: the SKB to associate with this timestamp request @@ -2161,177 +2374,17 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) } /** - * ice_ptp_process_ts - Spawn kthread work to handle timestamps + * ice_ptp_process_ts - Process the PTP Tx timestamps * @pf: Board private structure * - * Queue work required to process the PTP Tx timestamps outside of interrupt - * context. + * Returns true if timestamps are processed. */ -void ice_ptp_process_ts(struct ice_pf *pf) +bool ice_ptp_process_ts(struct ice_pf *pf) { if (pf->ptp.port.tx.init) - kthread_queue_work(pf->ptp.kworker, &pf->ptp.port.tx.work); -} - -/** - * ice_ptp_alloc_tx_tracker - Initialize tracking for Tx timestamps - * @tx: Tx tracking structure to initialize - * - * Assumes that the length has already been initialized. 
Do not call directly, - * use the ice_ptp_init_tx_e822 or ice_ptp_init_tx_e810 instead. - */ -static int -ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx) -{ - tx->tstamps = kcalloc(tx->len, sizeof(*tx->tstamps), GFP_KERNEL); - if (!tx->tstamps) - return -ENOMEM; - - tx->in_use = bitmap_zalloc(tx->len, GFP_KERNEL); - if (!tx->in_use) { - kfree(tx->tstamps); - tx->tstamps = NULL; - return -ENOMEM; - } - - spin_lock_init(&tx->lock); - kthread_init_work(&tx->work, ice_ptp_tx_tstamp_work); - - tx->init = 1; - - return 0; -} - -/** - * ice_ptp_flush_tx_tracker - Flush any remaining timestamps from the tracker - * @pf: Board private structure - * @tx: the tracker to flush - */ -static void -ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) -{ - u8 idx; - - for (idx = 0; idx < tx->len; idx++) { - u8 phy_idx = idx + tx->quad_offset; - - spin_lock(&tx->lock); - if (tx->tstamps[idx].skb) { - dev_kfree_skb_any(tx->tstamps[idx].skb); - tx->tstamps[idx].skb = NULL; - } - clear_bit(idx, tx->in_use); - spin_unlock(&tx->lock); - - /* Clear any potential residual timestamp in the PHY block */ - if (!pf->hw.reset_ongoing) - ice_clear_phy_tstamp(&pf->hw, tx->quad, phy_idx); - } -} - -/** - * ice_ptp_release_tx_tracker - Release allocated memory for Tx tracker - * @pf: Board private structure - * @tx: Tx tracking structure to release - * - * Free memory associated with the Tx timestamp tracker. - */ -static void -ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) -{ - tx->init = 0; - - kthread_cancel_work_sync(&tx->work); - - ice_ptp_flush_tx_tracker(pf, tx); - - kfree(tx->tstamps); - tx->tstamps = NULL; - - bitmap_free(tx->in_use); - tx->in_use = NULL; + return ice_ptp_tx_tstamp(&pf->ptp.port.tx); - tx->len = 0; -} - -/** - * ice_ptp_init_tx_e822 - Initialize tracking for Tx timestamps - * @pf: Board private structure - * @tx: the Tx tracking structure to initialize - * @port: the port this structure tracks - * - * Initialize the Tx timestamp tracker for this port. For generic MAC devices, - * the timestamp block is shared for all ports in the same quad. To avoid - * ports using the same timestamp index, logically break the block of - * registers into chunks based on the port number. - */ -static int -ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) -{ - tx->quad = port / ICE_PORTS_PER_QUAD; - tx->quad_offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT; - tx->len = INDEX_PER_PORT; - - return ice_ptp_alloc_tx_tracker(tx); -} - -/** - * ice_ptp_init_tx_e810 - Initialize tracking for Tx timestamps - * @pf: Board private structure - * @tx: the Tx tracking structure to initialize - * - * Initialize the Tx timestamp tracker for this PF. For E810 devices, each - * port has its own block of timestamps, independent of the other ports. - */ -static int -ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx) -{ - tx->quad = pf->hw.port_info->lport; - tx->quad_offset = 0; - tx->len = INDEX_PER_QUAD; - - return ice_ptp_alloc_tx_tracker(tx); -} - -/** - * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped - * @hw: pointer to the hw struct - * @tx: PTP Tx tracker to clean up - * - * Loop through the Tx timestamp requests and see if any of them have been - * waiting for a long time. Discard any SKBs that have been waiting for more - * than 2 seconds. This is long enough to be reasonably sure that the - * timestamp will never be captured. This might happen if the packet gets - * discarded before it reaches the PHY timestamping block. 
- */ -static void ice_ptp_tx_tstamp_cleanup(struct ice_hw *hw, struct ice_ptp_tx *tx) -{ - u8 idx; - - if (!tx->init) - return; - - for_each_set_bit(idx, tx->in_use, tx->len) { - struct sk_buff *skb; - u64 raw_tstamp; - - /* Check if this SKB has been waiting for too long */ - if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ)) - continue; - - /* Read tstamp to be able to use this register again */ - ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset, - &raw_tstamp); - - spin_lock(&tx->lock); - skb = tx->tstamps[idx].skb; - tx->tstamps[idx].skb = NULL; - clear_bit(idx, tx->in_use); - spin_unlock(&tx->lock); - - /* Free the SKB after we've cleared the bit */ - dev_kfree_skb_any(skb); - } + return false; } static void ice_ptp_periodic_work(struct kthread_work *work) @@ -2345,7 +2398,7 @@ static void ice_ptp_periodic_work(struct kthread_work *work) err = ice_ptp_update_cached_phctime(pf); - ice_ptp_tx_tstamp_cleanup(&pf->hw, &pf->ptp.port.tx); + ice_ptp_tx_tstamp_cleanup(pf, &pf->ptp.port.tx); /* Run twice a second or reschedule if phc update failed */ kthread_queue_delayed_work(ptp->kworker, &ptp->work, diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 10e396abf130..028349295b71 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -105,7 +105,6 @@ struct ice_tx_tstamp { /** * struct ice_ptp_tx - Tracking structure for all Tx timestamp requests on a port - * @work: work function to handle processing of Tx timestamps * @lock: lock to prevent concurrent write to in_use bitmap * @tstamps: array of len to store outstanding requests * @in_use: bitmap of len to indicate which slots are in use @@ -117,7 +116,6 @@ struct ice_tx_tstamp { * window, timestamps are temporarily disabled. 
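+ * + * For example (assuming ICE_PORTS_PER_QUAD == 4): an E822 tracker for port 5 + * uses quad 1 with quad_offset = (5 % 4) * INDEX_PER_PORT, so tracker slot + * idx maps to PHY register index idx + quad_offset within that quad (see + * ice_ptp_init_tx_e822()).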
*/ struct ice_ptp_tx { - struct kthread_work work; spinlock_t lock; /* lock protecting in_use bitmap */ struct ice_tx_tstamp *tstamps; unsigned long *in_use; @@ -163,6 +161,7 @@ struct ice_ptp_port { * @work: delayed work function for periodic tasks * @extts_work: work function for handling external Tx timestamps * @cached_phc_time: a cached copy of the PHC time for timestamp extension + * @cached_phc_jiffies: jiffies when cached_phc_time was last updated * @ext_ts_chan: the external timestamp channel in use * @ext_ts_irq: the external timestamp IRQ in use * @kworker: kwork thread for handling periodic work @@ -171,12 +170,19 @@ struct ice_ptp_port { * @clock: pointer to registered PTP clock device * @tstamp_config: hardware timestamping configuration * @reset_time: kernel time after clock stop on reset + * @tx_hwtstamp_skipped: number of Tx time stamp requests skipped + * @tx_hwtstamp_timeouts: number of Tx skbs discarded with no time stamp + * @tx_hwtstamp_flushed: number of Tx skbs flushed due to interface closed + * @tx_hwtstamp_discarded: number of Tx skbs discarded due to cached PHC time + * being too old to correctly extend timestamp + * @late_cached_phc_updates: number of times cached PHC update is late */ struct ice_ptp { struct ice_ptp_port port; struct kthread_delayed_work work; struct kthread_work extts_work; u64 cached_phc_time; + unsigned long cached_phc_jiffies; u8 ext_ts_chan; u8 ext_ts_irq; struct kthread_worker *kworker; @@ -185,6 +191,11 @@ struct ice_ptp { struct ptp_clock *clock; struct hwtstamp_config tstamp_config; u64 reset_time; + u32 tx_hwtstamp_skipped; + u32 tx_hwtstamp_timeouts; + u32 tx_hwtstamp_flushed; + u32 tx_hwtstamp_discarded; + u32 late_cached_phc_updates; }; #define __ptp_port_to_ptp(p) \ @@ -224,8 +235,8 @@ struct ice_ptp { #define N_EXT_TS_E810 3 #define N_PER_OUT_E810 4 #define N_PER_OUT_E810T 3 -#define N_PER_OUT_E810T_NO_SMA 2 -#define N_EXT_TS_E810_NO_SMA 2 +#define N_PER_OUT_NO_SMA_E810T 2 +#define N_EXT_TS_NO_SMA_E810T 2 #define ETH_GLTSYN_ENA(_i) (0x03000348 + ((_i) * 4)) #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) @@ -236,7 +247,7 @@ void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena); int ice_get_ptp_clock_index(struct ice_pf *pf); s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); -void ice_ptp_process_ts(struct ice_pf *pf); +bool ice_ptp_process_ts(struct ice_pf *pf); void ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, @@ -269,7 +280,10 @@ ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) return -1; } -static inline void ice_ptp_process_ts(struct ice_pf *pf) { } +static inline bool ice_ptp_process_ts(struct ice_pf *pf) +{ + return true; +} static inline void ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 6dff97d53d81..772b1f566d6e 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2021, Intel Corporation. 
*/ +#include <linux/delay.h> #include "ice_common.h" #include "ice_ptp_hw.h" #include "ice_ptp_consts.h" @@ -2587,38 +2588,113 @@ static int ice_write_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 val) } /** - * ice_read_phy_tstamp_e810 - Read a PHY timestamp out of the external PHY + * ice_read_phy_tstamp_ll_e810 - Read PHY timestamp registers through the FW + * @hw: pointer to the HW struct + * @idx: the timestamp index to read + * @hi: 8 bit timestamp high value + * @lo: 32 bit timestamp low value + * + * Read an 8 bit timestamp high value and a 32 bit timestamp low value out of + * the timestamp block of the external PHY on the E810 device using the low + * latency timestamp read. + */ +static int +ice_read_phy_tstamp_ll_e810(struct ice_hw *hw, u8 idx, u8 *hi, u32 *lo) +{ + u32 val; + u8 i; + + /* Write the TS index to read into the PF register so the FW can read it */ + val = FIELD_PREP(TS_LL_READ_TS_IDX, idx) | TS_LL_READ_TS; + wr32(hw, PF_SB_ATQBAL, val); + + /* Read the register repeatedly until the FW provides us the TS */ + for (i = TS_LL_READ_RETRIES; i > 0; i--) { + val = rd32(hw, PF_SB_ATQBAL); + + /* When the bit is cleared, the TS is ready in the register */ + if (!(FIELD_GET(TS_LL_READ_TS, val))) { + /* High 8 bit value of the TS is in bits 16:23 */ + *hi = FIELD_GET(TS_LL_READ_TS_HIGH, val); + + /* Read the low 32 bit value and set the TS valid bit */ + *lo = rd32(hw, PF_SB_ATQBAH) | TS_VALID; + return 0; + } + + udelay(10); + } + + /* FW failed to provide the TS in time */ + ice_debug(hw, ICE_DBG_PTP, "Failed to read PTP timestamp using low latency read\n"); + return -EINVAL; +} + +/** + * ice_read_phy_tstamp_sbq_e810 - Read PHY timestamp registers through the sbq * @hw: pointer to the HW struct * @lport: the lport to read from * @idx: the timestamp index to read - * @tstamp: on return, the 40bit timestamp value + * @hi: 8 bit timestamp high value + * @lo: 32 bit timestamp low value * - * Read a 40bit timestamp value out of the timestamp block of the external PHY - * on the E810 device. + * Read an 8 bit timestamp high value and a 32 bit timestamp low value out of + * the timestamp block of the external PHY on the E810 device using the + * sideband queue. */ static int -ice_read_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx, u64 *tstamp) +ice_read_phy_tstamp_sbq_e810(struct ice_hw *hw, u8 lport, u8 idx, u8 *hi, + u32 *lo) { - u32 lo_addr, hi_addr, lo, hi; + u32 hi_addr = TS_EXT(HIGH_TX_MEMORY_BANK_START, lport, idx); + u32 lo_addr = TS_EXT(LOW_TX_MEMORY_BANK_START, lport, idx); + u32 lo_val, hi_val; int err; - lo_addr = TS_EXT(LOW_TX_MEMORY_BANK_START, lport, idx); - hi_addr = TS_EXT(HIGH_TX_MEMORY_BANK_START, lport, idx); - - err = ice_read_phy_reg_e810(hw, lo_addr, &lo); + err = ice_read_phy_reg_e810(hw, lo_addr, &lo_val); if (err) { ice_debug(hw, ICE_DBG_PTP, "Failed to read low PTP timestamp register, err %d\n", err); return err; } - err = ice_read_phy_reg_e810(hw, hi_addr, &hi); + err = ice_read_phy_reg_e810(hw, hi_addr, &hi_val); if (err) { ice_debug(hw, ICE_DBG_PTP, "Failed to read high PTP timestamp register, err %d\n", err); return err; } + *lo = lo_val; + *hi = (u8)hi_val; + + return 0; +} + +/** + * ice_read_phy_tstamp_e810 - Read a PHY timestamp out of the external PHY + * @hw: pointer to the HW struct + * @lport: the lport to read from + * @idx: the timestamp index to read + * @tstamp: on return, the 40bit timestamp value + * + * Read a 40bit timestamp value out of the timestamp block of the external PHY + * on the E810 device.
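+ * + * For example (sketch, assuming the value is assembled as ((u64)hi << 32) | lo + * per the register layout described above): hi = 0xAB and lo = 0x12345678 + * yield 0xAB12345678, where callers such as ice_ptp_extend_40b_ts() treat + * bits 39:8 as the 32 bit nanosecond field and bit 0 as the valid flag.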
+ */ +static int +ice_read_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx, u64 *tstamp) +{ + u32 lo = 0; + u8 hi = 0; + int err; + + if (hw->dev_caps.ts_dev_info.ts_ll_read) + err = ice_read_phy_tstamp_ll_e810(hw, idx, &hi, &lo); + else + err = ice_read_phy_tstamp_sbq_e810(hw, lport, idx, &hi, &lo); + + if (err) + return err; + /* For E810 devices, the timestamp is reported with the lower 32 bits * in the low register, and the upper 8 bits in the high register. */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 1246e4ee4b5d..2bda64c76abc 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -402,6 +402,7 @@ bool ice_is_pca9575_present(struct ice_hw *hw); #define INCVAL_HIGH_M 0xFF /* Timestamp block macros */ +#define TS_VALID BIT(0) #define TS_LOW_M 0xFFFFFFFF #define TS_HIGH_M 0xFF #define TS_HIGH_S 32 @@ -413,6 +414,12 @@ bool ice_is_pca9575_present(struct ice_hw *hw); #define BYTES_PER_IDX_ADDR_L_U 8 #define BYTES_PER_IDX_ADDR_L 4 +/* Tx timestamp low latency read definitions */ +#define TS_LL_READ_RETRIES 200 +#define TS_LL_READ_TS_HIGH GENMASK(23, 16) +#define TS_LL_READ_TS_IDX GENMASK(29, 24) +#define TS_LL_READ_TS BIT(31) + /* Internal PHY timestamp address */ #define TS_L(a, idx) ((a) + ((idx) * BYTES_PER_IDX_ADDR_L_U)) #define TS_H(a, idx) ((a) + ((idx) * BYTES_PER_IDX_ADDR_L_U + \ diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index 0dac67cd9c77..bd31748aae1b 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -377,10 +377,10 @@ static void ice_repr_rem(struct ice_vf *vf) if (!vf->repr) return; - ice_devlink_destroy_vf_port(vf); kfree(vf->repr->q_vector); vf->repr->q_vector = NULL; unregister_netdev(vf->repr->netdev); + ice_devlink_destroy_vf_port(vf); free_netdev(vf->repr->netdev); vf->repr->netdev = NULL; #ifdef CONFIG_ICE_SWITCHDEV diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 7947223536e3..118595763bba 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -1212,7 +1212,7 @@ int ice_sched_init_port(struct ice_port_info *pi) hw = pi->hw; /* Query the Default Topology from FW */ - buf = devm_kzalloc(ice_hw_to_dev(hw), ICE_AQ_MAX_BUF_LEN, GFP_KERNEL); + buf = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -1290,7 +1290,7 @@ err_init_port: pi->root = NULL; } - devm_kfree(ice_hw_to_dev(hw), buf); + kfree(buf); return status; } diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 3808034f7e7e..9b762f7972ce 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -42,6 +42,7 @@ enum { ICE_PKT_GTP_NOPAY = BIT(8), ICE_PKT_KMALLOC = BIT(9), ICE_PKT_PPPOE = BIT(10), + ICE_PKT_L2TPV3 = BIT(11), }; struct ice_dummy_pkt_offsets { @@ -1258,6 +1259,65 @@ ICE_DECLARE_PKT_TEMPLATE(pppoe_ipv6_udp) = { 0x00, 0x00, /* 2 bytes for 4 bytes alignment */ }; +ICE_DECLARE_PKT_OFFSETS(ipv4_l2tpv3) = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV4_OFOS, 14 }, + { ICE_L2TPV3, 34 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +ICE_DECLARE_PKT_TEMPLATE(ipv4_l2tpv3) = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x08, 0x00, /* ICE_ETYPE_OL 12 */ + + 0x45, 0x00, 0x00, 0x20, /* 
ICE_IPV4_IL 14 */ + 0x00, 0x00, 0x40, 0x00, + 0x40, 0x73, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_L2TPV3 34 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, /* 2 bytes for 4 bytes alignment */ +}; + +ICE_DECLARE_PKT_OFFSETS(ipv6_l2tpv3) = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV6_OFOS, 14 }, + { ICE_L2TPV3, 54 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +ICE_DECLARE_PKT_TEMPLATE(ipv6_l2tpv3) = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x86, 0xDD, /* ICE_ETYPE_OL 12 */ + + 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_IL 14 */ + 0x00, 0x0c, 0x73, 0x40, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_L2TPV3 54 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, /* 2 bytes for 4 bytes alignment */ +}; + static const struct ice_dummy_pkt_profile ice_dummy_pkt_profiles[] = { ICE_PKT_PROFILE(ipv6_gtp, ICE_PKT_TUN_GTPU | ICE_PKT_OUTER_IPV6 | ICE_PKT_GTP_NOPAY), @@ -1297,6 +1357,8 @@ static const struct ice_dummy_pkt_profile ice_dummy_pkt_profiles[] = { ICE_PKT_PROFILE(udp_tun_ipv6_tcp, ICE_PKT_TUN_UDP | ICE_PKT_INNER_IPV6 | ICE_PKT_INNER_TCP), + ICE_PKT_PROFILE(ipv6_l2tpv3, ICE_PKT_L2TPV3 | ICE_PKT_OUTER_IPV6), + ICE_PKT_PROFILE(ipv4_l2tpv3, ICE_PKT_L2TPV3), ICE_PKT_PROFILE(udp_tun_tcp, ICE_PKT_TUN_UDP | ICE_PKT_INNER_TCP), ICE_PKT_PROFILE(udp_tun_ipv6_udp, ICE_PKT_TUN_UDP | ICE_PKT_INNER_IPV6), @@ -2274,9 +2336,7 @@ int ice_get_initial_sw_cfg(struct ice_hw *hw) int status; u16 i; - rbuf = devm_kzalloc(ice_hw_to_dev(hw), ICE_SW_CFG_MAX_BUF_LEN, - GFP_KERNEL); - + rbuf = kzalloc(ICE_SW_CFG_MAX_BUF_LEN, GFP_KERNEL); if (!rbuf) return -ENOMEM; @@ -2324,7 +2384,7 @@ int ice_get_initial_sw_cfg(struct ice_hw *hw) } } while (req_desc && !status); - devm_kfree(ice_hw_to_dev(hw), rbuf); + kfree(rbuf); return status; } @@ -3449,31 +3509,15 @@ bool ice_vlan_fltr_exist(struct ice_hw *hw, u16 vlan_id, u16 vsi_handle) * ice_add_mac - Add a MAC address based filter rule * @hw: pointer to the hardware structure * @m_list: list of MAC addresses and forwarding information - * - * IMPORTANT: When the ucast_shared flag is set to false and m_list has - * multiple unicast addresses, the function assumes that all the - * addresses are unique in a given add_mac call. It doesn't - * check for duplicates in this case, removing duplicates from a given - * list should be taken care of in the caller of this function. 
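+ * + * Illustrative summary of the new flow: with hw->ucast_shared removed, every + * address in m_list, unicast or multicast, goes through + * ice_add_rule_internal(), which reuses an existing rule for the same address + * (updating its VSI list) instead of the bulk unicast path deleted below.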
*/ int ice_add_mac(struct ice_hw *hw, struct list_head *m_list) { - struct ice_sw_rule_lkup_rx_tx *s_rule, *r_iter; struct ice_fltr_list_entry *m_list_itr; - struct list_head *rule_head; - u16 total_elem_left, s_rule_size; - struct ice_switch_info *sw; - struct mutex *rule_lock; /* Lock to protect filter rule list */ - u16 num_unicast = 0; int status = 0; - u8 elem_sent; if (!m_list || !hw) return -EINVAL; - s_rule = NULL; - sw = hw->switch_info; - rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock; list_for_each_entry(m_list_itr, m_list, list_entry) { u8 *add = &m_list_itr->fltr_info.l_data.mac.mac_addr[0]; u16 vsi_handle; @@ -3492,106 +3536,13 @@ int ice_add_mac(struct ice_hw *hw, struct list_head *m_list) if (m_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_MAC || is_zero_ether_addr(add)) return -EINVAL; - if (is_unicast_ether_addr(add) && !hw->ucast_shared) { - /* Don't overwrite the unicast address */ - mutex_lock(rule_lock); - if (ice_find_rule_entry(hw, ICE_SW_LKUP_MAC, - &m_list_itr->fltr_info)) { - mutex_unlock(rule_lock); - return -EEXIST; - } - mutex_unlock(rule_lock); - num_unicast++; - } else if (is_multicast_ether_addr(add) || - (is_unicast_ether_addr(add) && hw->ucast_shared)) { - m_list_itr->status = - ice_add_rule_internal(hw, ICE_SW_LKUP_MAC, - m_list_itr); - if (m_list_itr->status) - return m_list_itr->status; - } - } - mutex_lock(rule_lock); - /* Exit if no suitable entries were found for adding bulk switch rule */ - if (!num_unicast) { - status = 0; - goto ice_add_mac_exit; - } - - rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules; - - /* Allocate switch rule buffer for the bulk update for unicast */ - s_rule_size = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule); - s_rule = devm_kcalloc(ice_hw_to_dev(hw), num_unicast, s_rule_size, - GFP_KERNEL); - if (!s_rule) { - status = -ENOMEM; - goto ice_add_mac_exit; - } - - r_iter = s_rule; - list_for_each_entry(m_list_itr, m_list, list_entry) { - struct ice_fltr_info *f_info = &m_list_itr->fltr_info; - u8 *mac_addr = &f_info->l_data.mac.mac_addr[0]; - - if (is_unicast_ether_addr(mac_addr)) { - ice_fill_sw_rule(hw, &m_list_itr->fltr_info, r_iter, - ice_aqc_opc_add_sw_rules); - r_iter = (typeof(s_rule))((u8 *)r_iter + s_rule_size); - } - } - - /* Call AQ bulk switch rule update for all unicast addresses */ - r_iter = s_rule; - /* Call AQ switch rule in AQ_MAX chunk */ - for (total_elem_left = num_unicast; total_elem_left > 0; - total_elem_left -= elem_sent) { - struct ice_sw_rule_lkup_rx_tx *entry = r_iter; - - elem_sent = min_t(u8, total_elem_left, - (ICE_AQ_MAX_BUF_LEN / s_rule_size)); - status = ice_aq_sw_rules(hw, entry, elem_sent * s_rule_size, - elem_sent, ice_aqc_opc_add_sw_rules, - NULL); - if (status) - goto ice_add_mac_exit; - r_iter = (typeof(s_rule)) - ((u8 *)r_iter + (elem_sent * s_rule_size)); + m_list_itr->status = ice_add_rule_internal(hw, ICE_SW_LKUP_MAC, + m_list_itr); + if (m_list_itr->status) + return m_list_itr->status; } - /* Fill up rule ID based on the value returned from FW */ - r_iter = s_rule; - list_for_each_entry(m_list_itr, m_list, list_entry) { - struct ice_fltr_info *f_info = &m_list_itr->fltr_info; - u8 *mac_addr = &f_info->l_data.mac.mac_addr[0]; - struct ice_fltr_mgmt_list_entry *fm_entry; - - if (is_unicast_ether_addr(mac_addr)) { - f_info->fltr_rule_id = le16_to_cpu(r_iter->index); - f_info->fltr_act = ICE_FWD_TO_VSI; - /* Create an entry to track this MAC address */ - fm_entry = devm_kzalloc(ice_hw_to_dev(hw), - sizeof(*fm_entry), GFP_KERNEL); - if (!fm_entry) { - status = -ENOMEM; - goto 
ice_add_mac_exit; - } - fm_entry->fltr_info = *f_info; - fm_entry->vsi_count = 1; - /* The book keeping entries will get removed when - * base driver calls remove filter AQ command - */ - - list_add(&fm_entry->list_entry, rule_head); - r_iter = (typeof(s_rule))((u8 *)r_iter + s_rule_size); - } - } - -ice_add_mac_exit: - mutex_unlock(rule_lock); - if (s_rule) - devm_kfree(ice_hw_to_dev(hw), s_rule); return status; } @@ -3979,38 +3930,6 @@ ice_check_if_dflt_vsi(struct ice_port_info *pi, u16 vsi_handle, } /** - * ice_find_ucast_rule_entry - Search for a unicast MAC filter rule entry - * @hw: pointer to the hardware structure - * @recp_id: lookup type for which the specified rule needs to be searched - * @f_info: rule information - * - * Helper function to search for a unicast rule entry - this is to be used - * to remove unicast MAC filter that is not shared with other VSIs on the - * PF switch. - * - * Returns pointer to entry storing the rule if found - */ -static struct ice_fltr_mgmt_list_entry * -ice_find_ucast_rule_entry(struct ice_hw *hw, u8 recp_id, - struct ice_fltr_info *f_info) -{ - struct ice_switch_info *sw = hw->switch_info; - struct ice_fltr_mgmt_list_entry *list_itr; - struct list_head *list_head; - - list_head = &sw->recp_list[recp_id].filt_rules; - list_for_each_entry(list_itr, list_head, list_entry) { - if (!memcmp(&f_info->l_data, &list_itr->fltr_info.l_data, - sizeof(f_info->l_data)) && - f_info->fwd_id.hw_vsi_id == - list_itr->fltr_info.fwd_id.hw_vsi_id && - f_info->flag == list_itr->fltr_info.flag) - return list_itr; - } - return NULL; -} - -/** * ice_remove_mac - remove a MAC address based filter rule * @hw: pointer to the hardware structure * @m_list: list of MAC addresses and forwarding information @@ -4026,15 +3945,12 @@ ice_find_ucast_rule_entry(struct ice_hw *hw, u8 recp_id, int ice_remove_mac(struct ice_hw *hw, struct list_head *m_list) { struct ice_fltr_list_entry *list_itr, *tmp; - struct mutex *rule_lock; /* Lock to protect filter rule list */ if (!m_list) return -EINVAL; - rule_lock = &hw->switch_info->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock; list_for_each_entry_safe(list_itr, tmp, m_list, list_entry) { enum ice_sw_lkup_type l_type = list_itr->fltr_info.lkup_type; - u8 *add = &list_itr->fltr_info.l_data.mac.mac_addr[0]; u16 vsi_handle; if (l_type != ICE_SW_LKUP_MAC) @@ -4046,19 +3962,7 @@ int ice_remove_mac(struct ice_hw *hw, struct list_head *m_list) list_itr->fltr_info.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); - if (is_unicast_ether_addr(add) && !hw->ucast_shared) { - /* Don't remove the unicast address that belongs to - * another VSI on the switch, since it is not being - * shared... 
- */ - mutex_lock(rule_lock); - if (!ice_find_ucast_rule_entry(hw, ICE_SW_LKUP_MAC, - &list_itr->fltr_info)) { - mutex_unlock(rule_lock); - return -ENOENT; - } - mutex_unlock(rule_lock); - } + list_itr->status = ice_remove_rule_internal(hw, ICE_SW_LKUP_MAC, list_itr); @@ -4648,6 +4552,7 @@ static const struct ice_prot_ext_tbl_entry ice_prot_ext[ICE_PROTOCOL_LAST] = { { ICE_GTP, { 8, 10, 12, 14, 16, 18, 20, 22 } }, { ICE_GTP_NO_PAY, { 8, 10, 12, 14 } }, { ICE_PPPOE, { 0, 2, 4, 6 } }, + { ICE_L2TPV3, { 0, 2, 4, 6, 8, 10 } }, { ICE_VLAN_EX, { 2, 0 } }, { ICE_VLAN_IN, { 2, 0 } }, }; @@ -4671,6 +4576,7 @@ static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { { ICE_GTP, ICE_UDP_OF_HW }, { ICE_GTP_NO_PAY, ICE_UDP_ILOS_HW }, { ICE_PPPOE, ICE_PPPOE_HW }, + { ICE_L2TPV3, ICE_L2TPV3_HW }, { ICE_VLAN_EX, ICE_VLAN_OF_HW }, { ICE_VLAN_IN, ICE_VLAN_OL_HW }, }; @@ -5754,7 +5660,8 @@ ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, if (lkups[i].h_u.pppoe_hdr.ppp_prot_id == htons(PPP_IPV6)) match |= ICE_PKT_OUTER_IPV6; - } + } else if (lkups[i].type == ICE_L2TPV3) + match |= ICE_PKT_L2TPV3; } while (ret->match && (match & ret->match) != ret->match) @@ -5855,6 +5762,9 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, case ICE_PPPOE: len = sizeof(struct ice_pppoe_hdr); break; + case ICE_L2TPV3: + len = sizeof(struct ice_l2tpv3_sess_hdr); + break; default: return -EINVAL; } diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index a298862857a8..f68c555be4e9 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -36,6 +36,10 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, ICE_TC_FLWR_FIELD_ENC_DEST_IPV6)) lkups_cnt++; + if (flags & (ICE_TC_FLWR_FIELD_ENC_IP_TOS | + ICE_TC_FLWR_FIELD_ENC_IP_TTL)) + lkups_cnt++; + if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT) lkups_cnt++; @@ -47,11 +51,11 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, lkups_cnt++; /* is VLAN specified? */ - if (flags & ICE_TC_FLWR_FIELD_VLAN) + if (flags & (ICE_TC_FLWR_FIELD_VLAN | ICE_TC_FLWR_FIELD_VLAN_PRIO)) lkups_cnt++; /* is CVLAN specified? */ - if (flags & ICE_TC_FLWR_FIELD_CVLAN) + if (flags & (ICE_TC_FLWR_FIELD_CVLAN | ICE_TC_FLWR_FIELD_CVLAN_PRIO)) lkups_cnt++; /* are PPPoE options specified? */ @@ -64,6 +68,13 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, ICE_TC_FLWR_FIELD_DEST_IPV6 | ICE_TC_FLWR_FIELD_SRC_IPV6)) lkups_cnt++; + if (flags & (ICE_TC_FLWR_FIELD_IP_TOS | ICE_TC_FLWR_FIELD_IP_TTL)) + lkups_cnt++; + + /* are L2TPv3 options specified? */ + if (flags & ICE_TC_FLWR_FIELD_L2TPV3_SESSID) + lkups_cnt++; + /* is L4 (TCP/UDP/any other L4 protocol fields) specified? 
*/ if (flags & (ICE_TC_FLWR_FIELD_DEST_L4_PORT | ICE_TC_FLWR_FIELD_SRC_L4_PORT)) @@ -257,6 +268,50 @@ ice_tc_fill_tunnel_outer(u32 flags, struct ice_tc_flower_fltr *fltr, i++; } + if (fltr->inner_headers.l2_key.n_proto == htons(ETH_P_IP) && + (flags & (ICE_TC_FLWR_FIELD_ENC_IP_TOS | + ICE_TC_FLWR_FIELD_ENC_IP_TTL))) { + list[i].type = ice_proto_type_from_ipv4(false); + + if (flags & ICE_TC_FLWR_FIELD_ENC_IP_TOS) { + list[i].h_u.ipv4_hdr.tos = hdr->l3_key.tos; + list[i].m_u.ipv4_hdr.tos = hdr->l3_mask.tos; + } + + if (flags & ICE_TC_FLWR_FIELD_ENC_IP_TTL) { + list[i].h_u.ipv4_hdr.time_to_live = hdr->l3_key.ttl; + list[i].m_u.ipv4_hdr.time_to_live = hdr->l3_mask.ttl; + } + + i++; + } + + if (fltr->inner_headers.l2_key.n_proto == htons(ETH_P_IPV6) && + (flags & (ICE_TC_FLWR_FIELD_ENC_IP_TOS | + ICE_TC_FLWR_FIELD_ENC_IP_TTL))) { + struct ice_ipv6_hdr *hdr_h, *hdr_m; + + hdr_h = &list[i].h_u.ipv6_hdr; + hdr_m = &list[i].m_u.ipv6_hdr; + list[i].type = ice_proto_type_from_ipv6(false); + + if (flags & ICE_TC_FLWR_FIELD_ENC_IP_TOS) { + be32p_replace_bits(&hdr_h->be_ver_tc_flow, + hdr->l3_key.tos, + ICE_IPV6_HDR_TC_MASK); + be32p_replace_bits(&hdr_m->be_ver_tc_flow, + hdr->l3_mask.tos, + ICE_IPV6_HDR_TC_MASK); + } + + if (flags & ICE_TC_FLWR_FIELD_ENC_IP_TTL) { + hdr_h->hop_limit = hdr->l3_key.ttl; + hdr_m->hop_limit = hdr->l3_mask.ttl; + } + + i++; + } + if ((flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT) && hdr->l3_key.ip_proto == IPPROTO_UDP) { list[i].type = ICE_UDP_OF; @@ -334,7 +389,7 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, } /* copy VLAN info */ - if (flags & ICE_TC_FLWR_FIELD_VLAN) { + if (flags & (ICE_TC_FLWR_FIELD_VLAN | ICE_TC_FLWR_FIELD_VLAN_PRIO)) { vlan_tpid = be16_to_cpu(headers->vlan_hdr.vlan_tpid); rule_info->vlan_type = ice_check_supported_vlan_tpid(vlan_tpid); @@ -343,15 +398,45 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, list[i].type = ICE_VLAN_EX; else list[i].type = ICE_VLAN_OFOS; - list[i].h_u.vlan_hdr.vlan = headers->vlan_hdr.vlan_id; - list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xFFFF); + + if (flags & ICE_TC_FLWR_FIELD_VLAN) { + list[i].h_u.vlan_hdr.vlan = headers->vlan_hdr.vlan_id; + list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0x0FFF); + } + + if (flags & ICE_TC_FLWR_FIELD_VLAN_PRIO) { + if (flags & ICE_TC_FLWR_FIELD_VLAN) { + list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xEFFF); + } else { + list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xE000); + list[i].h_u.vlan_hdr.vlan = 0; + } + list[i].h_u.vlan_hdr.vlan |= + headers->vlan_hdr.vlan_prio; + } + i++; } - if (flags & ICE_TC_FLWR_FIELD_CVLAN) { + if (flags & (ICE_TC_FLWR_FIELD_CVLAN | ICE_TC_FLWR_FIELD_CVLAN_PRIO)) { list[i].type = ICE_VLAN_IN; - list[i].h_u.vlan_hdr.vlan = headers->cvlan_hdr.vlan_id; - list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xFFFF); + + if (flags & ICE_TC_FLWR_FIELD_CVLAN) { + list[i].h_u.vlan_hdr.vlan = headers->cvlan_hdr.vlan_id; + list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0x0FFF); + } + + if (flags & ICE_TC_FLWR_FIELD_CVLAN_PRIO) { + if (flags & ICE_TC_FLWR_FIELD_CVLAN) { + list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xEFFF); + } else { + list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xE000); + list[i].h_u.vlan_hdr.vlan = 0; + } + list[i].h_u.vlan_hdr.vlan |= + headers->cvlan_hdr.vlan_prio; + } + i++; } @@ -420,6 +505,61 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, i++; } + if (headers->l2_key.n_proto == htons(ETH_P_IP) && + (flags & (ICE_TC_FLWR_FIELD_IP_TOS | ICE_TC_FLWR_FIELD_IP_TTL))) { + list[i].type = ice_proto_type_from_ipv4(inner); + + if (flags & ICE_TC_FLWR_FIELD_IP_TOS) { + 
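/* For illustration only, not part of the patch: ICE_IPV6_HDR_TC_MASK
 * (0xFF00000) selects bits 27:20 of the first IPv6 header word, i.e. the
 * traffic class field of (version | TC | flow label), which is what the
 * be32p_replace_bits() calls in the IPv6 branches operate on. A worked
 * example with an assumed DSCP/ECN byte of 0xb8:
 *
 *	__be32 w = cpu_to_be32(0x60000000);	// version 6, TC 0
 *	be32p_replace_bits(&w, 0xb8, 0xFF00000);
 *	// w now equals cpu_to_be32(0x6b800000)
 */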
list[i].h_u.ipv4_hdr.tos = headers->l3_key.tos; + list[i].m_u.ipv4_hdr.tos = headers->l3_mask.tos; + } + + if (flags & ICE_TC_FLWR_FIELD_IP_TTL) { + list[i].h_u.ipv4_hdr.time_to_live = + headers->l3_key.ttl; + list[i].m_u.ipv4_hdr.time_to_live = + headers->l3_mask.ttl; + } + + i++; + } + + if (headers->l2_key.n_proto == htons(ETH_P_IPV6) && + (flags & (ICE_TC_FLWR_FIELD_IP_TOS | ICE_TC_FLWR_FIELD_IP_TTL))) { + struct ice_ipv6_hdr *hdr_h, *hdr_m; + + hdr_h = &list[i].h_u.ipv6_hdr; + hdr_m = &list[i].m_u.ipv6_hdr; + list[i].type = ice_proto_type_from_ipv6(inner); + + if (flags & ICE_TC_FLWR_FIELD_IP_TOS) { + be32p_replace_bits(&hdr_h->be_ver_tc_flow, + headers->l3_key.tos, + ICE_IPV6_HDR_TC_MASK); + be32p_replace_bits(&hdr_m->be_ver_tc_flow, + headers->l3_mask.tos, + ICE_IPV6_HDR_TC_MASK); + } + + if (flags & ICE_TC_FLWR_FIELD_IP_TTL) { + hdr_h->hop_limit = headers->l3_key.ttl; + hdr_m->hop_limit = headers->l3_mask.ttl; + } + + i++; + } + + if (flags & ICE_TC_FLWR_FIELD_L2TPV3_SESSID) { + list[i].type = ICE_L2TPV3; + + list[i].h_u.l2tpv3_sess_hdr.session_id = + headers->l2tpv3_hdr.session_id; + list[i].m_u.l2tpv3_sess_hdr.session_id = + cpu_to_be32(0xFFFFFFFF); + + i++; + } + /* copy L4 (src, dest) port */ if (flags & (ICE_TC_FLWR_FIELD_DEST_L4_PORT | ICE_TC_FLWR_FIELD_SRC_L4_PORT)) { @@ -839,6 +979,40 @@ ice_tc_set_ipv6(struct flow_match_ipv6_addrs *match, } /** + * ice_tc_set_tos_ttl - Parse IP ToS/TTL from TC flower filter + * @match: Pointer to flow match structure + * @fltr: Pointer to filter structure + * @headers: inner or outer header fields + * @is_encap: set true for tunnel + */ +static void +ice_tc_set_tos_ttl(struct flow_match_ip *match, + struct ice_tc_flower_fltr *fltr, + struct ice_tc_flower_lyr_2_4_hdrs *headers, + bool is_encap) +{ + if (match->mask->tos) { + if (is_encap) + fltr->flags |= ICE_TC_FLWR_FIELD_ENC_IP_TOS; + else + fltr->flags |= ICE_TC_FLWR_FIELD_IP_TOS; + + headers->l3_key.tos = match->key->tos; + headers->l3_mask.tos = match->mask->tos; + } + + if (match->mask->ttl) { + if (is_encap) + fltr->flags |= ICE_TC_FLWR_FIELD_ENC_IP_TTL; + else + fltr->flags |= ICE_TC_FLWR_FIELD_IP_TTL; + + headers->l3_key.ttl = match->key->ttl; + headers->l3_mask.ttl = match->mask->ttl; + } +} + +/** * ice_tc_set_port - Parse ports from TC flower filter * @match: Flow match structure * @fltr: Pointer to filter structure @@ -967,10 +1141,7 @@ ice_parse_tunnel_attr(struct net_device *dev, struct flow_rule *rule, struct flow_match_ip match; flow_rule_match_enc_ip(rule, &match); - headers->l3_key.tos = match.key->tos; - headers->l3_key.ttl = match.key->ttl; - headers->l3_mask.tos = match.mask->tos; - headers->l3_mask.ttl = match.mask->ttl; + ice_tc_set_tos_ttl(&match, fltr, headers, true); } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) && @@ -1039,9 +1210,11 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | + BIT(FLOW_DISSECTOR_KEY_IP) | BIT(FLOW_DISSECTOR_KEY_ENC_IP) | BIT(FLOW_DISSECTOR_KEY_PORTS) | - BIT(FLOW_DISSECTOR_KEY_PPPOE))) { + BIT(FLOW_DISSECTOR_KEY_PPPOE) | + BIT(FLOW_DISSECTOR_KEY_L2TPV3))) { NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported key used"); return -EOPNOTSUPP; } @@ -1137,16 +1310,22 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, if (match.mask->vlan_id) { if (match.mask->vlan_id == VLAN_VID_MASK) { fltr->flags |= ICE_TC_FLWR_FIELD_VLAN; + headers->vlan_hdr.vlan_id = + 
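/* For illustration only, not part of the patch: the 16-bit VLAN TCI is
 * laid out as PCP (15:13) | DEI (12) | VID (11:0), which is why the
 * match masks above compose as 0x0FFF (VID alone), 0xE000 (priority
 * alone) and 0xEFFF (both, DEI excluded). For an assumed VID of 100 and
 * PCP of 5:
 *
 *	key  = cpu_to_be16((5 << VLAN_PRIO_SHIFT) | 100);  // 0xa064
 *	mask = cpu_to_be16(0xEFFF);
 */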
cpu_to_be16(match.key->vlan_id & + VLAN_VID_MASK); } else { NL_SET_ERR_MSG_MOD(fltr->extack, "Bad VLAN mask"); return -EINVAL; } } - headers->vlan_hdr.vlan_id = - cpu_to_be16(match.key->vlan_id & VLAN_VID_MASK); - if (match.mask->vlan_priority) - headers->vlan_hdr.vlan_prio = match.key->vlan_priority; + if (match.mask->vlan_priority) { + fltr->flags |= ICE_TC_FLWR_FIELD_VLAN_PRIO; + headers->vlan_hdr.vlan_prio = + cpu_to_be16((match.key->vlan_priority << + VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK); + } + if (match.mask->vlan_tpid) headers->vlan_hdr.vlan_tpid = match.key->vlan_tpid; } @@ -1164,6 +1343,9 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, if (match.mask->vlan_id) { if (match.mask->vlan_id == VLAN_VID_MASK) { fltr->flags |= ICE_TC_FLWR_FIELD_CVLAN; + headers->cvlan_hdr.vlan_id = + cpu_to_be16(match.key->vlan_id & + VLAN_VID_MASK); } else { NL_SET_ERR_MSG_MOD(fltr->extack, "Bad CVLAN mask"); @@ -1171,10 +1353,12 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, } } - headers->cvlan_hdr.vlan_id = - cpu_to_be16(match.key->vlan_id & VLAN_VID_MASK); - if (match.mask->vlan_priority) - headers->cvlan_hdr.vlan_prio = match.key->vlan_priority; + if (match.mask->vlan_priority) { + fltr->flags |= ICE_TC_FLWR_FIELD_CVLAN_PRIO; + headers->cvlan_hdr.vlan_prio = + cpu_to_be16((match.key->vlan_priority << + VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK); + } } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PPPOE)) { @@ -1217,6 +1401,22 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, return -EINVAL; } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match; + + flow_rule_match_ip(rule, &match); + ice_tc_set_tos_ttl(&match, fltr, headers, false); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_L2TPV3)) { + struct flow_match_l2tpv3 match; + + flow_rule_match_l2tpv3(rule, &match); + + fltr->flags |= ICE_TC_FLWR_FIELD_L2TPV3_SESSID; + headers->l2tpv3_hdr.session_id = match.key->session_id; + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { struct flow_match_ports match; diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index 91cd3d3778c7..92642faad595 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -26,9 +26,18 @@ #define ICE_TC_FLWR_FIELD_CVLAN BIT(19) #define ICE_TC_FLWR_FIELD_PPPOE_SESSID BIT(20) #define ICE_TC_FLWR_FIELD_PPP_PROTO BIT(21) +#define ICE_TC_FLWR_FIELD_IP_TOS BIT(22) +#define ICE_TC_FLWR_FIELD_IP_TTL BIT(23) +#define ICE_TC_FLWR_FIELD_ENC_IP_TOS BIT(24) +#define ICE_TC_FLWR_FIELD_ENC_IP_TTL BIT(25) +#define ICE_TC_FLWR_FIELD_L2TPV3_SESSID BIT(26) +#define ICE_TC_FLWR_FIELD_VLAN_PRIO BIT(27) +#define ICE_TC_FLWR_FIELD_CVLAN_PRIO BIT(28) #define ICE_TC_FLOWER_MASK_32 0xFFFFFFFF +#define ICE_IPV6_HDR_TC_MASK 0xFF00000 + struct ice_indr_block_priv { struct net_device *netdev; struct ice_netdev_priv *np; @@ -42,7 +51,7 @@ struct ice_tc_flower_action { struct ice_tc_vlan_hdr { __be16 vlan_id; /* Only last 12 bits valid */ - u16 vlan_prio; /* Only last 3 bits valid (valid values: 0..7) */ + __be16 vlan_prio; /* Only last 3 bits valid (valid values: 0..7) */ __be16 vlan_tpid; }; @@ -80,6 +89,10 @@ struct ice_tc_l3_hdr { u8 ttl; }; +struct ice_tc_l2tpv3_hdr { + __be32 session_id; +}; + struct ice_tc_l4_hdr { __be16 dst_port; __be16 src_port; @@ -92,6 +105,7 @@ struct ice_tc_flower_lyr_2_4_hdrs { struct ice_tc_vlan_hdr vlan_hdr; struct ice_tc_vlan_hdr cvlan_hdr; 
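/* For illustration only, not part of the patch: the l2tpv3_hdr member
 * added just below carries the session ID taken from a flower L2TPv3
 * match, and the fill path matches it exactly. For an assumed session
 * ID of 0x2a:
 *
 *	list[i].h_u.l2tpv3_sess_hdr.session_id = cpu_to_be32(0x2a);
 *	list[i].m_u.l2tpv3_sess_hdr.session_id = cpu_to_be32(0xFFFFFFFF);
 */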
struct ice_tc_pppoe_hdr pppoe_hdr; + struct ice_tc_l2tpv3_hdr l2tpv3_hdr; /* L3 (IPv4[6]) layer fields with their mask */ struct ice_tc_l3_hdr l3_key; struct ice_tc_l3_hdr l3_mask; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 836dce840712..dbe80e5053a8 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -610,7 +610,7 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, if (test_bit(ICE_VSI_DOWN, vsi->state)) return -ENETDOWN; - if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq) + if (!ice_is_xdp_ena_vsi(vsi)) return -ENXIO; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) @@ -621,6 +621,9 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, xdp_ring = vsi->xdp_rings[queue_index]; spin_lock(&xdp_ring->tx_lock); } else { + /* Generally, should not happen */ + if (unlikely(queue_index >= vsi->num_xdp_txq)) + return -ENXIO; xdp_ring = vsi->xdp_rings[queue_index]; } @@ -1464,7 +1467,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) bool wd; if (tx_ring->xsk_pool) - wd = ice_xmit_zc(tx_ring, ICE_DESC_UNUSED(tx_ring), budget); + wd = ice_xmit_zc(tx_ring); else if (ice_ring_is_xdp(tx_ring)) wd = true; else @@ -2255,8 +2258,10 @@ ice_tstamp(struct ice_tx_ring *tx_ring, struct sk_buff *skb, /* Grab an open timestamp slot */ idx = ice_ptp_request_ts(tx_ring->tx_tstamps, skb); - if (idx < 0) + if (idx < 0) { + tx_ring->vsi->back->ptp.tx_hwtstamp_skipped++; return; + } off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX | (ICE_TX_CTX_DESC_TSYN << ICE_TXD_CTX_QW1_CMD_S) | diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index ca902af54bb4..932b5661ec4d 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -295,10 +295,11 @@ struct ice_rx_ring { struct xsk_buff_pool *xsk_pool; struct sk_buff *skb; dma_addr_t dma; /* physical address of ring */ -#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) u64 cached_phctime; u8 dcb_tc; /* Traffic class of ring */ u8 ptp_rx; +#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) +#define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2) u8 flags; } ____cacheline_internodealigned_in_smp; diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 861b64322959..e1abfcee96dc 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -347,6 +347,7 @@ struct ice_ts_func_info { #define ICE_TS_DEV_ENA_M BIT(24) #define ICE_TS_TMR0_ENA_M BIT(25) #define ICE_TS_TMR1_ENA_M BIT(26) +#define ICE_TS_LL_TX_TS_READ_M BIT(28) struct ice_ts_dev_info { /* Device specific info */ @@ -359,6 +360,7 @@ struct ice_ts_dev_info { u8 ena; u8 tmr0_ena; u8 tmr1_ena; + u8 ts_ll_read; }; /* Function specific capabilities */ @@ -564,6 +566,8 @@ enum ice_rl_type { #define ICE_SCHED_INVAL_PROF_ID 0xFFFF #define ICE_SCHED_DFLT_BURST_SIZE (15 * 1024) /* in bytes (15k) */ +#define ICE_MAX_PORT_PER_PCI_DEV 8 + /* Data structure for saving BW information */ enum ice_bw_type { ICE_BW_TYPE_PRIO, @@ -885,8 +889,6 @@ struct ice_hw { /* INTRL granularity in 1 us */ u8 intrl_gran; - u8 ucast_shared; /* true if VSIs can share unicast addr */ - #define ICE_PHY_PER_NAC 1 #define ICE_MAX_QUAD 2 #define ICE_NUM_QUAD_TYPE 2 diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 49ba8bfdbf04..056c904b83cc 100644 --- 
a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -192,6 +192,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
 	err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true);
 	if (err)
 		return err;

+	ice_clean_rx_ring(rx_ring);
 	ice_qvec_toggle_napi(vsi, q_vector, false);
 	ice_qp_clean_rings(vsi, q_idx);
@@ -243,7 +244,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
 		if (err)
 			goto free_buf;
 		ice_set_ring_xdp(xdp_ring);
-		xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring);
+		ice_tx_xsk_pool(vsi, q_idx);
 	}

 	err = ice_vsi_cfg_rxq(rx_ring);
@@ -317,6 +318,62 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
 }

 /**
+ * ice_realloc_rx_xdp_bufs - reallocate the buffer array for either XSK or normal buffers
+ * @rx_ring: Rx ring
+ * @pool_present: is pool for XSK present
+ *
+ * Try to allocate a new buffer array; return -ENOMEM if that fails.
+ * On success, free the old array and substitute the newly allocated one.
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present)
+{
+	size_t elem_size = pool_present ? sizeof(*rx_ring->xdp_buf) :
+					  sizeof(*rx_ring->rx_buf);
+	void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);
+
+	if (!sw_ring)
+		return -ENOMEM;
+
+	if (pool_present) {
+		kfree(rx_ring->rx_buf);
+		rx_ring->rx_buf = NULL;
+		rx_ring->xdp_buf = sw_ring;
+	} else {
+		kfree(rx_ring->xdp_buf);
+		rx_ring->xdp_buf = NULL;
+		rx_ring->rx_buf = sw_ring;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_realloc_zc_buf - reallocate XDP ZC queue pairs
+ * @vsi: Current VSI
+ * @zc: is zero copy set
+ *
+ * Reallocate buffers for all rx_rings that might be used by XSK.
+ * XDP requires more memory than rx_buf provides.
+ * Returns 0 on success, negative on failure
+ */
+int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc)
+{
+	struct ice_rx_ring *rx_ring;
+	unsigned long q;
+
+	for_each_set_bit(q, vsi->af_xdp_zc_qps,
+			 max_t(int, vsi->alloc_txq, vsi->alloc_rxq)) {
+		rx_ring = vsi->rx_rings[q];
+		if (ice_realloc_rx_xdp_bufs(rx_ring, zc))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
  * ice_xsk_pool_setup - enable/disable a buffer pool region depending on its state
  * @vsi: Current VSI
  * @pool: buffer pool to enable/associate to a ring, NULL to disable
@@ -329,9 +386,8 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
 	bool if_running, pool_present = !!pool;
 	int ret = 0, pool_failure = 0;

-	if (!is_power_of_2(vsi->rx_rings[qid]->count) ||
-	    !is_power_of_2(vsi->tx_rings[qid]->count)) {
-		netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n");
+	if (qid >= vsi->num_rxq || qid >= vsi->num_txq) {
+		netdev_err(vsi->netdev, "Please use queue id in scope of combined queues count\n");
 		pool_failure = -EINVAL;
 		goto failure;
 	}
@@ -339,11 +395,17 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
 	if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);

 	if (if_running) {
+		struct ice_rx_ring *rx_ring = vsi->rx_rings[qid];
+
 		ret = ice_qp_dis(vsi, qid);
 		if (ret) {
 			netdev_err(vsi->netdev, "ice_qp_dis error = %d\n", ret);
 			goto xsk_pool_if_up;
 		}
+
+		ret = ice_realloc_rx_xdp_bufs(rx_ring, pool_present);
+		if (ret)
+			goto xsk_pool_if_up;
 	}

 	pool_failure = pool_present ?
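/* For illustration only, not part of the patch: with the netdev up,
 * ice_xsk_pool_setup() sequences the swap as
 *
 *	ice_qp_dis(vsi, qid);
 *	ice_realloc_rx_xdp_bufs(rx_ring, pool_present);
 *	pool_present ? ice_xsk_pool_enable(vsi, pool, qid)
 *		     : ice_xsk_pool_disable(vsi, qid);
 *	ice_qp_ena(vsi, qid);
 *
 * The reallocation step exists because rx_buf and xdp_buf elements
 * differ in size (see the elem_size ternary above), so the software
 * ring must be resized while the queue pair is down.
 */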
ice_xsk_pool_enable(vsi, pool, qid) : @@ -353,7 +415,7 @@ xsk_pool_if_up: if (if_running) { ret = ice_qp_ena(vsi, qid); if (!ret && pool_present) - napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi); + napi_schedule(&vsi->rx_rings[qid]->xdp_ring->q_vector->napi); else if (ret) netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret); } @@ -465,11 +527,10 @@ exit: bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) { u16 rx_thresh = ICE_RING_QUARTER(rx_ring); - u16 batched, leftover, i, tail_bumps; + u16 leftover, i, tail_bumps; - batched = ALIGN_DOWN(count, rx_thresh); - tail_bumps = batched / rx_thresh; - leftover = count & (rx_thresh - 1); + tail_bumps = count / rx_thresh; + leftover = count - (tail_bumps * rx_thresh); for (i = 0; i < tail_bumps; i++) if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh)) @@ -719,69 +780,57 @@ ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) } /** - * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring - * @xdp_ring: XDP ring to clean - * @napi_budget: amount of descriptors that NAPI allows us to clean - * - * Returns count of cleaned descriptors + * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ + * @xdp_ring: XDP Tx ring */ -static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget) +static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) { - u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); - int budget = napi_budget / tx_thresh; - u16 next_dd = xdp_ring->next_dd; - u16 ntc, cleared_dds = 0; - - do { - struct ice_tx_desc *next_dd_desc; - u16 desc_cnt = xdp_ring->count; - struct ice_tx_buf *tx_buf; - u32 xsk_frames; - u16 i; - - next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd); - if (!(next_dd_desc->cmd_type_offset_bsz & - cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) - break; + u16 ntc = xdp_ring->next_to_clean; + struct ice_tx_desc *tx_desc; + u16 cnt = xdp_ring->count; + struct ice_tx_buf *tx_buf; + u16 xsk_frames = 0; + u16 last_rs; + int i; - cleared_dds++; - xsk_frames = 0; - if (likely(!xdp_ring->xdp_tx_active)) { - xsk_frames = tx_thresh; - goto skip; - } + last_rs = xdp_ring->next_to_use ? 
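/* Worked example for the wraparound accounting just below, with assumed
 * values cnt = 512, ntc = 500, last_rs = 10: a set DD bit on descriptor
 * 10 covers slots 500..511 plus 0..10, i.e.
 * last_rs + cnt - ntc + 1 = 10 + 512 - 500 + 1 = 23 completed frames.
 */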
xdp_ring->next_to_use - 1 : cnt - 1; + tx_desc = ICE_TX_DESC(xdp_ring, last_rs); + if ((tx_desc->cmd_type_offset_bsz & + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) { + if (last_rs >= ntc) + xsk_frames = last_rs - ntc + 1; + else + xsk_frames = last_rs + cnt - ntc + 1; + } - ntc = xdp_ring->next_to_clean; + if (!xsk_frames) + return; - for (i = 0; i < tx_thresh; i++) { - tx_buf = &xdp_ring->tx_buf[ntc]; + if (likely(!xdp_ring->xdp_tx_active)) + goto skip; - if (tx_buf->raw_buf) { - ice_clean_xdp_tx_buf(xdp_ring, tx_buf); - tx_buf->raw_buf = NULL; - } else { - xsk_frames++; - } + ntc = xdp_ring->next_to_clean; + for (i = 0; i < xsk_frames; i++) { + tx_buf = &xdp_ring->tx_buf[ntc]; - ntc++; - if (ntc >= xdp_ring->count) - ntc = 0; + if (tx_buf->raw_buf) { + ice_clean_xdp_tx_buf(xdp_ring, tx_buf); + tx_buf->raw_buf = NULL; + } else { + xsk_frames++; } + + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } skip: - xdp_ring->next_to_clean += tx_thresh; - if (xdp_ring->next_to_clean >= desc_cnt) - xdp_ring->next_to_clean -= desc_cnt; - if (xsk_frames) - xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); - next_dd_desc->cmd_type_offset_bsz = 0; - next_dd = next_dd + tx_thresh; - if (next_dd >= desc_cnt) - next_dd = tx_thresh - 1; - } while (--budget); - - xdp_ring->next_dd = next_dd; - - return cleared_dds * tx_thresh; + tx_desc->cmd_type_offset_bsz = 0; + xdp_ring->next_to_clean += xsk_frames; + if (xdp_ring->next_to_clean >= cnt) + xdp_ring->next_to_clean -= cnt; + if (xsk_frames) + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); } /** @@ -816,7 +865,6 @@ static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, unsigned int *total_bytes) { - u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); u16 ntu = xdp_ring->next_to_use; struct ice_tx_desc *tx_desc; u32 i; @@ -836,13 +884,6 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de } xdp_ring->next_to_use = ntu; - - if (xdp_ring->next_to_use > xdp_ring->next_rs) { - tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); - tx_desc->cmd_type_offset_bsz |= - cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs += tx_thresh; - } } /** @@ -855,7 +896,6 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, u32 nb_pkts, unsigned int *total_bytes) { - u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); u32 batched, leftover, i; batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH); @@ -864,54 +904,54 @@ static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *d ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes); for (; i < batched + leftover; i++) ice_xmit_pkt(xdp_ring, &descs[i], total_bytes); +} - if (xdp_ring->next_to_use > xdp_ring->next_rs) { - struct ice_tx_desc *tx_desc; +/** + * ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU) + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + */ +static void ice_set_rs_bit(struct ice_tx_ring *xdp_ring) +{ + u16 ntu = xdp_ring->next_to_use ? 
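/* Worked example for the "one behind NTU" computation just below: on an
 * assumed 512-entry ring, next_to_use == 37 tags descriptor 36 with the
 * RS bit, while next_to_use == 0 wraps and tags descriptor 511, so a
 * single RS bit covers everything produced since the previous tail bump.
 */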
xdp_ring->next_to_use - 1 : xdp_ring->count - 1; + struct ice_tx_desc *tx_desc; - tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); - tx_desc->cmd_type_offset_bsz |= - cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs += tx_thresh; - } + tx_desc = ICE_TX_DESC(xdp_ring, ntu); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); } /** * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring * @xdp_ring: XDP ring to produce the HW Tx descriptors on - * @budget: number of free descriptors on HW Tx ring that can be used - * @napi_budget: amount of descriptors that NAPI allows us to clean * * Returns true if there is no more work that needs to be done, false otherwise */ -bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget) +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring) { struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; - u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); u32 nb_pkts, nb_processed = 0; unsigned int total_bytes = 0; + int budget; + + ice_clean_xdp_irq_zc(xdp_ring); - if (budget < tx_thresh) - budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget); + budget = ICE_DESC_UNUSED(xdp_ring); + budget = min_t(u16, budget, ICE_RING_QUARTER(xdp_ring)); nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); if (!nb_pkts) return true; if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { - struct ice_tx_desc *tx_desc; - nb_processed = xdp_ring->count - xdp_ring->next_to_use; ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes); - tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); - tx_desc->cmd_type_offset_bsz |= - cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs = tx_thresh - 1; xdp_ring->next_to_use = 0; } ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed, &total_bytes); + ice_set_rs_bit(xdp_ring); ice_xdp_ring_update_tail(xdp_ring); ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes); @@ -944,13 +984,13 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, if (!ice_is_xdp_ena_vsi(vsi)) return -EINVAL; - if (queue_id >= vsi->num_txq) + if (queue_id >= vsi->num_txq || queue_id >= vsi->num_rxq) return -EINVAL; - if (!vsi->xdp_rings[queue_id]->xsk_pool) - return -EINVAL; + ring = vsi->rx_rings[queue_id]->xdp_ring; - ring = vsi->xdp_rings[queue_id]; + if (!ring->xsk_pool) + return -EINVAL; /* The idea here is that if NAPI is running, mark a miss, so * it will run again. 
If not, trigger an interrupt and
@@ -989,14 +1029,16 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
  */
 void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring)
 {
-	u16 count_mask = rx_ring->count - 1;
 	u16 ntc = rx_ring->next_to_clean;
 	u16 ntu = rx_ring->next_to_use;

-	for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) {
+	while (ntc != ntu) {
 		struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc);

 		xsk_buff_free(xdp);
+		ntc++;
+		if (ntc >= rx_ring->count)
+			ntc = 0;
 	}
 }

diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
index 21faec8e97db..6fa181f080ef 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.h
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -26,12 +26,10 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count);
 bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
 void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring);
 void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring);
-bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget);
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring);
+int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc);
 #else
-static inline bool
-ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring,
-	    u32 __always_unused budget,
-	    int __always_unused napi_budget)
+static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring)
 {
 	return false;
 }
@@ -72,5 +70,12 @@ ice_xsk_wakeup(struct net_device __always_unused *netdev,

 static inline void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) { }
 static inline void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring) { }
+
+static inline int
+ice_realloc_zc_buf(struct ice_vsi __always_unused *vsi,
+		   bool __always_unused zc)
+{
+	return 0;
+}
 #endif /* CONFIG_XDP_SOCKETS */
 #endif /* !_ICE_XSK_H_ */
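For reference, the Tx budget that ice_xmit_zc() now derives for itself can be reconstructed outside the driver. A minimal sketch, assuming ICE_DESC_UNUSED() keeps the usual Intel ring-accounting form (one slot stays empty so a full ring and an empty ring remain distinguishable) and that ICE_RING_QUARTER() is count / 4; xmit_zc_budget_sketch() is a hypothetical name, not a driver function:

static u16 xmit_zc_budget_sketch(u16 count, u16 next_to_use, u16 next_to_clean)
{
	/* free slots between producer (next_to_use) and consumer
	 * (next_to_clean), minus the one deliberately unused slot
	 */
	u16 unused = (next_to_clean > next_to_use ? 0 : count) +
		     next_to_clean - next_to_use - 1;
	u16 quarter = count >> 2;

	/* ice_xmit_zc() caps each pass at a quarter of the ring */
	return unused < quarter ? unused : quarter;
}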