diff options
Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_main.c')
-rw-r--r-- | drivers/net/ethernet/intel/ice/ice_main.c | 3626 |
1 files changed, 1046 insertions, 2580 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 5299caf55a7f..8f61b375e768 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6,8 +6,9 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "ice.h" +#include "ice_lib.h" -#define DRV_VERSION "ice-0.7.0-k" +#define DRV_VERSION "0.7.2-k" #define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver" const char ice_drv_ver[] = DRV_VERSION; static const char ice_driver_string[] = DRV_SUMMARY; @@ -15,7 +16,7 @@ static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation."; MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); MODULE_DESCRIPTION(DRV_SUMMARY); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); MODULE_VERSION(DRV_VERSION); static int debug = -1; @@ -31,173 +32,84 @@ static const struct net_device_ops ice_netdev_ops; static void ice_pf_dis_all_vsi(struct ice_pf *pf); static void ice_rebuild(struct ice_pf *pf); -static int ice_vsi_release(struct ice_vsi *vsi); + +static void ice_vsi_release_all(struct ice_pf *pf); static void ice_update_vsi_stats(struct ice_vsi *vsi); static void ice_update_pf_stats(struct ice_pf *pf); /** - * ice_get_free_slot - get the next non-NULL location index in array - * @array: array to search - * @size: size of the array - * @curr: last known occupied index to be used as a search hint - * - * void * is being used to keep the functionality generic. This lets us use this - * function on any array of pointers. + * ice_get_tx_pending - returns number of Tx descriptors not processed + * @ring: the ring of descriptors */ -static int ice_get_free_slot(void *array, int size, int curr) +static u32 ice_get_tx_pending(struct ice_ring *ring) { - int **tmp_array = (int **)array; - int next; + u32 head, tail; - if (curr < (size - 1) && !tmp_array[curr + 1]) { - next = curr + 1; - } else { - int i = 0; + head = ring->next_to_clean; + tail = readl(ring->tail); - while ((i < size) && (tmp_array[i])) - i++; - if (i == size) - next = ICE_NO_VSI; - else - next = i; - } - return next; + if (head != tail) + return (head < tail) ? + tail - head : (tail + ring->count - head); + return 0; } /** - * ice_search_res - Search the tracker for a block of resources - * @res: pointer to the resource - * @needed: size of the block needed - * @id: identifier to track owner - * Returns the base item index of the block, or -ENOMEM for error + * ice_check_for_hang_subtask - check for and recover hung queues + * @pf: pointer to PF struct */ -static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id) +static void ice_check_for_hang_subtask(struct ice_pf *pf) { - int start = res->search_hint; - int end = start; - - id |= ICE_RES_VALID_BIT; + struct ice_vsi *vsi = NULL; + unsigned int i; + u32 v, v_idx; + int packets; - do { - /* skip already allocated entries */ - if (res->list[end++] & ICE_RES_VALID_BIT) { - start = end; - if ((start + needed) > res->num_entries) - break; + ice_for_each_vsi(pf, v) + if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) { + vsi = pf->vsi[v]; + break; } - if (end == (start + needed)) { - int i = start; + if (!vsi || test_bit(__ICE_DOWN, vsi->state)) + return; - /* there was enough, so assign it to the requestor */ - while (i != end) - res->list[i++] = id; + if (!(vsi->netdev && netif_carrier_ok(vsi->netdev))) + return; - if (end == res->num_entries) - end = 0; + for (i = 0; i < vsi->num_txq; i++) { + struct ice_ring *tx_ring = vsi->tx_rings[i]; + + if (tx_ring && tx_ring->desc) { + int itr = ICE_ITR_NONE; + + /* If packet counter has not changed the queue is + * likely stalled, so force an interrupt for this + * queue. + * + * prev_pkt would be negative if there was no + * pending work. + */ + packets = tx_ring->stats.pkts & INT_MAX; + if (tx_ring->tx_stats.prev_pkt == packets) { + /* Trigger sw interrupt to revive the queue */ + v_idx = tx_ring->q_vector->v_idx; + wr32(&vsi->back->hw, + GLINT_DYN_CTL(vsi->hw_base_vector + v_idx), + (itr << GLINT_DYN_CTL_ITR_INDX_S) | + GLINT_DYN_CTL_SWINT_TRIG_M | + GLINT_DYN_CTL_INTENA_MSK_M); + continue; + } - res->search_hint = end; - return start; + /* Memory barrier between read of packet count and call + * to ice_get_tx_pending() + */ + smp_rmb(); + tx_ring->tx_stats.prev_pkt = + ice_get_tx_pending(tx_ring) ? packets : -1; } - } while (1); - - return -ENOMEM; -} - -/** - * ice_get_res - get a block of resources - * @pf: board private structure - * @res: pointer to the resource - * @needed: size of the block needed - * @id: identifier to track owner - * - * Returns the base item index of the block, or -ENOMEM for error - * The search_hint trick and lack of advanced fit-finding only works - * because we're highly likely to have all the same sized requests. - * Linear search time and any fragmentation should be minimal. - */ -static int -ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) -{ - int ret; - - if (!res || !pf) - return -EINVAL; - - if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) { - dev_err(&pf->pdev->dev, - "param err: needed=%d, num_entries = %d id=0x%04x\n", - needed, res->num_entries, id); - return -EINVAL; - } - - /* search based on search_hint */ - ret = ice_search_res(res, needed, id); - - if (ret < 0) { - /* previous search failed. Reset search hint and try again */ - res->search_hint = 0; - ret = ice_search_res(res, needed, id); - } - - return ret; -} - -/** - * ice_free_res - free a block of resources - * @res: pointer to the resource - * @index: starting index previously returned by ice_get_res - * @id: identifier to track owner - * Returns number of resources freed - */ -static int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id) -{ - int count = 0; - int i; - - if (!res || index >= res->num_entries) - return -EINVAL; - - id |= ICE_RES_VALID_BIT; - for (i = index; i < res->num_entries && res->list[i] == id; i++) { - res->list[i] = 0; - count++; } - - return count; -} - -/** - * ice_add_mac_to_list - Add a mac address filter entry to the list - * @vsi: the VSI to be forwarded to - * @add_list: pointer to the list which contains MAC filter entries - * @macaddr: the MAC address to be added. - * - * Adds mac address filter entry to the temp list - * - * Returns 0 on success or ENOMEM on failure. - */ -static int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list, - const u8 *macaddr) -{ - struct ice_fltr_list_entry *tmp; - struct ice_pf *pf = vsi->back; - - tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_ATOMIC); - if (!tmp) - return -ENOMEM; - - tmp->fltr_info.flag = ICE_FLTR_TX; - tmp->fltr_info.src = vsi->vsi_num; - tmp->fltr_info.lkup_type = ICE_SW_LKUP_MAC; - tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI; - tmp->fltr_info.fwd_id.vsi_id = vsi->vsi_num; - ether_addr_copy(tmp->fltr_info.l_data.mac.mac_addr, macaddr); - - INIT_LIST_HEAD(&tmp->list_entry); - list_add(&tmp->list_entry, add_list); - - return 0; } /** @@ -243,24 +155,6 @@ static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr) } /** - * ice_free_fltr_list - free filter lists helper - * @dev: pointer to the device struct - * @h: pointer to the list head to be freed - * - * Helper function to free filter lists previously created using - * ice_add_mac_to_list - */ -static void ice_free_fltr_list(struct device *dev, struct list_head *h) -{ - struct ice_fltr_list_entry *e, *tmp; - - list_for_each_entry_safe(e, tmp, h, list_entry) { - list_del(&e->list_entry); - devm_kfree(dev, e); - } -} - -/** * ice_vsi_fltr_changed - check if filter state changed * @vsi: VSI to be checked * @@ -359,7 +253,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags); if (vsi->current_netdev_flags & IFF_PROMISC) { /* Apply TX filter rule to get traffic from VMs */ - status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, true, + status = ice_cfg_dflt_vsi(hw, vsi->idx, true, ICE_FLTR_TX); if (status) { netdev_err(netdev, "Error setting default VSI %i tx rule\n", @@ -369,7 +263,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) goto out_promisc; } /* Apply RX filter rule to get traffic from wire */ - status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, true, + status = ice_cfg_dflt_vsi(hw, vsi->idx, true, ICE_FLTR_RX); if (status) { netdev_err(netdev, "Error setting default VSI %i rx rule\n", @@ -380,7 +274,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) } } else { /* Clear TX filter rule to stop traffic from VMs */ - status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, false, + status = ice_cfg_dflt_vsi(hw, vsi->idx, false, ICE_FLTR_TX); if (status) { netdev_err(netdev, "Error clearing default VSI %i tx rule\n", @@ -389,8 +283,8 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) err = -EIO; goto out_promisc; } - /* Clear filter RX to remove traffic from wire */ - status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, false, + /* Clear RX filter to remove traffic from wire */ + status = ice_cfg_dflt_vsi(hw, vsi->idx, false, ICE_FLTR_RX); if (status) { netdev_err(netdev, "Error clearing default VSI %i rx rule\n", @@ -438,15 +332,6 @@ static void ice_sync_fltr_subtask(struct ice_pf *pf) } /** - * ice_is_reset_recovery_pending - schedule a reset - * @state: pf state field - */ -static bool ice_is_reset_recovery_pending(unsigned long int *state) -{ - return test_bit(__ICE_RESET_RECOVERY_PENDING, state); -} - -/** * ice_prepare_for_reset - prep for the core to reset * @pf: board private structure * @@ -456,23 +341,17 @@ static void ice_prepare_for_reset(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - u32 v; - - ice_for_each_vsi(pf, v) - if (pf->vsi[v]) - ice_remove_vsi_fltr(hw, pf->vsi[v]->vsi_num); - dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n"); + /* Notify VFs of impending reset */ + if (ice_check_sq_alive(hw, &hw->mailboxq)) + ice_vc_notify_reset(pf); /* disable the VSIs and their queues that are not already DOWN */ - /* pf_dis_all_vsi modifies netdev structures -rtnl_lock needed */ ice_pf_dis_all_vsi(pf); - ice_for_each_vsi(pf, v) - if (pf->vsi[v]) - pf->vsi[v]->vsi_num = 0; - ice_shutdown_all_ctrlq(hw); + + set_bit(__ICE_PREPARED_FOR_RESET, pf->state); } /** @@ -489,27 +368,29 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) dev_dbg(dev, "reset_type 0x%x requested\n", reset_type); WARN_ON(in_interrupt()); - /* PFR is a bit of a special case because it doesn't result in an OICR - * interrupt. So for PFR, we prepare for reset, issue the reset and - * rebuild sequentially. - */ - if (reset_type == ICE_RESET_PFR) { - set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state); - ice_prepare_for_reset(pf); - } + ice_prepare_for_reset(pf); /* trigger the reset */ if (ice_reset(hw, reset_type)) { dev_err(dev, "reset %d failed\n", reset_type); set_bit(__ICE_RESET_FAILED, pf->state); - clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state); + clear_bit(__ICE_RESET_OICR_RECV, pf->state); + clear_bit(__ICE_PREPARED_FOR_RESET, pf->state); + clear_bit(__ICE_PFR_REQ, pf->state); + clear_bit(__ICE_CORER_REQ, pf->state); + clear_bit(__ICE_GLOBR_REQ, pf->state); return; } + /* PFR is a bit of a special case because it doesn't result in an OICR + * interrupt. So for PFR, rebuild after the reset and clear the reset- + * associated state bits. + */ if (reset_type == ICE_RESET_PFR) { pf->pfr_count++; ice_rebuild(pf); - clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state); + clear_bit(__ICE_PREPARED_FOR_RESET, pf->state); + clear_bit(__ICE_PFR_REQ, pf->state); } } @@ -519,48 +400,60 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) */ static void ice_reset_subtask(struct ice_pf *pf) { - enum ice_reset_req reset_type; - - rtnl_lock(); + enum ice_reset_req reset_type = ICE_RESET_INVAL; /* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an - * OICR interrupt. The OICR handler (ice_misc_intr) determines what - * type of reset happened and sets __ICE_RESET_RECOVERY_PENDING bit in - * pf->state. So if reset/recovery is pending (as indicated by this bit) - * we do a rebuild and return. + * OICR interrupt. The OICR handler (ice_misc_intr) determines what type + * of reset is pending and sets bits in pf->state indicating the reset + * type and __ICE_RESET_OICR_RECV. So, if the latter bit is set + * prepare for pending reset if not already (for PF software-initiated + * global resets the software should already be prepared for it as + * indicated by __ICE_PREPARED_FOR_RESET; for global resets initiated + * by firmware or software on other PFs, that bit is not set so prepare + * for the reset now), poll for reset done, rebuild and return. */ - if (ice_is_reset_recovery_pending(pf->state)) { + if (test_bit(__ICE_RESET_OICR_RECV, pf->state)) { clear_bit(__ICE_GLOBR_RECV, pf->state); clear_bit(__ICE_CORER_RECV, pf->state); - ice_prepare_for_reset(pf); + if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) + ice_prepare_for_reset(pf); /* make sure we are ready to rebuild */ - if (ice_check_reset(&pf->hw)) + if (ice_check_reset(&pf->hw)) { set_bit(__ICE_RESET_FAILED, pf->state); - else + } else { + /* done with reset. start rebuild */ + pf->hw.reset_ongoing = false; ice_rebuild(pf); - clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state); - goto unlock; + /* clear bit to resume normal operations, but + * ICE_NEEDS_RESTART bit is set incase rebuild failed + */ + clear_bit(__ICE_RESET_OICR_RECV, pf->state); + clear_bit(__ICE_PREPARED_FOR_RESET, pf->state); + clear_bit(__ICE_PFR_REQ, pf->state); + clear_bit(__ICE_CORER_REQ, pf->state); + clear_bit(__ICE_GLOBR_REQ, pf->state); + } + + return; } /* No pending resets to finish processing. Check for new resets */ - if (test_and_clear_bit(__ICE_GLOBR_REQ, pf->state)) - reset_type = ICE_RESET_GLOBR; - else if (test_and_clear_bit(__ICE_CORER_REQ, pf->state)) - reset_type = ICE_RESET_CORER; - else if (test_and_clear_bit(__ICE_PFR_REQ, pf->state)) + if (test_bit(__ICE_PFR_REQ, pf->state)) reset_type = ICE_RESET_PFR; - else - goto unlock; + if (test_bit(__ICE_CORER_REQ, pf->state)) + reset_type = ICE_RESET_CORER; + if (test_bit(__ICE_GLOBR_REQ, pf->state)) + reset_type = ICE_RESET_GLOBR; + /* If no valid reset type requested just return */ + if (reset_type == ICE_RESET_INVAL) + return; - /* reset if not already down or resetting */ + /* reset if not already down or busy */ if (!test_bit(__ICE_DOWN, pf->state) && !test_bit(__ICE_CFG_BUSY, pf->state)) { ice_do_reset(pf, reset_type); } - -unlock: - rtnl_unlock(); } /** @@ -772,6 +665,8 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi) } } + ice_vc_notify_link_state(pf); + return 0; } @@ -822,6 +717,10 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) cq = &hw->adminq; qtype = "Admin"; break; + case ICE_CTL_Q_MAILBOX: + cq = &hw->mailboxq; + qtype = "Mailbox"; + break; default: dev_warn(&pf->pdev->dev, "Unknown control queue type 0x%x\n", q_type); @@ -901,7 +800,13 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) case ice_aqc_opc_get_link_status: if (ice_handle_link_event(pf)) dev_err(&pf->pdev->dev, - "Could not handle link event"); + "Could not handle link event\n"); + break; + case ice_mbx_opc_send_msg_to_pf: + ice_vc_process_vf_msg(pf, &event); + break; + case ice_aqc_opc_fw_logging: + ice_output_fw_log(hw, &event.desc, event.msg_buf); break; default: dev_dbg(&pf->pdev->dev, @@ -917,13 +822,27 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) } /** + * ice_ctrlq_pending - check if there is a difference between ntc and ntu + * @hw: pointer to hardware info + * @cq: control queue information + * + * returns true if there are pending messages in a queue, false if there aren't + */ +static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq) +{ + u16 ntu; + + ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask); + return cq->rq.next_to_clean != ntu; +} + +/** * ice_clean_adminq_subtask - clean the AdminQ rings * @pf: board private structure */ static void ice_clean_adminq_subtask(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - u32 val; if (!test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) return; @@ -933,9 +852,35 @@ static void ice_clean_adminq_subtask(struct ice_pf *pf) clear_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); - /* re-enable Admin queue interrupt causes */ - val = rd32(hw, PFINT_FW_CTL); - wr32(hw, PFINT_FW_CTL, (val | PFINT_FW_CTL_CAUSE_ENA_M)); + /* There might be a situation where new messages arrive to a control + * queue between processing the last message and clearing the + * EVENT_PENDING bit. So before exiting, check queue head again (using + * ice_ctrlq_pending) and process new messages if any. + */ + if (ice_ctrlq_pending(hw, &hw->adminq)) + __ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN); + + ice_flush(hw); +} + +/** + * ice_clean_mailboxq_subtask - clean the MailboxQ rings + * @pf: board private structure + */ +static void ice_clean_mailboxq_subtask(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + + if (!test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state)) + return; + + if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX)) + return; + + clear_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state); + + if (ice_ctrlq_pending(hw, &hw->mailboxq)) + __ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX); ice_flush(hw); } @@ -948,8 +893,9 @@ static void ice_clean_adminq_subtask(struct ice_pf *pf) */ static void ice_service_task_schedule(struct ice_pf *pf) { - if (!test_bit(__ICE_DOWN, pf->state) && - !test_and_set_bit(__ICE_SERVICE_SCHED, pf->state)) + if (!test_bit(__ICE_SERVICE_DIS, pf->state) && + !test_and_set_bit(__ICE_SERVICE_SCHED, pf->state) && + !test_bit(__ICE_NEEDS_RESTART, pf->state)) queue_work(ice_wq, &pf->serv_task); } @@ -967,6 +913,22 @@ static void ice_service_task_complete(struct ice_pf *pf) } /** + * ice_service_task_stop - stop service task and cancel works + * @pf: board private structure + */ +static void ice_service_task_stop(struct ice_pf *pf) +{ + set_bit(__ICE_SERVICE_DIS, pf->state); + + if (pf->serv_tmr.function) + del_timer_sync(&pf->serv_tmr); + if (pf->serv_task.func) + cancel_work_sync(&pf->serv_task); + + clear_bit(__ICE_SERVICE_SCHED, pf->state); +} + +/** * ice_service_timer - timer callback to schedule service task * @t: pointer to timer_list */ @@ -979,6 +941,160 @@ static void ice_service_timer(struct timer_list *t) } /** + * ice_handle_mdd_event - handle malicious driver detect event + * @pf: pointer to the PF structure + * + * Called from service task. OICR interrupt handler indicates MDD event + */ +static void ice_handle_mdd_event(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + bool mdd_detected = false; + u32 reg; + int i; + + if (!test_bit(__ICE_MDD_EVENT_PENDING, pf->state)) + return; + + /* find what triggered the MDD event */ + reg = rd32(hw, GL_MDET_TX_PQM); + if (reg & GL_MDET_TX_PQM_VALID_M) { + u8 pf_num = (reg & GL_MDET_TX_PQM_PF_NUM_M) >> + GL_MDET_TX_PQM_PF_NUM_S; + u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >> + GL_MDET_TX_PQM_VF_NUM_S; + u8 event = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >> + GL_MDET_TX_PQM_MAL_TYPE_S; + u16 queue = ((reg & GL_MDET_TX_PQM_QNUM_M) >> + GL_MDET_TX_PQM_QNUM_S); + + if (netif_msg_tx_err(pf)) + dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", + event, queue, pf_num, vf_num); + wr32(hw, GL_MDET_TX_PQM, 0xffffffff); + mdd_detected = true; + } + + reg = rd32(hw, GL_MDET_TX_TCLAN); + if (reg & GL_MDET_TX_TCLAN_VALID_M) { + u8 pf_num = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >> + GL_MDET_TX_TCLAN_PF_NUM_S; + u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >> + GL_MDET_TX_TCLAN_VF_NUM_S; + u8 event = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >> + GL_MDET_TX_TCLAN_MAL_TYPE_S; + u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >> + GL_MDET_TX_TCLAN_QNUM_S); + + if (netif_msg_rx_err(pf)) + dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", + event, queue, pf_num, vf_num); + wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff); + mdd_detected = true; + } + + reg = rd32(hw, GL_MDET_RX); + if (reg & GL_MDET_RX_VALID_M) { + u8 pf_num = (reg & GL_MDET_RX_PF_NUM_M) >> + GL_MDET_RX_PF_NUM_S; + u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >> + GL_MDET_RX_VF_NUM_S; + u8 event = (reg & GL_MDET_RX_MAL_TYPE_M) >> + GL_MDET_RX_MAL_TYPE_S; + u16 queue = ((reg & GL_MDET_RX_QNUM_M) >> + GL_MDET_RX_QNUM_S); + + if (netif_msg_rx_err(pf)) + dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n", + event, queue, pf_num, vf_num); + wr32(hw, GL_MDET_RX, 0xffffffff); + mdd_detected = true; + } + + if (mdd_detected) { + bool pf_mdd_detected = false; + + reg = rd32(hw, PF_MDET_TX_PQM); + if (reg & PF_MDET_TX_PQM_VALID_M) { + wr32(hw, PF_MDET_TX_PQM, 0xFFFF); + dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n"); + pf_mdd_detected = true; + } + + reg = rd32(hw, PF_MDET_TX_TCLAN); + if (reg & PF_MDET_TX_TCLAN_VALID_M) { + wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF); + dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n"); + pf_mdd_detected = true; + } + + reg = rd32(hw, PF_MDET_RX); + if (reg & PF_MDET_RX_VALID_M) { + wr32(hw, PF_MDET_RX, 0xFFFF); + dev_info(&pf->pdev->dev, "RX driver issue detected, PF reset issued\n"); + pf_mdd_detected = true; + } + /* Queue belongs to the PF initiate a reset */ + if (pf_mdd_detected) { + set_bit(__ICE_NEEDS_RESTART, pf->state); + ice_service_task_schedule(pf); + } + } + + /* see if one of the VFs needs to be reset */ + for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) { + struct ice_vf *vf = &pf->vf[i]; + + reg = rd32(hw, VP_MDET_TX_PQM(i)); + if (reg & VP_MDET_TX_PQM_VALID_M) { + wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF); + vf->num_mdd_events++; + dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", + i); + } + + reg = rd32(hw, VP_MDET_TX_TCLAN(i)); + if (reg & VP_MDET_TX_TCLAN_VALID_M) { + wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF); + vf->num_mdd_events++; + dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", + i); + } + + reg = rd32(hw, VP_MDET_TX_TDPU(i)); + if (reg & VP_MDET_TX_TDPU_VALID_M) { + wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF); + vf->num_mdd_events++; + dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", + i); + } + + reg = rd32(hw, VP_MDET_RX(i)); + if (reg & VP_MDET_RX_VALID_M) { + wr32(hw, VP_MDET_RX(i), 0xFFFF); + vf->num_mdd_events++; + dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n", + i); + } + + if (vf->num_mdd_events > ICE_DFLT_NUM_MDD_EVENTS_ALLOWED) { + dev_info(&pf->pdev->dev, + "Too many MDD events on VF %d, disabled\n", i); + dev_info(&pf->pdev->dev, + "Use PF Control I/F to re-enable the VF\n"); + set_bit(ICE_VF_STATE_DIS, vf->vf_states); + } + } + + /* re-enable MDD interrupt cause */ + clear_bit(__ICE_MDD_EVENT_PENDING, pf->state); + reg = rd32(hw, PFINT_OICR_ENA); + reg |= PFINT_OICR_MAL_DETECT_M; + wr32(hw, PFINT_OICR_ENA, reg); + ice_flush(hw); +} + +/** * ice_service_task - manage and run subtasks * @work: pointer to work_struct contained by the PF struct */ @@ -992,16 +1108,21 @@ static void ice_service_task(struct work_struct *work) /* process reset requests first */ ice_reset_subtask(pf); - /* bail if a reset/recovery cycle is pending */ - if (ice_is_reset_recovery_pending(pf->state) || - test_bit(__ICE_SUSPENDED, pf->state)) { + /* bail if a reset/recovery cycle is pending or rebuild failed */ + if (ice_is_reset_in_progress(pf->state) || + test_bit(__ICE_SUSPENDED, pf->state) || + test_bit(__ICE_NEEDS_RESTART, pf->state)) { ice_service_task_complete(pf); return; } + ice_check_for_hang_subtask(pf); ice_sync_fltr_subtask(pf); + ice_handle_mdd_event(pf); + ice_process_vflr_event(pf); ice_watchdog_subtask(pf); ice_clean_adminq_subtask(pf); + ice_clean_mailboxq_subtask(pf); /* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */ ice_service_task_complete(pf); @@ -1011,6 +1132,9 @@ static void ice_service_task(struct work_struct *work) * schedule the service task now. */ if (time_after(jiffies, (start_time + pf->serv_tmr_period)) || + test_bit(__ICE_MDD_EVENT_PENDING, pf->state) || + test_bit(__ICE_VFLR_EVENT_PENDING, pf->state) || + test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state) || test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) mod_timer(&pf->serv_tmr, jiffies); } @@ -1025,6 +1149,10 @@ static void ice_set_ctrlq_len(struct ice_hw *hw) hw->adminq.num_sq_entries = ICE_AQ_LEN; hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN; hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN; + hw->mailboxq.num_rq_entries = ICE_MBXQ_LEN; + hw->mailboxq.num_sq_entries = ICE_MBXQ_LEN; + hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN; + hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN; } /** @@ -1055,57 +1183,6 @@ static void ice_irq_affinity_notify(struct irq_affinity_notify *notify, static void ice_irq_affinity_release(struct kref __always_unused *ref) {} /** - * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI - * @vsi: the VSI being un-configured - */ -static void ice_vsi_dis_irq(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; - int base = vsi->base_vector; - u32 val; - int i; - - /* disable interrupt causation from each queue */ - if (vsi->tx_rings) { - ice_for_each_txq(vsi, i) { - if (vsi->tx_rings[i]) { - u16 reg; - - reg = vsi->tx_rings[i]->reg_idx; - val = rd32(hw, QINT_TQCTL(reg)); - val &= ~QINT_TQCTL_CAUSE_ENA_M; - wr32(hw, QINT_TQCTL(reg), val); - } - } - } - - if (vsi->rx_rings) { - ice_for_each_rxq(vsi, i) { - if (vsi->rx_rings[i]) { - u16 reg; - - reg = vsi->rx_rings[i]->reg_idx; - val = rd32(hw, QINT_RQCTL(reg)); - val &= ~QINT_RQCTL_CAUSE_ENA_M; - wr32(hw, QINT_RQCTL(reg), val); - } - } - } - - /* disable each interrupt */ - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { - for (i = vsi->base_vector; - i < (vsi->num_q_vectors + vsi->base_vector); i++) - wr32(hw, GLINT_DYN_CTL(i), 0); - - ice_flush(hw); - for (i = 0; i < vsi->num_q_vectors; i++) - synchronize_irq(pf->msix_entries[i + base].vector); - } -} - -/** * ice_vsi_ena_irq - Enable IRQ for the given VSI * @vsi: the VSI being configured */ @@ -1126,26 +1203,6 @@ static int ice_vsi_ena_irq(struct ice_vsi *vsi) } /** - * ice_vsi_delete - delete a VSI from the switch - * @vsi: pointer to VSI being removed - */ -static void ice_vsi_delete(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - struct ice_vsi_ctx ctxt; - enum ice_status status; - - ctxt.vsi_num = vsi->vsi_num; - - memcpy(&ctxt.info, &vsi->info, sizeof(struct ice_aqc_vsi_props)); - - status = ice_aq_free_vsi(&pf->hw, &ctxt, false, NULL); - if (status) - dev_err(&pf->pdev->dev, "Failed to delete VSI %i in FW\n", - vsi->vsi_num); -} - -/** * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI * @vsi: the VSI being configured * @basename: name for the vector @@ -1154,7 +1211,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) { int q_vectors = vsi->num_q_vectors; struct ice_pf *pf = vsi->back; - int base = vsi->base_vector; + int base = vsi->sw_base_vector; int rx_int_idx = 0; int tx_int_idx = 0; int vector, err; @@ -1213,469 +1270,6 @@ free_q_irqs: } /** - * ice_vsi_set_rss_params - Setup RSS capabilities per VSI type - * @vsi: the VSI being configured - */ -static void ice_vsi_set_rss_params(struct ice_vsi *vsi) -{ - struct ice_hw_common_caps *cap; - struct ice_pf *pf = vsi->back; - - if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { - vsi->rss_size = 1; - return; - } - - cap = &pf->hw.func_caps.common_cap; - switch (vsi->type) { - case ICE_VSI_PF: - /* PF VSI will inherit RSS instance of PF */ - vsi->rss_table_size = cap->rss_table_size; - vsi->rss_size = min_t(int, num_online_cpus(), - BIT(cap->rss_table_entry_width)); - vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF; - break; - default: - dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); - break; - } -} - -/** - * ice_vsi_setup_q_map - Setup a VSI queue map - * @vsi: the VSI being configured - * @ctxt: VSI context structure - */ -static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) -{ - u16 offset = 0, qmap = 0, numq_tc; - u16 pow = 0, max_rss = 0, qcount; - u16 qcount_tx = vsi->alloc_txq; - u16 qcount_rx = vsi->alloc_rxq; - bool ena_tc0 = false; - int i; - - /* at least TC0 should be enabled by default */ - if (vsi->tc_cfg.numtc) { - if (!(vsi->tc_cfg.ena_tc & BIT(0))) - ena_tc0 = true; - } else { - ena_tc0 = true; - } - - if (ena_tc0) { - vsi->tc_cfg.numtc++; - vsi->tc_cfg.ena_tc |= 1; - } - - numq_tc = qcount_rx / vsi->tc_cfg.numtc; - - /* TC mapping is a function of the number of Rx queues assigned to the - * VSI for each traffic class and the offset of these queues. - * The first 10 bits are for queue offset for TC0, next 4 bits for no:of - * queues allocated to TC0. No:of queues is a power-of-2. - * - * If TC is not enabled, the queue offset is set to 0, and allocate one - * queue, this way, traffic for the given TC will be sent to the default - * queue. - * - * Setup number and offset of Rx queues for all TCs for the VSI - */ - - /* qcount will change if RSS is enabled */ - if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags)) { - if (vsi->type == ICE_VSI_PF) - max_rss = ICE_MAX_LG_RSS_QS; - else - max_rss = ICE_MAX_SMALL_RSS_QS; - - qcount = min_t(int, numq_tc, max_rss); - qcount = min_t(int, qcount, vsi->rss_size); - } else { - qcount = numq_tc; - } - - /* find higher power-of-2 of qcount */ - pow = ilog2(qcount); - - if (!is_power_of_2(qcount)) - pow++; - - for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { - if (!(vsi->tc_cfg.ena_tc & BIT(i))) { - /* TC is not enabled */ - vsi->tc_cfg.tc_info[i].qoffset = 0; - vsi->tc_cfg.tc_info[i].qcount = 1; - ctxt->info.tc_mapping[i] = 0; - continue; - } - - /* TC is enabled */ - vsi->tc_cfg.tc_info[i].qoffset = offset; - vsi->tc_cfg.tc_info[i].qcount = qcount; - - qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & - ICE_AQ_VSI_TC_Q_OFFSET_M) | - ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & - ICE_AQ_VSI_TC_Q_NUM_M); - offset += qcount; - ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); - } - - vsi->num_txq = qcount_tx; - vsi->num_rxq = offset; - - /* Rx queue mapping */ - ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG); - /* q_mapping buffer holds the info for the first queue allocated for - * this VSI in the PF space and also the number of queues associated - * with this VSI. - */ - ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]); - ctxt->info.q_mapping[1] = cpu_to_le16(vsi->num_rxq); -} - -/** - * ice_set_dflt_vsi_ctx - Set default VSI context before adding a VSI - * @ctxt: the VSI context being set - * - * This initializes a default VSI context for all sections except the Queues. - */ -static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt) -{ - u32 table = 0; - - memset(&ctxt->info, 0, sizeof(ctxt->info)); - /* VSI's should be allocated from shared pool */ - ctxt->alloc_from_pool = true; - /* Src pruning enabled by default */ - ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE; - /* Traffic from VSI can be sent to LAN */ - ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA; - /* Allow all packets untagged/tagged */ - ctxt->info.port_vlan_flags = ((ICE_AQ_VSI_PVLAN_MODE_ALL & - ICE_AQ_VSI_PVLAN_MODE_M) >> - ICE_AQ_VSI_PVLAN_MODE_S); - /* Show VLAN/UP from packets in Rx descriptors */ - ctxt->info.port_vlan_flags |= ((ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH & - ICE_AQ_VSI_PVLAN_EMOD_M) >> - ICE_AQ_VSI_PVLAN_EMOD_S); - /* Have 1:1 UP mapping for both ingress/egress tables */ - table |= ICE_UP_TABLE_TRANSLATE(0, 0); - table |= ICE_UP_TABLE_TRANSLATE(1, 1); - table |= ICE_UP_TABLE_TRANSLATE(2, 2); - table |= ICE_UP_TABLE_TRANSLATE(3, 3); - table |= ICE_UP_TABLE_TRANSLATE(4, 4); - table |= ICE_UP_TABLE_TRANSLATE(5, 5); - table |= ICE_UP_TABLE_TRANSLATE(6, 6); - table |= ICE_UP_TABLE_TRANSLATE(7, 7); - ctxt->info.ingress_table = cpu_to_le32(table); - ctxt->info.egress_table = cpu_to_le32(table); - /* Have 1:1 UP mapping for outer to inner UP table */ - ctxt->info.outer_up_table = cpu_to_le32(table); - /* No Outer tag support outer_tag_flags remains to zero */ -} - -/** - * ice_set_rss_vsi_ctx - Set RSS VSI context before adding a VSI - * @ctxt: the VSI context being set - * @vsi: the VSI being configured - */ -static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) -{ - u8 lut_type, hash_type; - - switch (vsi->type) { - case ICE_VSI_PF: - /* PF VSI will inherit RSS instance of PF */ - lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF; - hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ; - break; - default: - dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n", - vsi->type); - return; - } - - ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) & - ICE_AQ_VSI_Q_OPT_RSS_LUT_M) | - ((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) & - ICE_AQ_VSI_Q_OPT_RSS_HASH_M); -} - -/** - * ice_vsi_add - Create a new VSI or fetch preallocated VSI - * @vsi: the VSI being configured - * - * This initializes a VSI context depending on the VSI type to be added and - * passes it down to the add_vsi aq command to create a new VSI. - */ -static int ice_vsi_add(struct ice_vsi *vsi) -{ - struct ice_vsi_ctx ctxt = { 0 }; - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; - int ret = 0; - - switch (vsi->type) { - case ICE_VSI_PF: - ctxt.flags = ICE_AQ_VSI_TYPE_PF; - break; - default: - return -ENODEV; - } - - ice_set_dflt_vsi_ctx(&ctxt); - /* if the switch is in VEB mode, allow VSI loopback */ - if (vsi->vsw->bridge_mode == BRIDGE_MODE_VEB) - ctxt.info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB; - - /* Set LUT type and HASH type if RSS is enabled */ - if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) - ice_set_rss_vsi_ctx(&ctxt, vsi); - - ctxt.info.sw_id = vsi->port_info->sw_id; - ice_vsi_setup_q_map(vsi, &ctxt); - - ret = ice_aq_add_vsi(hw, &ctxt, NULL); - if (ret) { - dev_err(&vsi->back->pdev->dev, - "Add VSI AQ call failed, err %d\n", ret); - return -EIO; - } - vsi->info = ctxt.info; - vsi->vsi_num = ctxt.vsi_num; - - return ret; -} - -/** - * ice_vsi_release_msix - Clear the queue to Interrupt mapping in HW - * @vsi: the VSI being cleaned up - */ -static void ice_vsi_release_msix(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - u16 vector = vsi->base_vector; - struct ice_hw *hw = &pf->hw; - u32 txq = 0; - u32 rxq = 0; - int i, q; - - for (i = 0; i < vsi->num_q_vectors; i++, vector++) { - struct ice_q_vector *q_vector = vsi->q_vectors[i]; - - wr32(hw, GLINT_ITR(ICE_RX_ITR, vector), 0); - wr32(hw, GLINT_ITR(ICE_TX_ITR, vector), 0); - for (q = 0; q < q_vector->num_ring_tx; q++) { - wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0); - txq++; - } - - for (q = 0; q < q_vector->num_ring_rx; q++) { - wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), 0); - rxq++; - } - } - - ice_flush(hw); -} - -/** - * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI - * @vsi: the VSI having rings deallocated - */ -static void ice_vsi_clear_rings(struct ice_vsi *vsi) -{ - int i; - - if (vsi->tx_rings) { - for (i = 0; i < vsi->alloc_txq; i++) { - if (vsi->tx_rings[i]) { - kfree_rcu(vsi->tx_rings[i], rcu); - vsi->tx_rings[i] = NULL; - } - } - } - if (vsi->rx_rings) { - for (i = 0; i < vsi->alloc_rxq; i++) { - if (vsi->rx_rings[i]) { - kfree_rcu(vsi->rx_rings[i], rcu); - vsi->rx_rings[i] = NULL; - } - } - } -} - -/** - * ice_vsi_alloc_rings - Allocates Tx and Rx rings for the VSI - * @vsi: VSI which is having rings allocated - */ -static int ice_vsi_alloc_rings(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - int i; - - /* Allocate tx_rings */ - for (i = 0; i < vsi->alloc_txq; i++) { - struct ice_ring *ring; - - /* allocate with kzalloc(), free with kfree_rcu() */ - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - - if (!ring) - goto err_out; - - ring->q_index = i; - ring->reg_idx = vsi->txq_map[i]; - ring->ring_active = false; - ring->vsi = vsi; - ring->netdev = vsi->netdev; - ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; - - vsi->tx_rings[i] = ring; - } - - /* Allocate rx_rings */ - for (i = 0; i < vsi->alloc_rxq; i++) { - struct ice_ring *ring; - - /* allocate with kzalloc(), free with kfree_rcu() */ - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) - goto err_out; - - ring->q_index = i; - ring->reg_idx = vsi->rxq_map[i]; - ring->ring_active = false; - ring->vsi = vsi; - ring->netdev = vsi->netdev; - ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; - vsi->rx_rings[i] = ring; - } - - return 0; - -err_out: - ice_vsi_clear_rings(vsi); - return -ENOMEM; -} - -/** - * ice_vsi_free_irq - Free the irq association with the OS - * @vsi: the VSI being configured - */ -static void ice_vsi_free_irq(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - int base = vsi->base_vector; - - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { - int i; - - if (!vsi->q_vectors || !vsi->irqs_ready) - return; - - vsi->irqs_ready = false; - for (i = 0; i < vsi->num_q_vectors; i++) { - u16 vector = i + base; - int irq_num; - - irq_num = pf->msix_entries[vector].vector; - - /* free only the irqs that were actually requested */ - if (!vsi->q_vectors[i] || - !(vsi->q_vectors[i]->num_ring_tx || - vsi->q_vectors[i]->num_ring_rx)) - continue; - - /* clear the affinity notifier in the IRQ descriptor */ - irq_set_affinity_notifier(irq_num, NULL); - - /* clear the affinity_mask in the IRQ descriptor */ - irq_set_affinity_hint(irq_num, NULL); - synchronize_irq(irq_num); - devm_free_irq(&pf->pdev->dev, irq_num, - vsi->q_vectors[i]); - } - ice_vsi_release_msix(vsi); - } -} - -/** - * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW - * @vsi: the VSI being configured - */ -static void ice_vsi_cfg_msix(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - u16 vector = vsi->base_vector; - struct ice_hw *hw = &pf->hw; - u32 txq = 0, rxq = 0; - int i, q, itr; - u8 itr_gran; - - for (i = 0; i < vsi->num_q_vectors; i++, vector++) { - struct ice_q_vector *q_vector = vsi->q_vectors[i]; - - itr_gran = hw->itr_gran_200; - - if (q_vector->num_ring_rx) { - q_vector->rx.itr = - ITR_TO_REG(vsi->rx_rings[rxq]->rx_itr_setting, - itr_gran); - q_vector->rx.latency_range = ICE_LOW_LATENCY; - } - - if (q_vector->num_ring_tx) { - q_vector->tx.itr = - ITR_TO_REG(vsi->tx_rings[txq]->tx_itr_setting, - itr_gran); - q_vector->tx.latency_range = ICE_LOW_LATENCY; - } - wr32(hw, GLINT_ITR(ICE_RX_ITR, vector), q_vector->rx.itr); - wr32(hw, GLINT_ITR(ICE_TX_ITR, vector), q_vector->tx.itr); - - /* Both Transmit Queue Interrupt Cause Control register - * and Receive Queue Interrupt Cause control register - * expects MSIX_INDX field to be the vector index - * within the function space and not the absolute - * vector index across PF or across device. - * For SR-IOV VF VSIs queue vector index always starts - * with 1 since first vector index(0) is used for OICR - * in VF space. Since VMDq and other PF VSIs are withtin - * the PF function space, use the vector index thats - * tracked for this PF. - */ - for (q = 0; q < q_vector->num_ring_tx; q++) { - u32 val; - - itr = ICE_TX_ITR; - val = QINT_TQCTL_CAUSE_ENA_M | - (itr << QINT_TQCTL_ITR_INDX_S) | - (vector << QINT_TQCTL_MSIX_INDX_S); - wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val); - txq++; - } - - for (q = 0; q < q_vector->num_ring_rx; q++) { - u32 val; - - itr = ICE_RX_ITR; - val = QINT_RQCTL_CAUSE_ENA_M | - (itr << QINT_RQCTL_ITR_INDX_S) | - (vector << QINT_RQCTL_MSIX_INDX_S); - wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val); - rxq++; - } - } - - ice_flush(hw); -} - -/** * ice_ena_misc_vector - enable the non-queue interrupts * @pf: board private structure */ @@ -1688,20 +1282,18 @@ static void ice_ena_misc_vector(struct ice_pf *pf) wr32(hw, PFINT_OICR_ENA, 0); /* disable all */ rd32(hw, PFINT_OICR); /* read to clear */ - val = (PFINT_OICR_HLP_RDY_M | - PFINT_OICR_CPM_RDY_M | - PFINT_OICR_ECC_ERR_M | + val = (PFINT_OICR_ECC_ERR_M | PFINT_OICR_MAL_DETECT_M | PFINT_OICR_GRST_M | PFINT_OICR_PCI_EXCEPTION_M | - PFINT_OICR_GPIO_M | - PFINT_OICR_STORM_DETECT_M | - PFINT_OICR_HMC_ERR_M); + PFINT_OICR_VFLR_M | + PFINT_OICR_HMC_ERR_M | + PFINT_OICR_PE_CRITERR_M); wr32(hw, PFINT_OICR_ENA, val); /* SW_ITR_IDX = 0, but don't change INTENA */ - wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), + wr32(hw, GLINT_DYN_CTL(pf->hw_oicr_idx), GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M); } @@ -1718,12 +1310,23 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) u32 oicr, ena_mask; set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); + set_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state); oicr = rd32(hw, PFINT_OICR); ena_mask = rd32(hw, PFINT_OICR_ENA); + if (oicr & PFINT_OICR_MAL_DETECT_M) { + ena_mask &= ~PFINT_OICR_MAL_DETECT_M; + set_bit(__ICE_MDD_EVENT_PENDING, pf->state); + } + if (oicr & PFINT_OICR_VFLR_M) { + ena_mask &= ~PFINT_OICR_VFLR_M; + set_bit(__ICE_VFLR_EVENT_PENDING, pf->state); + } + if (oicr & PFINT_OICR_GRST_M) { u32 reset; + /* we have a reset warning */ ena_mask &= ~PFINT_OICR_GRST_M; reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >> @@ -1733,15 +1336,18 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) pf->corer_count++; else if (reset == ICE_RESET_GLOBR) pf->globr_count++; - else + else if (reset == ICE_RESET_EMPR) pf->empr_count++; + else + dev_dbg(&pf->pdev->dev, "Invalid reset type %d\n", + reset); /* If a reset cycle isn't already in progress, we set a bit in * pf->state so that the service task can start a reset/rebuild. * We also make note of which reset happened so that peer * devices/drivers can be informed. */ - if (!test_bit(__ICE_RESET_RECOVERY_PENDING, pf->state)) { + if (!test_and_set_bit(__ICE_RESET_OICR_RECV, pf->state)) { if (reset == ICE_RESET_CORER) set_bit(__ICE_CORER_RECV, pf->state); else if (reset == ICE_RESET_GLOBR) @@ -1749,7 +1355,20 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) else set_bit(__ICE_EMPR_RECV, pf->state); - set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state); + /* There are couple of different bits at play here. + * hw->reset_ongoing indicates whether the hardware is + * in reset. This is set to true when a reset interrupt + * is received and set back to false after the driver + * has determined that the hardware is out of reset. + * + * __ICE_RESET_OICR_RECV in pf->state indicates + * that a post reset rebuild is required before the + * driver is operational again. This is set above. + * + * As this is the start of the reset/rebuild cycle, set + * both to indicate that. + */ + hw->reset_ongoing = true; } } @@ -1790,208 +1409,6 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) } /** - * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors - * @vsi: the VSI being configured - * - * This function maps descriptor rings to the queue-specific vectors allotted - * through the MSI-X enabling code. On a constrained vector budget, we map Tx - * and Rx rings to the vector as "efficiently" as possible. - */ -static void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) -{ - int q_vectors = vsi->num_q_vectors; - int tx_rings_rem, rx_rings_rem; - int v_id; - - /* initially assigning remaining rings count to VSIs num queue value */ - tx_rings_rem = vsi->num_txq; - rx_rings_rem = vsi->num_rxq; - - for (v_id = 0; v_id < q_vectors; v_id++) { - struct ice_q_vector *q_vector = vsi->q_vectors[v_id]; - int tx_rings_per_v, rx_rings_per_v, q_id, q_base; - - /* Tx rings mapping to vector */ - tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id); - q_vector->num_ring_tx = tx_rings_per_v; - q_vector->tx.ring = NULL; - q_base = vsi->num_txq - tx_rings_rem; - - for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) { - struct ice_ring *tx_ring = vsi->tx_rings[q_id]; - - tx_ring->q_vector = q_vector; - tx_ring->next = q_vector->tx.ring; - q_vector->tx.ring = tx_ring; - } - tx_rings_rem -= tx_rings_per_v; - - /* Rx rings mapping to vector */ - rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id); - q_vector->num_ring_rx = rx_rings_per_v; - q_vector->rx.ring = NULL; - q_base = vsi->num_rxq - rx_rings_rem; - - for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) { - struct ice_ring *rx_ring = vsi->rx_rings[q_id]; - - rx_ring->q_vector = q_vector; - rx_ring->next = q_vector->rx.ring; - q_vector->rx.ring = rx_ring; - } - rx_rings_rem -= rx_rings_per_v; - } -} - -/** - * ice_vsi_set_num_qs - Set num queues, descriptors and vectors for a VSI - * @vsi: the VSI being configured - * - * Return 0 on success and a negative value on error - */ -static void ice_vsi_set_num_qs(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - - switch (vsi->type) { - case ICE_VSI_PF: - vsi->alloc_txq = pf->num_lan_tx; - vsi->alloc_rxq = pf->num_lan_rx; - vsi->num_desc = ALIGN(ICE_DFLT_NUM_DESC, ICE_REQ_DESC_MULTIPLE); - vsi->num_q_vectors = max_t(int, pf->num_lan_rx, pf->num_lan_tx); - break; - default: - dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n", - vsi->type); - break; - } -} - -/** - * ice_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi - * @vsi: VSI pointer - * @alloc_qvectors: a bool to specify if q_vectors need to be allocated. - * - * On error: returns error code (negative) - * On success: returns 0 - */ -static int ice_vsi_alloc_arrays(struct ice_vsi *vsi, bool alloc_qvectors) -{ - struct ice_pf *pf = vsi->back; - - /* allocate memory for both Tx and Rx ring pointers */ - vsi->tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq, - sizeof(struct ice_ring *), GFP_KERNEL); - if (!vsi->tx_rings) - goto err_txrings; - - vsi->rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq, - sizeof(struct ice_ring *), GFP_KERNEL); - if (!vsi->rx_rings) - goto err_rxrings; - - if (alloc_qvectors) { - /* allocate memory for q_vector pointers */ - vsi->q_vectors = devm_kcalloc(&pf->pdev->dev, - vsi->num_q_vectors, - sizeof(struct ice_q_vector *), - GFP_KERNEL); - if (!vsi->q_vectors) - goto err_vectors; - } - - return 0; - -err_vectors: - devm_kfree(&pf->pdev->dev, vsi->rx_rings); -err_rxrings: - devm_kfree(&pf->pdev->dev, vsi->tx_rings); -err_txrings: - return -ENOMEM; -} - -/** - * ice_msix_clean_rings - MSIX mode Interrupt Handler - * @irq: interrupt number - * @data: pointer to a q_vector - */ -static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) -{ - struct ice_q_vector *q_vector = (struct ice_q_vector *)data; - - if (!q_vector->tx.ring && !q_vector->rx.ring) - return IRQ_HANDLED; - - napi_schedule(&q_vector->napi); - - return IRQ_HANDLED; -} - -/** - * ice_vsi_alloc - Allocates the next available struct vsi in the PF - * @pf: board private structure - * @type: type of VSI - * - * returns a pointer to a VSI on success, NULL on failure. - */ -static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type) -{ - struct ice_vsi *vsi = NULL; - - /* Need to protect the allocation of the VSIs at the PF level */ - mutex_lock(&pf->sw_mutex); - - /* If we have already allocated our maximum number of VSIs, - * pf->next_vsi will be ICE_NO_VSI. If not, pf->next_vsi index - * is available to be populated - */ - if (pf->next_vsi == ICE_NO_VSI) { - dev_dbg(&pf->pdev->dev, "out of VSI slots!\n"); - goto unlock_pf; - } - - vsi = devm_kzalloc(&pf->pdev->dev, sizeof(*vsi), GFP_KERNEL); - if (!vsi) - goto unlock_pf; - - vsi->type = type; - vsi->back = pf; - set_bit(__ICE_DOWN, vsi->state); - vsi->idx = pf->next_vsi; - vsi->work_lmt = ICE_DFLT_IRQ_WORK; - - ice_vsi_set_num_qs(vsi); - - switch (vsi->type) { - case ICE_VSI_PF: - if (ice_vsi_alloc_arrays(vsi, true)) - goto err_rings; - - /* Setup default MSIX irq handler for VSI */ - vsi->irq_handler = ice_msix_clean_rings; - break; - default: - dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); - goto unlock_pf; - } - - /* fill VSI slot in the PF struct */ - pf->vsi[pf->next_vsi] = vsi; - - /* prepare pf->next_vsi for next use */ - pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi, - pf->next_vsi); - goto unlock_pf; - -err_rings: - devm_kfree(&pf->pdev->dev, vsi); - vsi = NULL; -unlock_pf: - mutex_unlock(&pf->sw_mutex); - return vsi; -} - -/** * ice_free_irq_msix_misc - Unroll misc vector setup * @pf: board private structure */ @@ -2002,12 +1419,15 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf) ice_flush(&pf->hw); if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) { - synchronize_irq(pf->msix_entries[pf->oicr_idx].vector); + synchronize_irq(pf->msix_entries[pf->sw_oicr_idx].vector); devm_free_irq(&pf->pdev->dev, - pf->msix_entries[pf->oicr_idx].vector, pf); + pf->msix_entries[pf->sw_oicr_idx].vector, pf); } - ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID); + pf->num_avail_sw_msix += 1; + ice_free_res(pf->sw_irq_tracker, pf->sw_oicr_idx, ICE_RES_MISC_VEC_ID); + pf->num_avail_hw_msix += 1; + ice_free_res(pf->hw_irq_tracker, pf->hw_oicr_idx, ICE_RES_MISC_VEC_ID); } /** @@ -2034,44 +1454,61 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) * lost during reset. Note that this function is called only during * rebuild path and not while reset is in progress. */ - if (ice_is_reset_recovery_pending(pf->state)) + if (ice_is_reset_in_progress(pf->state)) goto skip_req_irq; - /* reserve one vector in irq_tracker for misc interrupts */ - oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); + /* reserve one vector in sw_irq_tracker for misc interrupts */ + oicr_idx = ice_get_res(pf, pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID); if (oicr_idx < 0) return oicr_idx; - pf->oicr_idx = oicr_idx; + pf->num_avail_sw_msix -= 1; + pf->sw_oicr_idx = oicr_idx; + + /* reserve one vector in hw_irq_tracker for misc interrupts */ + oicr_idx = ice_get_res(pf, pf->hw_irq_tracker, 1, ICE_RES_MISC_VEC_ID); + if (oicr_idx < 0) { + ice_free_res(pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID); + pf->num_avail_sw_msix += 1; + return oicr_idx; + } + pf->num_avail_hw_msix -= 1; + pf->hw_oicr_idx = oicr_idx; err = devm_request_irq(&pf->pdev->dev, - pf->msix_entries[pf->oicr_idx].vector, + pf->msix_entries[pf->sw_oicr_idx].vector, ice_misc_intr, 0, pf->int_name, pf); if (err) { dev_err(&pf->pdev->dev, "devm_request_irq for %s failed: %d\n", pf->int_name, err); - ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); + ice_free_res(pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID); + pf->num_avail_sw_msix += 1; + ice_free_res(pf->hw_irq_tracker, 1, ICE_RES_MISC_VEC_ID); + pf->num_avail_hw_msix += 1; return err; } skip_req_irq: ice_ena_misc_vector(pf); - val = (pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) | - (ICE_RX_ITR & PFINT_OICR_CTL_ITR_INDX_M) | - PFINT_OICR_CTL_CAUSE_ENA_M; + val = ((pf->hw_oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) | + PFINT_OICR_CTL_CAUSE_ENA_M); wr32(hw, PFINT_OICR_CTL, val); /* This enables Admin queue Interrupt causes */ - val = (pf->oicr_idx & PFINT_FW_CTL_MSIX_INDX_M) | - (ICE_RX_ITR & PFINT_FW_CTL_ITR_INDX_M) | - PFINT_FW_CTL_CAUSE_ENA_M; + val = ((pf->hw_oicr_idx & PFINT_FW_CTL_MSIX_INDX_M) | + PFINT_FW_CTL_CAUSE_ENA_M); wr32(hw, PFINT_FW_CTL, val); - itr_gran = hw->itr_gran_200; + /* This enables Mailbox queue Interrupt causes */ + val = ((pf->hw_oicr_idx & PFINT_MBX_CTL_MSIX_INDX_M) | + PFINT_MBX_CTL_CAUSE_ENA_M); + wr32(hw, PFINT_MBX_CTL, val); + + itr_gran = hw->itr_gran; - wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx), + wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->hw_oicr_idx), ITR_TO_REG(ICE_ITR_8K, itr_gran)); ice_flush(hw); @@ -2081,209 +1518,43 @@ skip_req_irq: } /** - * ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI - * @vsi: the VSI getting queues - * - * Return 0 on success and a negative value on error - */ -static int ice_vsi_get_qs_contig(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - int offset, ret = 0; - - mutex_lock(&pf->avail_q_mutex); - /* look for contiguous block of queues for tx */ - offset = bitmap_find_next_zero_area(pf->avail_txqs, ICE_MAX_TXQS, - 0, vsi->alloc_txq, 0); - if (offset < ICE_MAX_TXQS) { - int i; - - bitmap_set(pf->avail_txqs, offset, vsi->alloc_txq); - for (i = 0; i < vsi->alloc_txq; i++) - vsi->txq_map[i] = i + offset; - } else { - ret = -ENOMEM; - vsi->tx_mapping_mode = ICE_VSI_MAP_SCATTER; - } - - /* look for contiguous block of queues for rx */ - offset = bitmap_find_next_zero_area(pf->avail_rxqs, ICE_MAX_RXQS, - 0, vsi->alloc_rxq, 0); - if (offset < ICE_MAX_RXQS) { - int i; - - bitmap_set(pf->avail_rxqs, offset, vsi->alloc_rxq); - for (i = 0; i < vsi->alloc_rxq; i++) - vsi->rxq_map[i] = i + offset; - } else { - ret = -ENOMEM; - vsi->rx_mapping_mode = ICE_VSI_MAP_SCATTER; - } - mutex_unlock(&pf->avail_q_mutex); - - return ret; -} - -/** - * ice_vsi_get_qs_scatter - Assign a scattered queues to VSI - * @vsi: the VSI getting queues - * - * Return 0 on success and a negative value on error + * ice_napi_del - Remove NAPI handler for the VSI + * @vsi: VSI for which NAPI handler is to be removed */ -static int ice_vsi_get_qs_scatter(struct ice_vsi *vsi) +static void ice_napi_del(struct ice_vsi *vsi) { - struct ice_pf *pf = vsi->back; - int i, index = 0; - - mutex_lock(&pf->avail_q_mutex); - - if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER) { - for (i = 0; i < vsi->alloc_txq; i++) { - index = find_next_zero_bit(pf->avail_txqs, - ICE_MAX_TXQS, index); - if (index < ICE_MAX_TXQS) { - set_bit(index, pf->avail_txqs); - vsi->txq_map[i] = index; - } else { - goto err_scatter_tx; - } - } - } - - if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER) { - for (i = 0; i < vsi->alloc_rxq; i++) { - index = find_next_zero_bit(pf->avail_rxqs, - ICE_MAX_RXQS, index); - if (index < ICE_MAX_RXQS) { - set_bit(index, pf->avail_rxqs); - vsi->rxq_map[i] = index; - } else { - goto err_scatter_rx; - } - } - } - - mutex_unlock(&pf->avail_q_mutex); - return 0; + int v_idx; -err_scatter_rx: - /* unflag any queues we have grabbed (i is failed position) */ - for (index = 0; index < i; index++) { - clear_bit(vsi->rxq_map[index], pf->avail_rxqs); - vsi->rxq_map[index] = 0; - } - i = vsi->alloc_txq; -err_scatter_tx: - /* i is either position of failed attempt or vsi->alloc_txq */ - for (index = 0; index < i; index++) { - clear_bit(vsi->txq_map[index], pf->avail_txqs); - vsi->txq_map[index] = 0; - } + if (!vsi->netdev) + return; - mutex_unlock(&pf->avail_q_mutex); - return -ENOMEM; + for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++) + netif_napi_del(&vsi->q_vectors[v_idx]->napi); } /** - * ice_vsi_get_qs - Assign queues from PF to VSI - * @vsi: the VSI to assign queues to + * ice_napi_add - register NAPI handler for the VSI + * @vsi: VSI for which NAPI handler is to be registered * - * Returns 0 on success and a negative value on error + * This function is only called in the driver's load path. Registering the NAPI + * handler is done in ice_vsi_alloc_q_vector() for all other cases (i.e. resume, + * reset/rebuild, etc.) */ -static int ice_vsi_get_qs(struct ice_vsi *vsi) +static void ice_napi_add(struct ice_vsi *vsi) { - int ret = 0; - - vsi->tx_mapping_mode = ICE_VSI_MAP_CONTIG; - vsi->rx_mapping_mode = ICE_VSI_MAP_CONTIG; - - /* NOTE: ice_vsi_get_qs_contig() will set the rx/tx mapping - * modes individually to scatter if assigning contiguous queues - * to rx or tx fails - */ - ret = ice_vsi_get_qs_contig(vsi); - if (ret < 0) { - if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER) - vsi->alloc_txq = max_t(u16, vsi->alloc_txq, - ICE_MAX_SCATTER_TXQS); - if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER) - vsi->alloc_rxq = max_t(u16, vsi->alloc_rxq, - ICE_MAX_SCATTER_RXQS); - ret = ice_vsi_get_qs_scatter(vsi); - } - - return ret; -} - -/** - * ice_vsi_put_qs - Release queues from VSI to PF - * @vsi: the VSI thats going to release queues - */ -static void ice_vsi_put_qs(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - int i; - - mutex_lock(&pf->avail_q_mutex); - - for (i = 0; i < vsi->alloc_txq; i++) { - clear_bit(vsi->txq_map[i], pf->avail_txqs); - vsi->txq_map[i] = ICE_INVAL_Q_INDEX; - } - - for (i = 0; i < vsi->alloc_rxq; i++) { - clear_bit(vsi->rxq_map[i], pf->avail_rxqs); - vsi->rxq_map[i] = ICE_INVAL_Q_INDEX; - } - - mutex_unlock(&pf->avail_q_mutex); -} - -/** - * ice_free_q_vector - Free memory allocated for a specific interrupt vector - * @vsi: VSI having the memory freed - * @v_idx: index of the vector to be freed - */ -static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) -{ - struct ice_q_vector *q_vector; - struct ice_ring *ring; + int v_idx; - if (!vsi->q_vectors[v_idx]) { - dev_dbg(&vsi->back->pdev->dev, "Queue vector at index %d not found\n", - v_idx); + if (!vsi->netdev) return; - } - q_vector = vsi->q_vectors[v_idx]; - - ice_for_each_ring(ring, q_vector->tx) - ring->q_vector = NULL; - ice_for_each_ring(ring, q_vector->rx) - ring->q_vector = NULL; - - /* only VSI with an associated netdev is set up with NAPI */ - if (vsi->netdev) - netif_napi_del(&q_vector->napi); - - devm_kfree(&vsi->back->pdev->dev, q_vector); - vsi->q_vectors[v_idx] = NULL; -} - -/** - * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors - * @vsi: the VSI having memory freed - */ -static void ice_vsi_free_q_vectors(struct ice_vsi *vsi) -{ - int v_idx; for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++) - ice_free_q_vector(vsi, v_idx); + netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi, + ice_napi_poll, NAPI_POLL_WEIGHT); } /** - * ice_cfg_netdev - Setup the netdev flags - * @vsi: the VSI being configured + * ice_cfg_netdev - Allocate, configure and register a netdev + * @vsi: the VSI associated with the new netdev * * Returns 0 on success, negative value on failure */ @@ -2296,6 +1567,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) struct ice_netdev_priv *np; struct net_device *netdev; u8 mac_addr[ETH_ALEN]; + int err; netdev = alloc_etherdev_mqs(sizeof(struct ice_netdev_priv), vsi->alloc_txq, vsi->alloc_rxq); @@ -2353,195 +1625,14 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = ICE_MAX_MTU; - return 0; -} - -/** - * ice_vsi_free_arrays - clean up vsi resources - * @vsi: pointer to VSI being cleared - * @free_qvectors: bool to specify if q_vectors should be deallocated - */ -static void ice_vsi_free_arrays(struct ice_vsi *vsi, bool free_qvectors) -{ - struct ice_pf *pf = vsi->back; - - /* free the ring and vector containers */ - if (free_qvectors && vsi->q_vectors) { - devm_kfree(&pf->pdev->dev, vsi->q_vectors); - vsi->q_vectors = NULL; - } - if (vsi->tx_rings) { - devm_kfree(&pf->pdev->dev, vsi->tx_rings); - vsi->tx_rings = NULL; - } - if (vsi->rx_rings) { - devm_kfree(&pf->pdev->dev, vsi->rx_rings); - vsi->rx_rings = NULL; - } -} - -/** - * ice_vsi_clear - clean up and deallocate the provided vsi - * @vsi: pointer to VSI being cleared - * - * This deallocates the vsi's queue resources, removes it from the PF's - * VSI array if necessary, and deallocates the VSI - * - * Returns 0 on success, negative on failure - */ -static int ice_vsi_clear(struct ice_vsi *vsi) -{ - struct ice_pf *pf = NULL; - - if (!vsi) - return 0; - - if (!vsi->back) - return -EINVAL; - - pf = vsi->back; - - if (!pf->vsi[vsi->idx] || pf->vsi[vsi->idx] != vsi) { - dev_dbg(&pf->pdev->dev, "vsi does not exist at pf->vsi[%d]\n", - vsi->idx); - return -EINVAL; - } - - mutex_lock(&pf->sw_mutex); - /* updates the PF for this cleared vsi */ - - pf->vsi[vsi->idx] = NULL; - if (vsi->idx < pf->next_vsi) - pf->next_vsi = vsi->idx; - - ice_vsi_free_arrays(vsi, true); - mutex_unlock(&pf->sw_mutex); - devm_kfree(&pf->pdev->dev, vsi); - - return 0; -} - -/** - * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector - * @vsi: the VSI being configured - * @v_idx: index of the vector in the vsi struct - * - * We allocate one q_vector. If allocation fails we return -ENOMEM. - */ -static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx) -{ - struct ice_pf *pf = vsi->back; - struct ice_q_vector *q_vector; - - /* allocate q_vector */ - q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL); - if (!q_vector) - return -ENOMEM; - - q_vector->vsi = vsi; - q_vector->v_idx = v_idx; - /* only set affinity_mask if the CPU is online */ - if (cpu_online(v_idx)) - cpumask_set_cpu(v_idx, &q_vector->affinity_mask); - - if (vsi->netdev) - netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll, - NAPI_POLL_WEIGHT); - /* tie q_vector and vsi together */ - vsi->q_vectors[v_idx] = q_vector; - - return 0; -} - -/** - * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors - * @vsi: the VSI being configured - * - * We allocate one q_vector per queue interrupt. If allocation fails we - * return -ENOMEM. - */ -static int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - int v_idx = 0, num_q_vectors; - int err; - - if (vsi->q_vectors[0]) { - dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n", - vsi->vsi_num); - return -EEXIST; - } - - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { - num_q_vectors = vsi->num_q_vectors; - } else { - err = -EINVAL; - goto err_out; - } - - for (v_idx = 0; v_idx < num_q_vectors; v_idx++) { - err = ice_vsi_alloc_q_vector(vsi, v_idx); - if (err) - goto err_out; - } - - return 0; - -err_out: - while (v_idx--) - ice_free_q_vector(vsi, v_idx); - - dev_err(&pf->pdev->dev, - "Failed to allocate %d q_vector for VSI %d, ret=%d\n", - vsi->num_q_vectors, vsi->vsi_num, err); - vsi->num_q_vectors = 0; - return err; -} - -/** - * ice_vsi_setup_vector_base - Set up the base vector for the given VSI - * @vsi: ptr to the VSI - * - * This should only be called after ice_vsi_alloc() which allocates the - * corresponding SW VSI structure and initializes num_queue_pairs for the - * newly allocated VSI. - * - * Returns 0 on success or negative on failure - */ -static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - int num_q_vectors = 0; - - if (vsi->base_vector) { - dev_dbg(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n", - vsi->vsi_num, vsi->base_vector); - return -EEXIST; - } - - if (!test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) - return -ENOENT; - - switch (vsi->type) { - case ICE_VSI_PF: - num_q_vectors = vsi->num_q_vectors; - break; - default: - dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n", - vsi->type); - break; - } + err = register_netdev(vsi->netdev); + if (err) + return err; - if (num_q_vectors) - vsi->base_vector = ice_get_res(pf, pf->irq_tracker, - num_q_vectors, vsi->idx); + netif_carrier_off(vsi->netdev); - if (vsi->base_vector < 0) { - dev_err(&pf->pdev->dev, - "Failed to get tracking for %d vectors for VSI %d, err=%d\n", - num_q_vectors, vsi->vsi_num, vsi->base_vector); - return -ENOENT; - } + /* make sure transmit queues start off as stopped */ + netif_tx_stop_all_queues(vsi->netdev); return 0; } @@ -2561,327 +1652,17 @@ void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) } /** - * ice_vsi_cfg_rss - Configure RSS params for a VSI - * @vsi: VSI to be configured - */ -static int ice_vsi_cfg_rss(struct ice_vsi *vsi) -{ - u8 seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE]; - struct ice_aqc_get_set_rss_keys *key; - struct ice_pf *pf = vsi->back; - enum ice_status status; - int err = 0; - u8 *lut; - - vsi->rss_size = min_t(int, vsi->rss_size, vsi->num_rxq); - - lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL); - if (!lut) - return -ENOMEM; - - if (vsi->rss_lut_user) - memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); - else - ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size); - - status = ice_aq_set_rss_lut(&pf->hw, vsi->vsi_num, vsi->rss_lut_type, - lut, vsi->rss_table_size); - - if (status) { - dev_err(&vsi->back->pdev->dev, - "set_rss_lut failed, error %d\n", status); - err = -EIO; - goto ice_vsi_cfg_rss_exit; - } - - key = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*key), GFP_KERNEL); - if (!key) { - err = -ENOMEM; - goto ice_vsi_cfg_rss_exit; - } - - if (vsi->rss_hkey_user) - memcpy(seed, vsi->rss_hkey_user, - ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE); - else - netdev_rss_key_fill((void *)seed, - ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE); - memcpy(&key->standard_rss_key, seed, - ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE); - - status = ice_aq_set_rss_key(&pf->hw, vsi->vsi_num, key); - - if (status) { - dev_err(&vsi->back->pdev->dev, "set_rss_key failed, error %d\n", - status); - err = -EIO; - } - - devm_kfree(&pf->pdev->dev, key); -ice_vsi_cfg_rss_exit: - devm_kfree(&pf->pdev->dev, lut); - return err; -} - -/** - * ice_vsi_reinit_setup - return resource and reallocate resource for a VSI - * @vsi: pointer to the ice_vsi - * - * This reallocates the VSIs queue resources - * - * Returns 0 on success and negative value on failure - */ -static int ice_vsi_reinit_setup(struct ice_vsi *vsi) -{ - u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; - int ret, i; - - if (!vsi) - return -EINVAL; - - ice_vsi_free_q_vectors(vsi); - ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx); - vsi->base_vector = 0; - ice_vsi_clear_rings(vsi); - ice_vsi_free_arrays(vsi, false); - ice_vsi_set_num_qs(vsi); - - /* Initialize VSI struct elements and create VSI in FW */ - ret = ice_vsi_add(vsi); - if (ret < 0) - goto err_vsi; - - ret = ice_vsi_alloc_arrays(vsi, false); - if (ret < 0) - goto err_vsi; - - switch (vsi->type) { - case ICE_VSI_PF: - if (!vsi->netdev) { - ret = ice_cfg_netdev(vsi); - if (ret) - goto err_rings; - - ret = register_netdev(vsi->netdev); - if (ret) - goto err_rings; - - netif_carrier_off(vsi->netdev); - netif_tx_stop_all_queues(vsi->netdev); - } - - ret = ice_vsi_alloc_q_vectors(vsi); - if (ret) - goto err_rings; - - ret = ice_vsi_setup_vector_base(vsi); - if (ret) - goto err_vectors; - - ret = ice_vsi_alloc_rings(vsi); - if (ret) - goto err_vectors; - - ice_vsi_map_rings_to_vectors(vsi); - break; - default: - break; - } - - ice_vsi_set_tc_cfg(vsi); - - /* configure VSI nodes based on number of queues and TC's */ - for (i = 0; i < vsi->tc_cfg.numtc; i++) - max_txqs[i] = vsi->num_txq; - - ret = ice_cfg_vsi_lan(vsi->port_info, vsi->vsi_num, - vsi->tc_cfg.ena_tc, max_txqs); - if (ret) { - dev_info(&vsi->back->pdev->dev, - "Failed VSI lan queue config\n"); - goto err_vectors; - } - return 0; - -err_vectors: - ice_vsi_free_q_vectors(vsi); -err_rings: - if (vsi->netdev) { - vsi->current_netdev_flags = 0; - unregister_netdev(vsi->netdev); - free_netdev(vsi->netdev); - vsi->netdev = NULL; - } -err_vsi: - ice_vsi_clear(vsi); - set_bit(__ICE_RESET_FAILED, vsi->back->state); - return ret; -} - -/** - * ice_vsi_setup - Set up a VSI by a given type + * ice_pf_vsi_setup - Set up a PF VSI * @pf: board private structure - * @type: VSI type * @pi: pointer to the port_info instance * - * This allocates the sw VSI structure and its queue resources. - * - * Returns pointer to the successfully allocated and configure VSI sw struct on - * success, otherwise returns NULL on failure. + * Returns pointer to the successfully allocated VSI sw struct on success, + * otherwise returns NULL on failure. */ static struct ice_vsi * -ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type, - struct ice_port_info *pi) -{ - u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; - struct device *dev = &pf->pdev->dev; - struct ice_vsi_ctx ctxt = { 0 }; - struct ice_vsi *vsi; - int ret, i; - - vsi = ice_vsi_alloc(pf, type); - if (!vsi) { - dev_err(dev, "could not allocate VSI\n"); - return NULL; - } - - vsi->port_info = pi; - vsi->vsw = pf->first_sw; - - if (ice_vsi_get_qs(vsi)) { - dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n", - vsi->idx); - goto err_get_qs; - } - - /* set RSS capabilities */ - ice_vsi_set_rss_params(vsi); - - /* create the VSI */ - ret = ice_vsi_add(vsi); - if (ret) - goto err_vsi; - - ctxt.vsi_num = vsi->vsi_num; - - switch (vsi->type) { - case ICE_VSI_PF: - ret = ice_cfg_netdev(vsi); - if (ret) - goto err_cfg_netdev; - - ret = register_netdev(vsi->netdev); - if (ret) - goto err_register_netdev; - - netif_carrier_off(vsi->netdev); - - /* make sure transmit queues start off as stopped */ - netif_tx_stop_all_queues(vsi->netdev); - ret = ice_vsi_alloc_q_vectors(vsi); - if (ret) - goto err_msix; - - ret = ice_vsi_setup_vector_base(vsi); - if (ret) - goto err_rings; - - ret = ice_vsi_alloc_rings(vsi); - if (ret) - goto err_rings; - - ice_vsi_map_rings_to_vectors(vsi); - - /* Do not exit if configuring RSS had an issue, at least - * receive traffic on first queue. Hence no need to capture - * return value - */ - if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) - ice_vsi_cfg_rss(vsi); - break; - default: - /* if vsi type is not recognized, clean up the resources and - * exit - */ - goto err_rings; - } - - ice_vsi_set_tc_cfg(vsi); - - /* configure VSI nodes based on number of queues and TC's */ - for (i = 0; i < vsi->tc_cfg.numtc; i++) - max_txqs[i] = vsi->num_txq; - - ret = ice_cfg_vsi_lan(vsi->port_info, vsi->vsi_num, - vsi->tc_cfg.ena_tc, max_txqs); - if (ret) { - dev_info(&pf->pdev->dev, "Failed VSI lan queue config\n"); - goto err_rings; - } - - return vsi; - -err_rings: - ice_vsi_free_q_vectors(vsi); -err_msix: - if (vsi->netdev && vsi->netdev->reg_state == NETREG_REGISTERED) - unregister_netdev(vsi->netdev); -err_register_netdev: - if (vsi->netdev) { - free_netdev(vsi->netdev); - vsi->netdev = NULL; - } -err_cfg_netdev: - ret = ice_aq_free_vsi(&pf->hw, &ctxt, false, NULL); - if (ret) - dev_err(&vsi->back->pdev->dev, - "Free VSI AQ call failed, err %d\n", ret); -err_vsi: - ice_vsi_put_qs(vsi); -err_get_qs: - pf->q_left_tx += vsi->alloc_txq; - pf->q_left_rx += vsi->alloc_rxq; - ice_vsi_clear(vsi); - - return NULL; -} - -/** - * ice_vsi_add_vlan - Add vsi membership for given vlan - * @vsi: the vsi being configured - * @vid: vlan id to be added - */ -static int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid) +ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - struct ice_fltr_list_entry *tmp; - struct ice_pf *pf = vsi->back; - LIST_HEAD(tmp_add_list); - enum ice_status status; - int err = 0; - - tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_KERNEL); - if (!tmp) - return -ENOMEM; - - tmp->fltr_info.lkup_type = ICE_SW_LKUP_VLAN; - tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI; - tmp->fltr_info.flag = ICE_FLTR_TX; - tmp->fltr_info.src = vsi->vsi_num; - tmp->fltr_info.fwd_id.vsi_id = vsi->vsi_num; - tmp->fltr_info.l_data.vlan.vlan_id = vid; - - INIT_LIST_HEAD(&tmp->list_entry); - list_add(&tmp->list_entry, &tmp_add_list); - - status = ice_add_vlan(&pf->hw, &tmp_add_list); - if (status) { - err = -ENODEV; - dev_err(&pf->pdev->dev, "Failure Adding VLAN %d on VSI %i\n", - vid, vsi->vsi_num); - } - - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); - return err; + return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID); } /** @@ -2897,7 +1678,7 @@ static int ice_vlan_rx_add_vid(struct net_device *netdev, { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; - int ret = 0; + int ret; if (vid >= VLAN_N_VID) { netdev_err(netdev, "VLAN id requested %d is out of range %d\n", @@ -2908,6 +1689,13 @@ static int ice_vlan_rx_add_vid(struct net_device *netdev, if (vsi->info.pvid) return -EINVAL; + /* Enable VLAN pruning when VLAN 0 is added */ + if (unlikely(!vid)) { + ret = ice_cfg_vlan_pruning(vsi, true); + if (ret) + return ret; + } + /* Add all VLAN ids including 0 to the switch filter. VLAN id 0 is * needed to continue allowing all untagged packets since VLAN prune * list is applied to all packets by the switch @@ -2921,38 +1709,6 @@ static int ice_vlan_rx_add_vid(struct net_device *netdev, } /** - * ice_vsi_kill_vlan - Remove VSI membership for a given VLAN - * @vsi: the VSI being configured - * @vid: VLAN id to be removed - */ -static void ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid) -{ - struct ice_fltr_list_entry *list; - struct ice_pf *pf = vsi->back; - LIST_HEAD(tmp_add_list); - - list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL); - if (!list) - return; - - list->fltr_info.lkup_type = ICE_SW_LKUP_VLAN; - list->fltr_info.fwd_id.vsi_id = vsi->vsi_num; - list->fltr_info.fltr_act = ICE_FWD_TO_VSI; - list->fltr_info.l_data.vlan.vlan_id = vid; - list->fltr_info.flag = ICE_FLTR_TX; - list->fltr_info.src = vsi->vsi_num; - - INIT_LIST_HEAD(&list->list_entry); - list_add(&list->list_entry, &tmp_add_list); - - if (ice_remove_vlan(&pf->hw, &tmp_add_list)) - dev_err(&pf->pdev->dev, "Error removing VLAN %d on vsi %i\n", - vid, vsi->vsi_num); - - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); -} - -/** * ice_vlan_rx_kill_vid - Remove a vlan id filter from HW offload * @netdev: network interface to be adjusted * @proto: unused protocol @@ -2965,19 +1721,25 @@ static int ice_vlan_rx_kill_vid(struct net_device *netdev, { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + int status; if (vsi->info.pvid) return -EINVAL; - /* return code is ignored as there is nothing a user - * can do about failure to remove and a log message was - * already printed from the other function + /* Make sure ice_vsi_kill_vlan is successful before updating VLAN + * information */ - ice_vsi_kill_vlan(vsi, vid); + status = ice_vsi_kill_vlan(vsi, vid); + if (status) + return status; clear_bit(vid, vsi->active_vlans); - return 0; + /* Disable VLAN pruning when VLAN 0 is removed */ + if (unlikely(!vid)) + status = ice_cfg_vlan_pruning(vsi, false); + + return status; } /** @@ -2993,59 +1755,73 @@ static int ice_setup_pf_sw(struct ice_pf *pf) struct ice_vsi *vsi; int status = 0; - if (!ice_is_reset_recovery_pending(pf->state)) { - vsi = ice_vsi_setup(pf, ICE_VSI_PF, pf->hw.port_info); - if (!vsi) { - status = -ENOMEM; - goto error_exit; - } - } else { - vsi = pf->vsi[0]; - status = ice_vsi_reinit_setup(vsi); - if (status < 0) - return -EIO; + if (ice_is_reset_in_progress(pf->state)) + return -EBUSY; + + vsi = ice_pf_vsi_setup(pf, pf->hw.port_info); + if (!vsi) { + status = -ENOMEM; + goto unroll_vsi_setup; } - /* tmp_add_list contains a list of MAC addresses for which MAC - * filters need to be programmed. Add the VSI's unicast MAC to - * this list + status = ice_cfg_netdev(vsi); + if (status) { + status = -ENODEV; + goto unroll_vsi_setup; + } + + /* registering the NAPI handler requires both the queues and + * netdev to be created, which are done in ice_pf_vsi_setup() + * and ice_cfg_netdev() respectively + */ + ice_napi_add(vsi); + + /* To add a MAC filter, first add the MAC to a list and then + * pass the list to ice_add_mac. */ + + /* Add a unicast MAC filter so the VSI can get its packets */ status = ice_add_mac_to_list(vsi, &tmp_add_list, vsi->port_info->mac.perm_addr); if (status) - goto error_exit; + goto unroll_napi_add; /* VSI needs to receive broadcast traffic, so add the broadcast - * MAC address to the list. + * MAC address to the list as well. */ eth_broadcast_addr(broadcast); status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast); if (status) - goto error_exit; + goto free_mac_list; /* program MAC filters for entries in tmp_add_list */ status = ice_add_mac(&pf->hw, &tmp_add_list); if (status) { dev_err(&pf->pdev->dev, "Could not add MAC filters\n"); status = -ENOMEM; - goto error_exit; + goto free_mac_list; } ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); return status; -error_exit: +free_mac_list: ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); +unroll_napi_add: if (vsi) { - ice_vsi_free_q_vectors(vsi); - if (vsi->netdev && vsi->netdev->reg_state == NETREG_REGISTERED) - unregister_netdev(vsi->netdev); + ice_napi_del(vsi); if (vsi->netdev) { + if (vsi->netdev->reg_state == NETREG_REGISTERED) + unregister_netdev(vsi->netdev); free_netdev(vsi->netdev); vsi->netdev = NULL; } + } +unroll_vsi_setup: + if (vsi) { + ice_vsi_free_q_vectors(vsi); ice_vsi_delete(vsi); ice_vsi_put_qs(vsi); pf->q_left_tx += vsi->alloc_txq; @@ -3086,10 +1862,7 @@ static void ice_determine_q_usage(struct ice_pf *pf) */ static void ice_deinit_pf(struct ice_pf *pf) { - if (pf->serv_tmr.function) - del_timer_sync(&pf->serv_tmr); - if (pf->serv_task.func) - cancel_work_sync(&pf->serv_task); + ice_service_task_stop(pf); mutex_destroy(&pf->sw_mutex); mutex_destroy(&pf->avail_q_mutex); } @@ -3102,6 +1875,15 @@ static void ice_init_pf(struct ice_pf *pf) { bitmap_zero(pf->flags, ICE_PF_FLAGS_NBITS); set_bit(ICE_FLAG_MSIX_ENA, pf->flags); +#ifdef CONFIG_PCI_IOV + if (pf->hw.func_caps.common_cap.sr_iov_1_1) { + struct ice_hw *hw = &pf->hw; + + set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); + pf->num_vfs_supported = min_t(int, hw->func_caps.num_allocd_vfs, + ICE_MAX_VF_COUNT); + } +#endif /* CONFIG_PCI_IOV */ mutex_init(&pf->sw_mutex); mutex_init(&pf->avail_q_mutex); @@ -3144,6 +1926,7 @@ static int ice_ena_msix_range(struct ice_pf *pf) /* reserve vectors for LAN traffic */ pf->num_lan_msix = min_t(int, num_online_cpus(), v_left); v_budget += pf->num_lan_msix; + v_left -= pf->num_lan_msix; pf->msix_entries = devm_kcalloc(&pf->pdev->dev, v_budget, sizeof(struct msix_entry), GFP_KERNEL); @@ -3171,10 +1954,11 @@ static int ice_ena_msix_range(struct ice_pf *pf) "not enough vectors. requested = %d, obtained = %d\n", v_budget, v_actual); if (v_actual >= (pf->num_lan_msix + 1)) { - pf->num_avail_msix = v_actual - (pf->num_lan_msix + 1); + pf->num_avail_sw_msix = v_actual - + (pf->num_lan_msix + 1); } else if (v_actual >= 2) { pf->num_lan_msix = 1; - pf->num_avail_msix = v_actual - 2; + pf->num_avail_sw_msix = v_actual - 2; } else { pci_disable_msix(pf->pdev); err = -ERANGE; @@ -3207,12 +1991,32 @@ static void ice_dis_msix(struct ice_pf *pf) } /** + * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme + * @pf: board private structure + */ +static void ice_clear_interrupt_scheme(struct ice_pf *pf) +{ + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) + ice_dis_msix(pf); + + if (pf->sw_irq_tracker) { + devm_kfree(&pf->pdev->dev, pf->sw_irq_tracker); + pf->sw_irq_tracker = NULL; + } + + if (pf->hw_irq_tracker) { + devm_kfree(&pf->pdev->dev, pf->hw_irq_tracker); + pf->hw_irq_tracker = NULL; + } +} + +/** * ice_init_interrupt_scheme - Determine proper interrupt scheme * @pf: board private structure to initialize */ static int ice_init_interrupt_scheme(struct ice_pf *pf) { - int vectors = 0; + int vectors = 0, hw_vectors = 0; ssize_t size; if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) @@ -3226,28 +2030,31 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) /* set up vector assignment tracking */ size = sizeof(struct ice_res_tracker) + (sizeof(u16) * vectors); - pf->irq_tracker = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL); - if (!pf->irq_tracker) { + pf->sw_irq_tracker = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL); + if (!pf->sw_irq_tracker) { ice_dis_msix(pf); return -ENOMEM; } - pf->irq_tracker->num_entries = vectors; + /* populate SW interrupts pool with number of OS granted IRQs. */ + pf->num_avail_sw_msix = vectors; + pf->sw_irq_tracker->num_entries = vectors; - return 0; -} + /* set up HW vector assignment tracking */ + hw_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; + size = sizeof(struct ice_res_tracker) + (sizeof(u16) * hw_vectors); -/** - * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme - * @pf: board private structure - */ -static void ice_clear_interrupt_scheme(struct ice_pf *pf) -{ - if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) - ice_dis_msix(pf); + pf->hw_irq_tracker = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL); + if (!pf->hw_irq_tracker) { + ice_clear_interrupt_scheme(pf); + return -ENOMEM; + } + + /* populate HW interrupts pool with number of HW supported irqs. */ + pf->num_avail_hw_msix = hw_vectors; + pf->hw_irq_tracker->num_entries = hw_vectors; - devm_kfree(&pf->pdev->dev, pf->irq_tracker); - pf->irq_tracker = NULL; + return 0; } /** @@ -3271,7 +2078,7 @@ static int ice_probe(struct pci_dev *pdev, err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev)); if (err) { - dev_err(&pdev->dev, "I/O map error %d\n", err); + dev_err(&pdev->dev, "BAR0 I/O map error %d\n", err); return err; } @@ -3294,6 +2101,8 @@ static int ice_probe(struct pci_dev *pdev, pf->pdev = pdev; pci_set_drvdata(pdev, pf); set_bit(__ICE_DOWN, pf->state); + /* Disable service task until DOWN bit is cleared */ + set_bit(__ICE_SERVICE_DIS, pf->state); hw = &pf->hw; hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0]; @@ -3351,6 +2160,9 @@ static int ice_probe(struct pci_dev *pdev, goto err_init_interrupt_unroll; } + /* Driver is mostly up */ + clear_bit(__ICE_DOWN, pf->state); + /* In case of MSIX we are going to setup the misc vector right here * to handle admin queue events etc. In case of legacy and MSI * the misc functionality and queue processing is combined in @@ -3373,7 +2185,11 @@ static int ice_probe(struct pci_dev *pdev, goto err_msix_misc_unroll; } - pf->first_sw->bridge_mode = BRIDGE_MODE_VEB; + if (hw->evb_veb) + pf->first_sw->bridge_mode = BRIDGE_MODE_VEB; + else + pf->first_sw->bridge_mode = BRIDGE_MODE_VEPA; + pf->first_sw->pf = pf; /* record the sw_id available for later use */ @@ -3386,8 +2202,7 @@ static int ice_probe(struct pci_dev *pdev, goto err_alloc_sw_unroll; } - /* Driver is mostly up */ - clear_bit(__ICE_DOWN, pf->state); + clear_bit(__ICE_SERVICE_DIS, pf->state); /* since everything is good, start the service timer */ mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period)); @@ -3401,6 +2216,7 @@ static int ice_probe(struct pci_dev *pdev, return 0; err_alloc_sw_unroll: + set_bit(__ICE_SERVICE_DIS, pf->state); set_bit(__ICE_DOWN, pf->state); devm_kfree(&pf->pdev->dev, pf->first_sw); err_msix_misc_unroll: @@ -3423,25 +2239,23 @@ err_exit_unroll: static void ice_remove(struct pci_dev *pdev) { struct ice_pf *pf = pci_get_drvdata(pdev); - int i = 0; - int err; + int i; if (!pf) return; set_bit(__ICE_DOWN, pf->state); + ice_service_task_stop(pf); - for (i = 0; i < pf->num_alloc_vsi; i++) { + if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) + ice_free_vfs(pf); + ice_vsi_release_all(pf); + ice_free_irq_msix_misc(pf); + ice_for_each_vsi(pf, i) { if (!pf->vsi[i]) continue; - - err = ice_vsi_release(pf->vsi[i]); - if (err) - dev_dbg(&pf->pdev->dev, "Failed to release VSI index %d (err %d)\n", - i, err); + ice_vsi_free_q_vectors(pf->vsi[i]); } - - ice_free_irq_msix_misc(pf); ice_clear_interrupt_scheme(pf); ice_deinit_pf(pf); ice_deinit_hw(&pf->hw); @@ -3460,8 +2274,6 @@ static const struct pci_device_id ice_pci_tbl[] = { { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_BACKPLANE), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_QSFP), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_SFP), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_10G_BASE_T), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_SGMII), 0 }, /* required last entry */ { 0, } }; @@ -3472,6 +2284,7 @@ static struct pci_driver ice_driver = { .id_table = ice_pci_tbl, .probe = ice_probe, .remove = ice_remove, + .sriov_configure = ice_sriov_configure, }; /** @@ -3487,7 +2300,7 @@ static int __init ice_module_init(void) pr_info("%s - version %s\n", ice_driver_string, ice_drv_ver); pr_info("%s\n", ice_copyright); - ice_wq = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, KBUILD_MODNAME); + ice_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, KBUILD_MODNAME); if (!ice_wq) { pr_err("Failed to create workqueue\n"); return -ENOMEM; @@ -3549,7 +2362,7 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) } if (test_bit(__ICE_DOWN, pf->state) || - ice_is_reset_recovery_pending(pf->state)) { + ice_is_reset_in_progress(pf->state)) { netdev_err(netdev, "can't set mac %pM. device not ready\n", mac); return -EBUSY; @@ -3709,75 +2522,6 @@ static int ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[], } /** - * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx - * @vsi: the vsi being changed - */ -static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) -{ - struct device *dev = &vsi->back->pdev->dev; - struct ice_hw *hw = &vsi->back->hw; - struct ice_vsi_ctx ctxt = { 0 }; - enum ice_status status; - - /* Here we are configuring the VSI to let the driver add VLAN tags by - * setting port_vlan_flags to ICE_AQ_VSI_PVLAN_MODE_ALL. The actual VLAN - * tag insertion happens in the Tx hot path, in ice_tx_map. - */ - ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_MODE_ALL; - - ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); - ctxt.vsi_num = vsi->vsi_num; - - status = ice_aq_update_vsi(hw, &ctxt, NULL); - if (status) { - dev_err(dev, "update VSI for VLAN insert failed, err %d aq_err %d\n", - status, hw->adminq.sq_last_status); - return -EIO; - } - - vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags; - return 0; -} - -/** - * ice_vsi_manage_vlan_stripping - Manage VLAN stripping for the VSI for Rx - * @vsi: the vsi being changed - * @ena: boolean value indicating if this is a enable or disable request - */ -static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) -{ - struct device *dev = &vsi->back->pdev->dev; - struct ice_hw *hw = &vsi->back->hw; - struct ice_vsi_ctx ctxt = { 0 }; - enum ice_status status; - - /* Here we are configuring what the VSI should do with the VLAN tag in - * the Rx packet. We can either leave the tag in the packet or put it in - * the Rx descriptor. - */ - if (ena) { - /* Strip VLAN tag from Rx packet and put it in the desc */ - ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH; - } else { - /* Disable stripping. Leave tag in packet */ - ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_NOTHING; - } - - ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); - ctxt.vsi_num = vsi->vsi_num; - - status = ice_aq_update_vsi(hw, &ctxt, NULL); - if (status) { - dev_err(dev, "update VSI for VALN strip failed, ena = %d err %d aq_err %d\n", - ena, status, hw->adminq.sq_last_status); - return -EIO; - } - - vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags; - return 0; -} - -/** * ice_set_features - set the netdev feature flags * @netdev: ptr to the netdev being adjusted * @features: the feature set that the stack is suggesting @@ -3789,6 +2533,12 @@ static int ice_set_features(struct net_device *netdev, struct ice_vsi *vsi = np->vsi; int ret = 0; + if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH)) + ret = ice_vsi_manage_rss_lut(vsi, true); + else if (!(features & NETIF_F_RXHASH) && + netdev->features & NETIF_F_RXHASH) + ret = ice_vsi_manage_rss_lut(vsi, false); + if ((features & NETIF_F_HW_VLAN_CTAG_RX) && !(netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) ret = ice_vsi_manage_vlan_stripping(vsi, true); @@ -3847,248 +2597,6 @@ static int ice_restore_vlan(struct ice_vsi *vsi) } /** - * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance - * @ring: The Tx ring to configure - * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized - * @pf_q: queue index in the PF space - * - * Configure the Tx descriptor ring in TLAN context. - */ -static void -ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) -{ - struct ice_vsi *vsi = ring->vsi; - struct ice_hw *hw = &vsi->back->hw; - - tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S; - - tlan_ctx->port_num = vsi->port_info->lport; - - /* Transmit Queue Length */ - tlan_ctx->qlen = ring->count; - - /* PF number */ - tlan_ctx->pf_num = hw->pf_id; - - /* queue belongs to a specific VSI type - * VF / VM index should be programmed per vmvf_type setting: - * for vmvf_type = VF, it is VF number between 0-256 - * for vmvf_type = VM, it is VM number between 0-767 - * for PF or EMP this field should be set to zero - */ - switch (vsi->type) { - case ICE_VSI_PF: - tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; - break; - default: - return; - } - - /* make sure the context is associated with the right VSI */ - tlan_ctx->src_vsi = vsi->vsi_num; - - tlan_ctx->tso_ena = ICE_TX_LEGACY; - tlan_ctx->tso_qnum = pf_q; - - /* Legacy or Advanced Host Interface: - * 0: Advanced Host Interface - * 1: Legacy Host Interface - */ - tlan_ctx->legacy_int = ICE_TX_LEGACY; -} - -/** - * ice_vsi_cfg_txqs - Configure the VSI for Tx - * @vsi: the VSI being configured - * - * Return 0 on success and a negative value on error - * Configure the Tx VSI for operation. - */ -static int ice_vsi_cfg_txqs(struct ice_vsi *vsi) -{ - struct ice_aqc_add_tx_qgrp *qg_buf; - struct ice_aqc_add_txqs_perq *txq; - struct ice_pf *pf = vsi->back; - enum ice_status status; - u16 buf_len, i, pf_q; - int err = 0, tc = 0; - u8 num_q_grps; - - buf_len = sizeof(struct ice_aqc_add_tx_qgrp); - qg_buf = devm_kzalloc(&pf->pdev->dev, buf_len, GFP_KERNEL); - if (!qg_buf) - return -ENOMEM; - - if (vsi->num_txq > ICE_MAX_TXQ_PER_TXQG) { - err = -EINVAL; - goto err_cfg_txqs; - } - qg_buf->num_txqs = 1; - num_q_grps = 1; - - /* set up and configure the tx queues */ - ice_for_each_txq(vsi, i) { - struct ice_tlan_ctx tlan_ctx = { 0 }; - - pf_q = vsi->txq_map[i]; - ice_setup_tx_ctx(vsi->tx_rings[i], &tlan_ctx, pf_q); - /* copy context contents into the qg_buf */ - qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q); - ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx, - ice_tlan_ctx_info); - - /* init queue specific tail reg. It is referred as transmit - * comm scheduler queue doorbell. - */ - vsi->tx_rings[i]->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q); - status = ice_ena_vsi_txq(vsi->port_info, vsi->vsi_num, tc, - num_q_grps, qg_buf, buf_len, NULL); - if (status) { - dev_err(&vsi->back->pdev->dev, - "Failed to set LAN Tx queue context, error: %d\n", - status); - err = -ENODEV; - goto err_cfg_txqs; - } - - /* Add Tx Queue TEID into the VSI tx ring from the response - * This will complete configuring and enabling the queue. - */ - txq = &qg_buf->txqs[0]; - if (pf_q == le16_to_cpu(txq->txq_id)) - vsi->tx_rings[i]->txq_teid = - le32_to_cpu(txq->q_teid); - } -err_cfg_txqs: - devm_kfree(&pf->pdev->dev, qg_buf); - return err; -} - -/** - * ice_setup_rx_ctx - Configure a receive ring context - * @ring: The Rx ring to configure - * - * Configure the Rx descriptor ring in RLAN context. - */ -static int ice_setup_rx_ctx(struct ice_ring *ring) -{ - struct ice_vsi *vsi = ring->vsi; - struct ice_hw *hw = &vsi->back->hw; - u32 rxdid = ICE_RXDID_FLEX_NIC; - struct ice_rlan_ctx rlan_ctx; - u32 regval; - u16 pf_q; - int err; - - /* what is RX queue number in global space of 2K rx queues */ - pf_q = vsi->rxq_map[ring->q_index]; - - /* clear the context structure first */ - memset(&rlan_ctx, 0, sizeof(rlan_ctx)); - - rlan_ctx.base = ring->dma >> 7; - - rlan_ctx.qlen = ring->count; - - /* Receive Packet Data Buffer Size. - * The Packet Data Buffer Size is defined in 128 byte units. - */ - rlan_ctx.dbuf = vsi->rx_buf_len >> ICE_RLAN_CTX_DBUF_S; - - /* use 32 byte descriptors */ - rlan_ctx.dsize = 1; - - /* Strip the Ethernet CRC bytes before the packet is posted to host - * memory. - */ - rlan_ctx.crcstrip = 1; - - /* L2TSEL flag defines the reported L2 Tags in the receive descriptor */ - rlan_ctx.l2tsel = 1; - - rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT; - rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT; - rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT; - - /* This controls whether VLAN is stripped from inner headers - * The VLAN in the inner L2 header is stripped to the receive - * descriptor if enabled by this flag. - */ - rlan_ctx.showiv = 0; - - /* Max packet size for this queue - must not be set to a larger value - * than 5 x DBUF - */ - rlan_ctx.rxmax = min_t(u16, vsi->max_frame, - ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len); - - /* Rx queue threshold in units of 64 */ - rlan_ctx.lrxqthresh = 1; - - /* Enable Flexible Descriptors in the queue context which - * allows this driver to select a specific receive descriptor format - */ - regval = rd32(hw, QRXFLXP_CNTXT(pf_q)); - regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) & - QRXFLXP_CNTXT_RXDID_IDX_M; - - /* increasing context priority to pick up profile id; - * default is 0x01; setting to 0x03 to ensure profile - * is programming if prev context is of same priority - */ - regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) & - QRXFLXP_CNTXT_RXDID_PRIO_M; - - wr32(hw, QRXFLXP_CNTXT(pf_q), regval); - - /* Absolute queue number out of 2K needs to be passed */ - err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q); - if (err) { - dev_err(&vsi->back->pdev->dev, - "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n", - pf_q, err); - return -EIO; - } - - /* init queue specific tail register */ - ring->tail = hw->hw_addr + QRX_TAIL(pf_q); - writel(0, ring->tail); - ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring)); - - return 0; -} - -/** - * ice_vsi_cfg_rxqs - Configure the VSI for Rx - * @vsi: the VSI being configured - * - * Return 0 on success and a negative value on error - * Configure the Rx VSI for operation. - */ -static int ice_vsi_cfg_rxqs(struct ice_vsi *vsi) -{ - int err = 0; - u16 i; - - if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN) - vsi->max_frame = vsi->netdev->mtu + - ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; - else - vsi->max_frame = ICE_RXBUF_2048; - - vsi->rx_buf_len = ICE_RXBUF_2048; - /* set up individual rings */ - for (i = 0; i < vsi->num_rxq && !err; i++) - err = ice_setup_rx_ctx(vsi->rx_rings[i]); - - if (err) { - dev_err(&vsi->back->pdev->dev, "ice_setup_rx_ctx failed\n"); - return -EIO; - } - return err; -} - -/** * ice_vsi_cfg - Setup the VSI * @vsi: the VSI being configured * @@ -4098,11 +2606,12 @@ static int ice_vsi_cfg(struct ice_vsi *vsi) { int err; - ice_set_rx_mode(vsi->netdev); - - err = ice_restore_vlan(vsi); - if (err) - return err; + if (vsi->netdev) { + ice_set_rx_mode(vsi->netdev); + err = ice_restore_vlan(vsi); + if (err) + return err; + } err = ice_vsi_cfg_txqs(vsi); if (!err) @@ -4112,200 +2621,6 @@ static int ice_vsi_cfg(struct ice_vsi *vsi) } /** - * ice_vsi_stop_tx_rings - Disable Tx rings - * @vsi: the VSI being configured - */ -static int ice_vsi_stop_tx_rings(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; - enum ice_status status; - u32 *q_teids, val; - u16 *q_ids, i; - int err = 0; - - if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS) - return -EINVAL; - - q_teids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_teids), - GFP_KERNEL); - if (!q_teids) - return -ENOMEM; - - q_ids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_ids), - GFP_KERNEL); - if (!q_ids) { - err = -ENOMEM; - goto err_alloc_q_ids; - } - - /* set up the tx queue list to be disabled */ - ice_for_each_txq(vsi, i) { - u16 v_idx; - - if (!vsi->tx_rings || !vsi->tx_rings[i]) { - err = -EINVAL; - goto err_out; - } - - q_ids[i] = vsi->txq_map[i]; - q_teids[i] = vsi->tx_rings[i]->txq_teid; - - /* clear cause_ena bit for disabled queues */ - val = rd32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx)); - val &= ~QINT_TQCTL_CAUSE_ENA_M; - wr32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx), val); - - /* software is expected to wait for 100 ns */ - ndelay(100); - - /* trigger a software interrupt for the vector associated to - * the queue to schedule napi handler - */ - v_idx = vsi->tx_rings[i]->q_vector->v_idx; - wr32(hw, GLINT_DYN_CTL(vsi->base_vector + v_idx), - GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M); - } - status = ice_dis_vsi_txq(vsi->port_info, vsi->num_txq, q_ids, q_teids, - NULL); - if (status) { - dev_err(&pf->pdev->dev, - "Failed to disable LAN Tx queues, error: %d\n", - status); - err = -ENODEV; - } - -err_out: - devm_kfree(&pf->pdev->dev, q_ids); - -err_alloc_q_ids: - devm_kfree(&pf->pdev->dev, q_teids); - - return err; -} - -/** - * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled - * @pf: the PF being configured - * @pf_q: the PF queue - * @ena: enable or disable state of the queue - * - * This routine will wait for the given Rx queue of the PF to reach the - * enabled or disabled state. - * Returns -ETIMEDOUT in case of failing to reach the requested state after - * multiple retries; else will return 0 in case of success. - */ -static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena) -{ - int i; - - for (i = 0; i < ICE_Q_WAIT_RETRY_LIMIT; i++) { - u32 rx_reg = rd32(&pf->hw, QRX_CTRL(pf_q)); - - if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M)) - break; - - usleep_range(10, 20); - } - if (i >= ICE_Q_WAIT_RETRY_LIMIT) - return -ETIMEDOUT; - - return 0; -} - -/** - * ice_vsi_ctrl_rx_rings - Start or stop a VSI's rx rings - * @vsi: the VSI being configured - * @ena: start or stop the rx rings - */ -static int ice_vsi_ctrl_rx_rings(struct ice_vsi *vsi, bool ena) -{ - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; - int i, j, ret = 0; - - for (i = 0; i < vsi->num_rxq; i++) { - int pf_q = vsi->rxq_map[i]; - u32 rx_reg; - - for (j = 0; j < ICE_Q_WAIT_MAX_RETRY; j++) { - rx_reg = rd32(hw, QRX_CTRL(pf_q)); - if (((rx_reg >> QRX_CTRL_QENA_REQ_S) & 1) == - ((rx_reg >> QRX_CTRL_QENA_STAT_S) & 1)) - break; - usleep_range(1000, 2000); - } - - /* Skip if the queue is already in the requested state */ - if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M)) - continue; - - /* turn on/off the queue */ - if (ena) - rx_reg |= QRX_CTRL_QENA_REQ_M; - else - rx_reg &= ~QRX_CTRL_QENA_REQ_M; - wr32(hw, QRX_CTRL(pf_q), rx_reg); - - /* wait for the change to finish */ - ret = ice_pf_rxq_wait(pf, pf_q, ena); - if (ret) { - dev_err(&pf->pdev->dev, - "VSI idx %d Rx ring %d %sable timeout\n", - vsi->idx, pf_q, (ena ? "en" : "dis")); - break; - } - } - - return ret; -} - -/** - * ice_vsi_start_rx_rings - start VSI's rx rings - * @vsi: the VSI whose rings are to be started - * - * Returns 0 on success and a negative value on error - */ -static int ice_vsi_start_rx_rings(struct ice_vsi *vsi) -{ - return ice_vsi_ctrl_rx_rings(vsi, true); -} - -/** - * ice_vsi_stop_rx_rings - stop VSI's rx rings - * @vsi: the VSI - * - * Returns 0 on success and a negative value on error - */ -static int ice_vsi_stop_rx_rings(struct ice_vsi *vsi) -{ - return ice_vsi_ctrl_rx_rings(vsi, false); -} - -/** - * ice_vsi_stop_tx_rx_rings - stop VSI's tx and rx rings - * @vsi: the VSI - * Returns 0 on success and a negative value on error - */ -static int ice_vsi_stop_tx_rx_rings(struct ice_vsi *vsi) -{ - int err_tx, err_rx; - - err_tx = ice_vsi_stop_tx_rings(vsi); - if (err_tx) - dev_dbg(&vsi->back->pdev->dev, "Failed to disable Tx rings\n"); - - err_rx = ice_vsi_stop_rx_rings(vsi); - if (err_rx) - dev_dbg(&vsi->back->pdev->dev, "Failed to disable Rx rings\n"); - - if (err_tx || err_rx) - return -EIO; - - return 0; -} - -/** * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI * @vsi: the VSI being configured */ @@ -4402,122 +2717,6 @@ static void ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts, } /** - * ice_stat_update40 - read 40 bit stat from the chip and update stat values - * @hw: ptr to the hardware info - * @hireg: high 32 bit HW register to read from - * @loreg: low 32 bit HW register to read from - * @prev_stat_loaded: bool to specify if previous stats are loaded - * @prev_stat: ptr to previous loaded stat value - * @cur_stat: ptr to current stat value - */ -static void ice_stat_update40(struct ice_hw *hw, u32 hireg, u32 loreg, - bool prev_stat_loaded, u64 *prev_stat, - u64 *cur_stat) -{ - u64 new_data; - - new_data = rd32(hw, loreg); - new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32; - - /* device stats are not reset at PFR, they likely will not be zeroed - * when the driver starts. So save the first values read and use them as - * offsets to be subtracted from the raw values in order to report stats - * that count from zero. - */ - if (!prev_stat_loaded) - *prev_stat = new_data; - if (likely(new_data >= *prev_stat)) - *cur_stat = new_data - *prev_stat; - else - /* to manage the potential roll-over */ - *cur_stat = (new_data + BIT_ULL(40)) - *prev_stat; - *cur_stat &= 0xFFFFFFFFFFULL; -} - -/** - * ice_stat_update32 - read 32 bit stat from the chip and update stat values - * @hw: ptr to the hardware info - * @reg: HW register to read from - * @prev_stat_loaded: bool to specify if previous stats are loaded - * @prev_stat: ptr to previous loaded stat value - * @cur_stat: ptr to current stat value - */ -static void ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, - u64 *prev_stat, u64 *cur_stat) -{ - u32 new_data; - - new_data = rd32(hw, reg); - - /* device stats are not reset at PFR, they likely will not be zeroed - * when the driver starts. So save the first values read and use them as - * offsets to be subtracted from the raw values in order to report stats - * that count from zero. - */ - if (!prev_stat_loaded) - *prev_stat = new_data; - if (likely(new_data >= *prev_stat)) - *cur_stat = new_data - *prev_stat; - else - /* to manage the potential roll-over */ - *cur_stat = (new_data + BIT_ULL(32)) - *prev_stat; -} - -/** - * ice_update_eth_stats - Update VSI-specific ethernet statistics counters - * @vsi: the VSI to be updated - */ -static void ice_update_eth_stats(struct ice_vsi *vsi) -{ - struct ice_eth_stats *prev_es, *cur_es; - struct ice_hw *hw = &vsi->back->hw; - u16 vsi_num = vsi->vsi_num; /* HW absolute index of a VSI */ - - prev_es = &vsi->eth_stats_prev; - cur_es = &vsi->eth_stats; - - ice_stat_update40(hw, GLV_GORCH(vsi_num), GLV_GORCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->rx_bytes, - &cur_es->rx_bytes); - - ice_stat_update40(hw, GLV_UPRCH(vsi_num), GLV_UPRCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->rx_unicast, - &cur_es->rx_unicast); - - ice_stat_update40(hw, GLV_MPRCH(vsi_num), GLV_MPRCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->rx_multicast, - &cur_es->rx_multicast); - - ice_stat_update40(hw, GLV_BPRCH(vsi_num), GLV_BPRCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->rx_broadcast, - &cur_es->rx_broadcast); - - ice_stat_update32(hw, GLV_RDPC(vsi_num), vsi->stat_offsets_loaded, - &prev_es->rx_discards, &cur_es->rx_discards); - - ice_stat_update40(hw, GLV_GOTCH(vsi_num), GLV_GOTCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->tx_bytes, - &cur_es->tx_bytes); - - ice_stat_update40(hw, GLV_UPTCH(vsi_num), GLV_UPTCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->tx_unicast, - &cur_es->tx_unicast); - - ice_stat_update40(hw, GLV_MPTCH(vsi_num), GLV_MPTCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->tx_multicast, - &cur_es->tx_multicast); - - ice_stat_update40(hw, GLV_BPTCH(vsi_num), GLV_BPTCL(vsi_num), - vsi->stat_offsets_loaded, &prev_es->tx_broadcast, - &cur_es->tx_broadcast); - - ice_stat_update32(hw, GLV_TEPC(vsi_num), vsi->stat_offsets_loaded, - &prev_es->tx_errors, &cur_es->tx_errors); - - vsi->stat_offsets_loaded = true; -} - -/** * ice_update_vsi_ring_stats - Update VSI stats counters * @vsi: the VSI to be updated */ @@ -4789,30 +2988,6 @@ void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) stats->rx_length_errors = vsi_stats->rx_length_errors; } -#ifdef CONFIG_NET_POLL_CONTROLLER -/** - * ice_netpoll - polling "interrupt" handler - * @netdev: network interface device structure - * - * Used by netconsole to send skbs without having to re-enable interrupts. - * This is not called in the normal interrupt path. - */ -static void ice_netpoll(struct net_device *netdev) -{ - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; - int i; - - if (test_bit(__ICE_DOWN, vsi->state) || - !test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) - return; - - for (i = 0; i < vsi->num_q_vectors; i++) - ice_msix_clean_rings(0, vsi->q_vectors[i]); -} -#endif /* CONFIG_NET_POLL_CONTROLLER */ - /** * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI * @vsi: VSI having NAPI disabled @@ -4834,7 +3009,7 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) */ int ice_down(struct ice_vsi *vsi) { - int i, err; + int i, tx_err, rx_err; /* Caller of this function is expected to set the * vsi->state __ICE_DOWN bit @@ -4845,7 +3020,18 @@ int ice_down(struct ice_vsi *vsi) } ice_vsi_dis_irq(vsi); - err = ice_vsi_stop_tx_rx_rings(vsi); + tx_err = ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0); + if (tx_err) + netdev_err(vsi->netdev, + "Failed stop Tx rings, VSI %d error %d\n", + vsi->vsi_num, tx_err); + + rx_err = ice_vsi_stop_rx_rings(vsi); + if (rx_err) + netdev_err(vsi->netdev, + "Failed stop Rx rings, VSI %d error %d\n", + vsi->vsi_num, rx_err); + ice_napi_disable_all(vsi); ice_for_each_txq(vsi, i) @@ -4854,10 +3040,14 @@ int ice_down(struct ice_vsi *vsi) ice_for_each_rxq(vsi, i) ice_clean_rx_ring(vsi->rx_rings[i]); - if (err) - netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n", + if (tx_err || rx_err) { + netdev_err(vsi->netdev, + "Failed to close VSI 0x%04X on switch 0x%04X\n", vsi->vsi_num, vsi->vsw->sw_id); - return err; + return -EIO; + } + + return 0; } /** @@ -4868,7 +3058,7 @@ int ice_down(struct ice_vsi *vsi) */ static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) { - int i, err; + int i, err = 0; if (!vsi->num_txq) { dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Tx queues\n", @@ -4877,6 +3067,7 @@ static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) } ice_for_each_txq(vsi, i) { + vsi->tx_rings[i]->netdev = vsi->netdev; err = ice_setup_tx_ring(vsi->tx_rings[i]); if (err) break; @@ -4893,7 +3084,7 @@ static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) */ static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) { - int i, err; + int i, err = 0; if (!vsi->num_rxq) { dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Rx queues\n", @@ -4902,6 +3093,7 @@ static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) } ice_for_each_rxq(vsi, i) { + vsi->rx_rings[i]->netdev = vsi->netdev; err = ice_setup_rx_ring(vsi->rx_rings[i]); if (err) break; @@ -4929,38 +3121,6 @@ static int ice_vsi_req_irq(struct ice_vsi *vsi, char *basename) } /** - * ice_vsi_free_tx_rings - Free Tx resources for VSI queues - * @vsi: the VSI having resources freed - */ -static void ice_vsi_free_tx_rings(struct ice_vsi *vsi) -{ - int i; - - if (!vsi->tx_rings) - return; - - ice_for_each_txq(vsi, i) - if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) - ice_free_tx_ring(vsi->tx_rings[i]); -} - -/** - * ice_vsi_free_rx_rings - Free Rx resources for VSI queues - * @vsi: the VSI having resources freed - */ -static void ice_vsi_free_rx_rings(struct ice_vsi *vsi) -{ - int i; - - if (!vsi->rx_rings) - return; - - ice_for_each_rxq(vsi, i) - if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc) - ice_free_rx_ring(vsi->rx_rings[i]); -} - -/** * ice_vsi_open - Called when a network interface is made active * @vsi: the VSI to open * @@ -5021,78 +3181,26 @@ err_setup_tx: } /** - * ice_vsi_close - Shut down a VSI - * @vsi: the VSI being shut down + * ice_vsi_release_all - Delete all VSIs + * @pf: PF from which all VSIs are being removed */ -static void ice_vsi_close(struct ice_vsi *vsi) +static void ice_vsi_release_all(struct ice_pf *pf) { - if (!test_and_set_bit(__ICE_DOWN, vsi->state)) - ice_down(vsi); - - ice_vsi_free_irq(vsi); - ice_vsi_free_tx_rings(vsi); - ice_vsi_free_rx_rings(vsi); -} - -/** - * ice_rss_clean - Delete RSS related VSI structures that hold user inputs - * @vsi: the VSI being removed - */ -static void ice_rss_clean(struct ice_vsi *vsi) -{ - struct ice_pf *pf; - - pf = vsi->back; + int err, i; - if (vsi->rss_hkey_user) - devm_kfree(&pf->pdev->dev, vsi->rss_hkey_user); - if (vsi->rss_lut_user) - devm_kfree(&pf->pdev->dev, vsi->rss_lut_user); -} - -/** - * ice_vsi_release - Delete a VSI and free its resources - * @vsi: the VSI being removed - * - * Returns 0 on success or < 0 on error - */ -static int ice_vsi_release(struct ice_vsi *vsi) -{ - struct ice_pf *pf; + if (!pf->vsi) + return; - if (!vsi->back) - return -ENODEV; - pf = vsi->back; + for (i = 0; i < pf->num_alloc_vsi; i++) { + if (!pf->vsi[i]) + continue; - if (vsi->netdev) { - unregister_netdev(vsi->netdev); - free_netdev(vsi->netdev); - vsi->netdev = NULL; + err = ice_vsi_release(pf->vsi[i]); + if (err) + dev_dbg(&pf->pdev->dev, + "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n", + i, err, pf->vsi[i]->vsi_num); } - - if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) - ice_rss_clean(vsi); - - /* Disable VSI and free resources */ - ice_vsi_dis_irq(vsi); - ice_vsi_close(vsi); - - /* reclaim interrupt vectors back to PF */ - ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx); - pf->num_avail_msix += vsi->num_q_vectors; - - ice_remove_vsi_fltr(&pf->hw, vsi->vsi_num); - ice_vsi_delete(vsi); - ice_vsi_free_q_vectors(vsi); - ice_vsi_clear_rings(vsi); - - ice_vsi_put_qs(vsi); - pf->q_left_tx += vsi->alloc_txq; - pf->q_left_rx += vsi->alloc_rxq; - - ice_vsi_clear(vsi); - - return 0; } /** @@ -5106,28 +3214,37 @@ static void ice_dis_vsi(struct ice_vsi *vsi) set_bit(__ICE_NEEDS_RESTART, vsi->state); - if (vsi->netdev && netif_running(vsi->netdev) && - vsi->type == ICE_VSI_PF) - vsi->netdev->netdev_ops->ndo_stop(vsi->netdev); - - ice_vsi_close(vsi); + if (vsi->type == ICE_VSI_PF && vsi->netdev) { + if (netif_running(vsi->netdev)) { + rtnl_lock(); + vsi->netdev->netdev_ops->ndo_stop(vsi->netdev); + rtnl_unlock(); + } else { + ice_vsi_close(vsi); + } + } } /** * ice_ena_vsi - resume a VSI * @vsi: the VSI being resume */ -static void ice_ena_vsi(struct ice_vsi *vsi) +static int ice_ena_vsi(struct ice_vsi *vsi) { - if (!test_and_clear_bit(__ICE_NEEDS_RESTART, vsi->state)) - return; + int err = 0; - if (vsi->netdev && netif_running(vsi->netdev)) - vsi->netdev->netdev_ops->ndo_open(vsi->netdev); - else if (ice_vsi_open(vsi)) - /* this clears the DOWN bit */ - dev_dbg(&vsi->back->pdev->dev, "Failed open VSI 0x%04X on switch 0x%04X\n", - vsi->vsi_num, vsi->vsw->sw_id); + if (test_and_clear_bit(__ICE_NEEDS_RESTART, vsi->state) && + vsi->netdev) { + if (netif_running(vsi->netdev)) { + rtnl_lock(); + err = vsi->netdev->netdev_ops->ndo_open(vsi->netdev); + rtnl_unlock(); + } else { + err = ice_vsi_open(vsi); + } + } + + return err; } /** @@ -5147,13 +3264,89 @@ static void ice_pf_dis_all_vsi(struct ice_pf *pf) * ice_pf_ena_all_vsi - Resume all VSIs on a PF * @pf: the PF */ -static void ice_pf_ena_all_vsi(struct ice_pf *pf) +static int ice_pf_ena_all_vsi(struct ice_pf *pf) { int v; ice_for_each_vsi(pf, v) if (pf->vsi[v]) - ice_ena_vsi(pf->vsi[v]); + if (ice_ena_vsi(pf->vsi[v])) + return -EIO; + + return 0; +} + +/** + * ice_vsi_rebuild_all - rebuild all VSIs in pf + * @pf: the PF + */ +static int ice_vsi_rebuild_all(struct ice_pf *pf) +{ + int i; + + /* loop through pf->vsi array and reinit the VSI if found */ + for (i = 0; i < pf->num_alloc_vsi; i++) { + int err; + + if (!pf->vsi[i]) + continue; + + /* VF VSI rebuild isn't supported yet */ + if (pf->vsi[i]->type == ICE_VSI_VF) + continue; + + err = ice_vsi_rebuild(pf->vsi[i]); + if (err) { + dev_err(&pf->pdev->dev, + "VSI at index %d rebuild failed\n", + pf->vsi[i]->idx); + return err; + } + + dev_info(&pf->pdev->dev, + "VSI at index %d rebuilt. vsi_num = 0x%x\n", + pf->vsi[i]->idx, pf->vsi[i]->vsi_num); + } + + return 0; +} + +/** + * ice_vsi_replay_all - replay all VSIs configuration in the PF + * @pf: the PF + */ +static int ice_vsi_replay_all(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + enum ice_status ret; + int i; + + /* loop through pf->vsi array and replay the VSI if found */ + for (i = 0; i < pf->num_alloc_vsi; i++) { + if (!pf->vsi[i]) + continue; + + ret = ice_replay_vsi(hw, pf->vsi[i]->idx); + if (ret) { + dev_err(&pf->pdev->dev, + "VSI at index %d replay failed %d\n", + pf->vsi[i]->idx, ret); + return -EIO; + } + + /* Re-map HW VSI number, using VSI handle that has been + * previously validated in ice_replay_vsi() call above + */ + pf->vsi[i]->vsi_num = ice_get_hw_vsi_num(hw, pf->vsi[i]->idx); + + dev_info(&pf->pdev->dev, + "VSI at index %d filter replayed successfully - vsi_num %i\n", + pf->vsi[i]->idx, pf->vsi[i]->vsi_num); + } + + /* Clean up replay filter after successful re-configuration */ + ice_replay_post(hw); + return 0; } /** @@ -5175,13 +3368,13 @@ static void ice_rebuild(struct ice_pf *pf) ret = ice_init_all_ctrlq(hw); if (ret) { dev_err(dev, "control queues init failed %d\n", ret); - goto fail_reset; + goto err_init_ctrlq; } ret = ice_clear_pf_cfg(hw); if (ret) { dev_err(dev, "clear PF configuration failed %d\n", ret); - goto fail_reset; + goto err_init_ctrlq; } ice_clear_pxe_mode(hw); @@ -5189,14 +3382,34 @@ static void ice_rebuild(struct ice_pf *pf) ret = ice_get_caps(hw); if (ret) { dev_err(dev, "ice_get_caps failed %d\n", ret); - goto fail_reset; + goto err_init_ctrlq; } - /* basic nic switch setup */ - err = ice_setup_pf_sw(pf); + err = ice_sched_init_port(hw->port_info); + if (err) + goto err_sched_init_port; + + /* reset search_hint of irq_trackers to 0 since interrupts are + * reclaimed and could be allocated from beginning during VSI rebuild + */ + pf->sw_irq_tracker->search_hint = 0; + pf->hw_irq_tracker->search_hint = 0; + + err = ice_vsi_rebuild_all(pf); if (err) { - dev_err(dev, "ice_setup_pf_sw failed\n"); - goto fail_reset; + dev_err(dev, "ice_vsi_rebuild_all failed\n"); + goto err_vsi_rebuild; + } + + err = ice_update_link_info(hw->port_info); + if (err) + dev_err(&pf->pdev->dev, "Get link status error %d\n", err); + + /* Replay all VSIs Configuration, including filters after reset */ + if (ice_vsi_replay_all(pf)) { + dev_err(&pf->pdev->dev, + "error replaying VSI configurations with switch filter rules\n"); + goto err_vsi_rebuild; } /* start misc vector */ @@ -5204,20 +3417,36 @@ static void ice_rebuild(struct ice_pf *pf) err = ice_req_irq_msix_misc(pf); if (err) { dev_err(dev, "misc vector setup failed: %d\n", err); - goto fail_reset; + goto err_vsi_rebuild; } } /* restart the VSIs that were rebuilt and running before the reset */ - ice_pf_ena_all_vsi(pf); + err = ice_pf_ena_all_vsi(pf); + if (err) { + dev_err(&pf->pdev->dev, "error enabling VSIs\n"); + /* no need to disable VSIs in tear down path in ice_rebuild() + * since its already taken care in ice_vsi_open() + */ + goto err_vsi_rebuild; + } + ice_reset_all_vfs(pf, true); + /* if we get here, reset flow is successful */ + clear_bit(__ICE_RESET_FAILED, pf->state); return; -fail_reset: +err_vsi_rebuild: + ice_vsi_release_all(pf); +err_sched_init_port: + ice_sched_cleanup_all(hw); +err_init_ctrlq: ice_shutdown_all_ctrlq(hw); set_bit(__ICE_RESET_FAILED, pf->state); clear_recovery: - set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state); + /* set this bit in PF state to control service task scheduling */ + set_bit(__ICE_NEEDS_RESTART, pf->state); + dev_err(dev, "Rebuild failed, unload and reload driver\n"); } /** @@ -5235,7 +3464,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) u8 count = 0; if (new_mtu == netdev->mtu) { - netdev_warn(netdev, "mtu is already %d\n", netdev->mtu); + netdev_warn(netdev, "mtu is already %u\n", netdev->mtu); return 0; } @@ -5250,7 +3479,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) } /* if a reset is in progress, wait for some time for it to complete */ do { - if (ice_is_reset_recovery_pending(pf->state)) { + if (ice_is_reset_in_progress(pf->state)) { count++; usleep_range(1000, 2000); } else { @@ -5306,7 +3535,7 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) struct ice_aqc_get_set_rss_keys *buf = (struct ice_aqc_get_set_rss_keys *)seed; - status = ice_aq_set_rss_key(hw, vsi->vsi_num, buf); + status = ice_aq_set_rss_key(hw, vsi->idx, buf); if (status) { dev_err(&pf->pdev->dev, @@ -5317,8 +3546,8 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) } if (lut) { - status = ice_aq_set_rss_lut(hw, vsi->vsi_num, - vsi->rss_lut_type, lut, lut_size); + status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, + lut, lut_size); if (status) { dev_err(&pf->pdev->dev, "Cannot set RSS lut, err %d aq_err %d\n", @@ -5349,7 +3578,7 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) struct ice_aqc_get_set_rss_keys *buf = (struct ice_aqc_get_set_rss_keys *)seed; - status = ice_aq_get_rss_key(hw, vsi->vsi_num, buf); + status = ice_aq_get_rss_key(hw, vsi->idx, buf); if (status) { dev_err(&pf->pdev->dev, "Cannot get RSS key, err %d aq_err %d\n", @@ -5359,8 +3588,8 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) } if (lut) { - status = ice_aq_get_rss_lut(hw, vsi->vsi_num, - vsi->rss_lut_type, lut, lut_size); + status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type, + lut, lut_size); if (status) { dev_err(&pf->pdev->dev, "Cannot get RSS lut, err %d aq_err %d\n", @@ -5373,6 +3602,232 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) } /** + * ice_bridge_getlink - Get the hardware bridge mode + * @skb: skb buff + * @pid: process id + * @seq: RTNL message seq + * @dev: the netdev being configured + * @filter_mask: filter mask passed in + * @nlflags: netlink flags passed in + * + * Return the bridge mode (VEB/VEPA) + */ +static int +ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, int nlflags) +{ + struct ice_netdev_priv *np = netdev_priv(dev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + u16 bmode; + + bmode = pf->first_sw->bridge_mode; + + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, bmode, 0, 0, nlflags, + filter_mask, NULL); +} + +/** + * ice_vsi_update_bridge_mode - Update VSI for switching bridge mode (VEB/VEPA) + * @vsi: Pointer to VSI structure + * @bmode: Hardware bridge mode (VEB/VEPA) + * + * Returns 0 on success, negative on failure + */ +static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode) +{ + struct device *dev = &vsi->back->pdev->dev; + struct ice_aqc_vsi_props *vsi_props; + struct ice_hw *hw = &vsi->back->hw; + struct ice_vsi_ctx ctxt = { 0 }; + enum ice_status status; + + vsi_props = &vsi->info; + ctxt.info = vsi->info; + + if (bmode == BRIDGE_MODE_VEB) + /* change from VEPA to VEB mode */ + ctxt.info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB; + else + /* change from VEB to VEPA mode */ + ctxt.info.sw_flags &= ~ICE_AQ_VSI_SW_FLAG_ALLOW_LB; + ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID); + + status = ice_update_vsi(hw, vsi->idx, &ctxt, NULL); + if (status) { + dev_err(dev, "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n", + bmode, status, hw->adminq.sq_last_status); + return -EIO; + } + /* Update sw flags for book keeping */ + vsi_props->sw_flags = ctxt.info.sw_flags; + + return 0; +} + +/** + * ice_bridge_setlink - Set the hardware bridge mode + * @dev: the netdev being configured + * @nlh: RTNL message + * @flags: bridge setlink flags + * + * Sets the bridge mode (VEB/VEPA) of the switch to which the netdev (VSI) is + * hooked up to. Iterates through the PF VSI list and sets the loopback mode (if + * not already set for all VSIs connected to this switch. And also update the + * unicast switch filter rules for the corresponding switch of the netdev. + */ +static int +ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, + u16 __always_unused flags) +{ + struct ice_netdev_priv *np = netdev_priv(dev); + struct ice_pf *pf = np->vsi->back; + struct nlattr *attr, *br_spec; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + struct ice_sw *pf_sw; + int rem, v, err = 0; + + pf_sw = pf->first_sw; + /* find the attribute in the netlink message */ + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + + nla_for_each_nested(attr, br_spec, rem) { + __u16 mode; + + if (nla_type(attr) != IFLA_BRIDGE_MODE) + continue; + mode = nla_get_u16(attr); + if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB) + return -EINVAL; + /* Continue if bridge mode is not being flipped */ + if (mode == pf_sw->bridge_mode) + continue; + /* Iterates through the PF VSI list and update the loopback + * mode of the VSI + */ + ice_for_each_vsi(pf, v) { + if (!pf->vsi[v]) + continue; + err = ice_vsi_update_bridge_mode(pf->vsi[v], mode); + if (err) + return err; + } + + hw->evb_veb = (mode == BRIDGE_MODE_VEB); + /* Update the unicast switch filter rules for the corresponding + * switch of the netdev + */ + status = ice_update_sw_rule_bridge_mode(hw); + if (status) { + netdev_err(dev, "update SW_RULE for bridge mode failed, = %d err %d aq_err %d\n", + mode, status, hw->adminq.sq_last_status); + /* revert hw->evb_veb */ + hw->evb_veb = (pf_sw->bridge_mode == BRIDGE_MODE_VEB); + return -EIO; + } + + pf_sw->bridge_mode = mode; + } + + return 0; +} + +/** + * ice_tx_timeout - Respond to a Tx Hang + * @netdev: network interface device structure + */ +static void ice_tx_timeout(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_ring *tx_ring = NULL; + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + u32 head, val = 0, i; + int hung_queue = -1; + + pf->tx_timeout_count++; + + /* find the stopped queue the same way the stack does */ + for (i = 0; i < netdev->num_tx_queues; i++) { + struct netdev_queue *q; + unsigned long trans_start; + + q = netdev_get_tx_queue(netdev, i); + trans_start = q->trans_start; + if (netif_xmit_stopped(q) && + time_after(jiffies, + (trans_start + netdev->watchdog_timeo))) { + hung_queue = i; + break; + } + } + + if (i == netdev->num_tx_queues) { + netdev_info(netdev, "tx_timeout: no netdev hung queue found\n"); + } else { + /* now that we have an index, find the tx_ring struct */ + for (i = 0; i < vsi->num_txq; i++) { + if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) { + if (hung_queue == + vsi->tx_rings[i]->q_index) { + tx_ring = vsi->tx_rings[i]; + break; + } + } + } + } + + /* Reset recovery level if enough time has elapsed after last timeout. + * Also ensure no new reset action happens before next timeout period. + */ + if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ * 20))) + pf->tx_timeout_recovery_level = 1; + else if (time_before(jiffies, (pf->tx_timeout_last_recovery + + netdev->watchdog_timeo))) + return; + + if (tx_ring) { + head = tx_ring->next_to_clean; + /* Read interrupt register */ + if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) + val = rd32(&pf->hw, + GLINT_DYN_CTL(tx_ring->q_vector->v_idx + + tx_ring->vsi->hw_base_vector)); + + netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %d, NTC: 0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x, INT: 0x%x\n", + vsi->vsi_num, hung_queue, tx_ring->next_to_clean, + head, tx_ring->next_to_use, + readl(tx_ring->tail), val); + } + + pf->tx_timeout_last_recovery = jiffies; + netdev_info(netdev, "tx_timeout recovery level %d, hung_queue %d\n", + pf->tx_timeout_recovery_level, hung_queue); + + switch (pf->tx_timeout_recovery_level) { + case 1: + set_bit(__ICE_PFR_REQ, pf->state); + break; + case 2: + set_bit(__ICE_CORER_REQ, pf->state); + break; + case 3: + set_bit(__ICE_GLOBR_REQ, pf->state); + break; + default: + netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n"); + set_bit(__ICE_DOWN, pf->state); + set_bit(__ICE_NEEDS_RESTART, vsi->state); + set_bit(__ICE_SERVICE_DIS, pf->state); + break; + } + + ice_service_task_schedule(pf); + pf->tx_timeout_recovery_level++; +} + +/** * ice_open - Called when a network interface becomes active * @netdev: network interface device structure * @@ -5390,6 +3845,11 @@ static int ice_open(struct net_device *netdev) struct ice_vsi *vsi = np->vsi; int err; + if (test_bit(__ICE_NEEDS_RESTART, vsi->back->state)) { + netdev_err(netdev, "driver needs to be unloaded and reloaded\n"); + return -EIO; + } + netif_carrier_off(netdev); err = ice_vsi_open(vsi); @@ -5480,12 +3940,18 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ice_netpoll, -#endif /* CONFIG_NET_POLL_CONTROLLER */ + .ndo_set_vf_spoofchk = ice_set_vf_spoofchk, + .ndo_set_vf_mac = ice_set_vf_mac, + .ndo_get_vf_config = ice_get_vf_cfg, + .ndo_set_vf_trust = ice_set_vf_trust, + .ndo_set_vf_vlan = ice_set_vf_port_vlan, + .ndo_set_vf_link_state = ice_set_vf_link_state, .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid, .ndo_set_features = ice_set_features, + .ndo_bridge_getlink = ice_bridge_getlink, + .ndo_bridge_setlink = ice_bridge_setlink, .ndo_fdb_add = ice_fdb_add, .ndo_fdb_del = ice_fdb_del, + .ndo_tx_timeout = ice_tx_timeout, }; |