diff options
Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_main.c')
-rw-r--r-- | drivers/net/ethernet/intel/ice/ice_main.c | 1235 |
1 files changed, 836 insertions, 399 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index b40dfe6ae321..1766230abfff 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1,18 +1,21 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2018, Intel Corporation. */ +/* Copyright (c) 2018-2023, Intel Corporation. */ /* Intel(R) Ethernet Connection E800 Series Linux Driver */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <generated/utsrelease.h> +#include <linux/crash_dump.h> #include "ice.h" #include "ice_base.h" #include "ice_lib.h" #include "ice_fltr.h" #include "ice_dcb_lib.h" #include "ice_dcb_nl.h" -#include "ice_devlink.h" +#include "devlink/devlink.h" +#include "devlink/devlink_port.h" +#include "ice_hwmon.h" /* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the * ice tracepoint functions. This must be done exactly once across the * ice driver. @@ -34,6 +37,7 @@ static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation."; MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); MODULE_DESCRIPTION(DRV_SUMMARY); +MODULE_IMPORT_NS(LIBIE); MODULE_LICENSE("GPL v2"); MODULE_FIRMWARE(ICE_DDP_PKG_FILE); @@ -64,6 +68,7 @@ struct device *ice_hw_to_dev(struct ice_hw *hw) } static struct workqueue_struct *ice_wq; +struct workqueue_struct *ice_lag_wq; static const struct net_device_ops ice_netdev_safe_mode_ops; static const struct net_device_ops ice_netdev_ops; @@ -80,7 +85,7 @@ ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, void *data, void (*cleanup)(struct flow_block_cb *block_cb)); -bool netif_is_ice(struct net_device *dev) +bool netif_is_ice(const struct net_device *dev) { return dev && (dev->netdev_ops == &ice_netdev_ops); } @@ -610,7 +615,7 @@ skip: ice_pf_dis_all_vsi(pf, false); if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) - ice_ptp_prepare_for_reset(pf); + ice_ptp_prepare_for_reset(pf, reset_type); if (ice_is_feature_supported(pf, ICE_F_GNSS)) ice_gnss_exit(pf); @@ -635,6 +640,11 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) dev_dbg(dev, "reset_type 0x%x requested\n", reset_type); + if (pf->lag && pf->lag->bonded && reset_type == ICE_RESET_PFR) { + dev_dbg(dev, "PFR on a bonded interface, promoting to CORER\n"); + reset_type = ICE_RESET_CORER; + } + ice_prepare_for_reset(pf, reset_type); /* trigger the reset */ @@ -718,8 +728,13 @@ static void ice_reset_subtask(struct ice_pf *pf) } /* No pending resets to finish processing. Check for new resets */ - if (test_bit(ICE_PFR_REQ, pf->state)) + if (test_bit(ICE_PFR_REQ, pf->state)) { reset_type = ICE_RESET_PFR; + if (pf->lag && pf->lag->bonded) { + dev_dbg(ice_pf_to_dev(pf), "PFR on a bonded interface, promoting to CORER\n"); + reset_type = ICE_RESET_CORER; + } + } if (test_bit(ICE_CORER_REQ, pf->state)) reset_type = ICE_RESET_CORER; if (test_bit(ICE_GLOBR_REQ, pf->state)) @@ -790,6 +805,9 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) } switch (vsi->port_info->phy.link_info.link_speed) { + case ICE_AQ_LINK_SPEED_200GB: + speed = "200 G"; + break; case ICE_AQ_LINK_SPEED_100GB: speed = "100 G"; break; @@ -967,7 +985,7 @@ static void ice_set_dflt_mib(struct ice_pf *pf) * Octets 13 - 20 are TSA values - leave as zeros */ buf[5] = 0x64; - len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S; + len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen); offset += len + 2; tlv = (struct ice_lldp_org_tlv *) ((char *)tlv + sizeof(tlv->typelen) + len); @@ -1001,7 +1019,7 @@ static void ice_set_dflt_mib(struct ice_pf *pf) /* Octet 1 left as all zeros - PFC disabled */ buf[0] = 0x08; - len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S; + len = FIELD_GET(ICE_LLDP_TLV_LEN_M, typelen); offset += len + 2; if (ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, offset, NULL)) @@ -1239,64 +1257,89 @@ ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event) return status; } -enum ice_aq_task_state { - ICE_AQ_TASK_WAITING = 0, - ICE_AQ_TASK_COMPLETE, - ICE_AQ_TASK_CANCELED, -}; +/** + * ice_get_fwlog_data - copy the FW log data from ARQ event + * @pf: PF that the FW log event is associated with + * @event: event structure containing FW log data + */ +static void +ice_get_fwlog_data(struct ice_pf *pf, struct ice_rq_event_info *event) +{ + struct ice_fwlog_data *fwlog; + struct ice_hw *hw = &pf->hw; -struct ice_aq_task { - struct hlist_node entry; + fwlog = &hw->fwlog_ring.rings[hw->fwlog_ring.tail]; - u16 opcode; - struct ice_rq_event_info *event; - enum ice_aq_task_state state; -}; + memset(fwlog->data, 0, PAGE_SIZE); + fwlog->data_size = le16_to_cpu(event->desc.datalen); + + memcpy(fwlog->data, event->msg_buf, fwlog->data_size); + ice_fwlog_ring_increment(&hw->fwlog_ring.tail, hw->fwlog_ring.size); + + if (ice_fwlog_ring_full(&hw->fwlog_ring)) { + /* the rings are full so bump the head to create room */ + ice_fwlog_ring_increment(&hw->fwlog_ring.head, + hw->fwlog_ring.size); + } +} /** - * ice_aq_wait_for_event - Wait for an AdminQ event from firmware + * ice_aq_prep_for_event - Prepare to wait for an AdminQ event from firmware * @pf: pointer to the PF private structure + * @task: intermediate helper storage and identifier for waiting * @opcode: the opcode to wait for - * @timeout: how long to wait, in jiffies - * @event: storage for the event info * - * Waits for a specific AdminQ completion event on the ARQ for a given PF. The - * current thread will be put to sleep until the specified event occurs or - * until the given timeout is reached. + * Prepares to wait for a specific AdminQ completion event on the ARQ for + * a given PF. Actual wait would be done by a call to ice_aq_wait_for_event(). * - * To obtain only the descriptor contents, pass an event without an allocated - * msg_buf. If the complete data buffer is desired, allocate the - * event->msg_buf with enough space ahead of time. + * Calls are separated to allow caller registering for event before sending + * the command, which mitigates a race between registering and FW responding. * - * Returns: zero on success, or a negative error code on failure. + * To obtain only the descriptor contents, pass an task->event with null + * msg_buf. If the complete data buffer is desired, allocate the + * task->event.msg_buf with enough space ahead of time. */ -int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout, - struct ice_rq_event_info *event) +void ice_aq_prep_for_event(struct ice_pf *pf, struct ice_aq_task *task, + u16 opcode) { - struct device *dev = ice_pf_to_dev(pf); - struct ice_aq_task *task; - unsigned long start; - long ret; - int err; - - task = kzalloc(sizeof(*task), GFP_KERNEL); - if (!task) - return -ENOMEM; - INIT_HLIST_NODE(&task->entry); task->opcode = opcode; - task->event = event; task->state = ICE_AQ_TASK_WAITING; spin_lock_bh(&pf->aq_wait_lock); hlist_add_head(&task->entry, &pf->aq_wait_list); spin_unlock_bh(&pf->aq_wait_lock); +} - start = jiffies; +/** + * ice_aq_wait_for_event - Wait for an AdminQ event from firmware + * @pf: pointer to the PF private structure + * @task: ptr prepared by ice_aq_prep_for_event() + * @timeout: how long to wait, in jiffies + * + * Waits for a specific AdminQ completion event on the ARQ for a given PF. The + * current thread will be put to sleep until the specified event occurs or + * until the given timeout is reached. + * + * Returns: zero on success, or a negative error code on failure. + */ +int ice_aq_wait_for_event(struct ice_pf *pf, struct ice_aq_task *task, + unsigned long timeout) +{ + enum ice_aq_task_state *state = &task->state; + struct device *dev = ice_pf_to_dev(pf); + unsigned long start = jiffies; + long ret; + int err; - ret = wait_event_interruptible_timeout(pf->aq_wait_queue, task->state, + ret = wait_event_interruptible_timeout(pf->aq_wait_queue, + *state != ICE_AQ_TASK_WAITING, timeout); - switch (task->state) { + switch (*state) { + case ICE_AQ_TASK_NOT_PREPARED: + WARN(1, "call to %s without ice_aq_prep_for_event()", __func__); + err = -EINVAL; + break; case ICE_AQ_TASK_WAITING: err = ret < 0 ? ret : -ETIMEDOUT; break; @@ -1307,7 +1350,7 @@ int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout, err = ret < 0 ? ret : 0; break; default: - WARN(1, "Unexpected AdminQ wait task state %u", task->state); + WARN(1, "Unexpected AdminQ wait task state %u", *state); err = -EINVAL; break; } @@ -1315,12 +1358,11 @@ int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout, dev_dbg(dev, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n", jiffies_to_msecs(jiffies - start), jiffies_to_msecs(timeout), - opcode); + task->opcode); spin_lock_bh(&pf->aq_wait_lock); hlist_del(&task->entry); spin_unlock_bh(&pf->aq_wait_lock); - kfree(task); return err; } @@ -1346,23 +1388,26 @@ int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout, static void ice_aq_check_events(struct ice_pf *pf, u16 opcode, struct ice_rq_event_info *event) { + struct ice_rq_event_info *task_ev; struct ice_aq_task *task; bool found = false; spin_lock_bh(&pf->aq_wait_lock); hlist_for_each_entry(task, &pf->aq_wait_list, entry) { - if (task->state || task->opcode != opcode) + if (task->state != ICE_AQ_TASK_WAITING) + continue; + if (task->opcode != opcode) continue; - memcpy(&task->event->desc, &event->desc, sizeof(event->desc)); - task->event->msg_len = event->msg_len; + task_ev = &task->event; + memcpy(&task_ev->desc, &event->desc, sizeof(event->desc)); + task_ev->msg_len = event->msg_len; /* Only copy the data buffer if a destination was set */ - if (task->event->msg_buf && - task->event->buf_len > event->buf_len) { - memcpy(task->event->msg_buf, event->msg_buf, + if (task_ev->msg_buf && task_ev->buf_len >= event->buf_len) { + memcpy(task_ev->msg_buf, event->msg_buf, event->buf_len); - task->event->buf_len = event->buf_len; + task_ev->buf_len = event->buf_len; } task->state = ICE_AQ_TASK_COMPLETE; @@ -1519,8 +1564,8 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) ice_vc_process_vf_msg(pf, &event, &data); break; - case ice_aqc_opc_fw_logging: - ice_output_fw_log(hw, &event.desc, event.msg_buf); + case ice_aqc_opc_fw_logs_event: + ice_get_fwlog_data(pf, &event); break; case ice_aqc_opc_lldp_set_mib_change: ice_dcb_process_lldp_set_mib_change(pf, &event); @@ -1609,8 +1654,10 @@ static void ice_clean_sbq_subtask(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - /* Nothing to do here if sideband queue is not supported */ - if (!ice_is_sbq_supported(hw)) { + /* if mac_type is not generic, sideband is not supported + * and there's nothing to do here + */ + if (!ice_is_generic_mac(hw)) { clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state); return; } @@ -1703,6 +1750,39 @@ static void ice_service_timer(struct timer_list *t) } /** + * ice_mdd_maybe_reset_vf - reset VF after MDD event + * @pf: pointer to the PF structure + * @vf: pointer to the VF structure + * @reset_vf_tx: whether Tx MDD has occurred + * @reset_vf_rx: whether Rx MDD has occurred + * + * Since the queue can get stuck on VF MDD events, the PF can be configured to + * automatically reset the VF by enabling the private ethtool flag + * mdd-auto-reset-vf. + */ +static void ice_mdd_maybe_reset_vf(struct ice_pf *pf, struct ice_vf *vf, + bool reset_vf_tx, bool reset_vf_rx) +{ + struct device *dev = ice_pf_to_dev(pf); + + if (!test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)) + return; + + /* VF MDD event counters will be cleared by reset, so print the event + * prior to reset. + */ + if (reset_vf_tx) + ice_print_vf_tx_mdd_event(vf); + + if (reset_vf_rx) + ice_print_vf_rx_mdd_event(vf); + + dev_info(dev, "PF-to-VF reset on PF %d VF %d due to MDD event\n", + pf->hw.pf_id, vf->vf_id); + ice_reset_vf(vf, ICE_VF_RESET_NOTIFY | ICE_VF_RESET_LOCK); +} + +/** * ice_handle_mdd_event - handle malicious driver detect event * @pf: pointer to the PF structure * @@ -1731,14 +1811,10 @@ static void ice_handle_mdd_event(struct ice_pf *pf) /* find what triggered an MDD event */ reg = rd32(hw, GL_MDET_TX_PQM); if (reg & GL_MDET_TX_PQM_VALID_M) { - u8 pf_num = (reg & GL_MDET_TX_PQM_PF_NUM_M) >> - GL_MDET_TX_PQM_PF_NUM_S; - u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >> - GL_MDET_TX_PQM_VF_NUM_S; - u8 event = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >> - GL_MDET_TX_PQM_MAL_TYPE_S; - u16 queue = ((reg & GL_MDET_TX_PQM_QNUM_M) >> - GL_MDET_TX_PQM_QNUM_S); + u8 pf_num = FIELD_GET(GL_MDET_TX_PQM_PF_NUM_M, reg); + u16 vf_num = FIELD_GET(GL_MDET_TX_PQM_VF_NUM_M, reg); + u8 event = FIELD_GET(GL_MDET_TX_PQM_MAL_TYPE_M, reg); + u16 queue = FIELD_GET(GL_MDET_TX_PQM_QNUM_M, reg); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", @@ -1746,33 +1822,25 @@ static void ice_handle_mdd_event(struct ice_pf *pf) wr32(hw, GL_MDET_TX_PQM, 0xffffffff); } - reg = rd32(hw, GL_MDET_TX_TCLAN); + reg = rd32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw)); if (reg & GL_MDET_TX_TCLAN_VALID_M) { - u8 pf_num = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >> - GL_MDET_TX_TCLAN_PF_NUM_S; - u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >> - GL_MDET_TX_TCLAN_VF_NUM_S; - u8 event = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >> - GL_MDET_TX_TCLAN_MAL_TYPE_S; - u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >> - GL_MDET_TX_TCLAN_QNUM_S); + u8 pf_num = FIELD_GET(GL_MDET_TX_TCLAN_PF_NUM_M, reg); + u16 vf_num = FIELD_GET(GL_MDET_TX_TCLAN_VF_NUM_M, reg); + u8 event = FIELD_GET(GL_MDET_TX_TCLAN_MAL_TYPE_M, reg); + u16 queue = FIELD_GET(GL_MDET_TX_TCLAN_QNUM_M, reg); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); - wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff); + wr32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw), U32_MAX); } reg = rd32(hw, GL_MDET_RX); if (reg & GL_MDET_RX_VALID_M) { - u8 pf_num = (reg & GL_MDET_RX_PF_NUM_M) >> - GL_MDET_RX_PF_NUM_S; - u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >> - GL_MDET_RX_VF_NUM_S; - u8 event = (reg & GL_MDET_RX_MAL_TYPE_M) >> - GL_MDET_RX_MAL_TYPE_S; - u16 queue = ((reg & GL_MDET_RX_QNUM_M) >> - GL_MDET_RX_QNUM_S); + u8 pf_num = FIELD_GET(GL_MDET_RX_PF_NUM_M, reg); + u16 vf_num = FIELD_GET(GL_MDET_RX_VF_NUM_M, reg); + u8 event = FIELD_GET(GL_MDET_RX_MAL_TYPE_M, reg); + u16 queue = FIELD_GET(GL_MDET_RX_QNUM_M, reg); if (netif_msg_rx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n", @@ -1788,9 +1856,9 @@ static void ice_handle_mdd_event(struct ice_pf *pf) dev_info(dev, "Malicious Driver Detection event TX_PQM detected on PF\n"); } - reg = rd32(hw, PF_MDET_TX_TCLAN); + reg = rd32(hw, PF_MDET_TX_TCLAN_BY_MAC(hw)); if (reg & PF_MDET_TX_TCLAN_VALID_M) { - wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF); + wr32(hw, PF_MDET_TX_TCLAN_BY_MAC(hw), 0xffff); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on PF\n"); } @@ -1807,6 +1875,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) */ mutex_lock(&pf->vfs.table_lock); ice_for_each_vf(pf, bkt, vf) { + bool reset_vf_tx = false, reset_vf_rx = false; + reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id)); if (reg & VP_MDET_TX_PQM_VALID_M) { wr32(hw, VP_MDET_TX_PQM(vf->vf_id), 0xFFFF); @@ -1815,6 +1885,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n", vf->vf_id); + + reset_vf_tx = true; } reg = rd32(hw, VP_MDET_TX_TCLAN(vf->vf_id)); @@ -1825,6 +1897,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n", vf->vf_id); + + reset_vf_tx = true; } reg = rd32(hw, VP_MDET_TX_TDPU(vf->vf_id)); @@ -1835,6 +1909,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n", vf->vf_id); + + reset_vf_tx = true; } reg = rd32(hw, VP_MDET_RX(vf->vf_id)); @@ -1846,18 +1922,12 @@ static void ice_handle_mdd_event(struct ice_pf *pf) dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n", vf->vf_id); - /* Since the queue is disabled on VF Rx MDD events, the - * PF can be configured to reset the VF through ethtool - * private flag mdd-auto-reset-vf. - */ - if (test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)) { - /* VF MDD event counters will be cleared by - * reset, so print the event prior to reset. - */ - ice_print_vf_rx_mdd_event(vf); - ice_reset_vf(vf, ICE_VF_RESET_LOCK); - } + reset_vf_rx = true; } + + if (reset_vf_tx || reset_vf_rx) + ice_mdd_maybe_reset_vf(pf, vf, reset_vf_tx, + reset_vf_rx); } mutex_unlock(&pf->vfs.table_lock); @@ -2133,7 +2203,7 @@ static int ice_configure_phy(struct ice_vsi *vsi) /* Ensure we have media as we cannot configure a medialess port */ if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) - return -EPERM; + return -ENOMEDIUM; ice_print_topo_conflict(vsi); @@ -2640,17 +2710,72 @@ static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog) bpf_prog_put(old_prog); } +static struct ice_tx_ring *ice_xdp_ring_from_qid(struct ice_vsi *vsi, int qid) +{ + struct ice_q_vector *q_vector; + struct ice_tx_ring *ring; + + if (static_key_enabled(&ice_xdp_locking_key)) + return vsi->xdp_rings[qid % vsi->num_xdp_txq]; + + q_vector = vsi->rx_rings[qid]->q_vector; + ice_for_each_tx_ring(ring, q_vector->tx) + if (ice_ring_is_xdp(ring)) + return ring; + + return NULL; +} + +/** + * ice_map_xdp_rings - Map XDP rings to interrupt vectors + * @vsi: the VSI with XDP rings being configured + * + * Map XDP rings to interrupt vectors and perform the configuration steps + * dependent on the mapping. + */ +void ice_map_xdp_rings(struct ice_vsi *vsi) +{ + int xdp_rings_rem = vsi->num_xdp_txq; + int v_idx, q_idx; + + /* follow the logic from ice_vsi_map_rings_to_vectors */ + ice_for_each_q_vector(vsi, v_idx) { + struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; + int xdp_rings_per_v, q_id, q_base; + + xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem, + vsi->num_q_vectors - v_idx); + q_base = vsi->num_xdp_txq - xdp_rings_rem; + + for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) { + struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id]; + + xdp_ring->q_vector = q_vector; + xdp_ring->next = q_vector->tx.tx_ring; + q_vector->tx.tx_ring = xdp_ring; + } + xdp_rings_rem -= xdp_rings_per_v; + } + + ice_for_each_rxq(vsi, q_idx) { + vsi->rx_rings[q_idx]->xdp_ring = ice_xdp_ring_from_qid(vsi, + q_idx); + ice_tx_xsk_pool(vsi, q_idx); + } +} + /** * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP * @vsi: VSI to bring up Tx rings used by XDP * @prog: bpf program that will be assigned to VSI + * @cfg_type: create from scratch or restore the existing configuration * * Return 0 on success and negative value on error */ -int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) +int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog, + enum ice_xdp_cfg cfg_type) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; - int xdp_rings_rem = vsi->num_xdp_txq; struct ice_pf *pf = vsi->back; struct ice_qs_cfg xdp_qs_cfg = { .qs_mutex = &pf->avail_q_mutex, @@ -2663,8 +2788,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) .mapping_mode = ICE_VSI_MAP_CONTIG }; struct device *dev; - int i, v_idx; - int status; + int status, i; dev = ice_pf_to_dev(pf); vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq, @@ -2683,49 +2807,15 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) if (ice_xdp_alloc_setup_rings(vsi)) goto clear_xdp_rings; - /* follow the logic from ice_vsi_map_rings_to_vectors */ - ice_for_each_q_vector(vsi, v_idx) { - struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; - int xdp_rings_per_v, q_id, q_base; - - xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem, - vsi->num_q_vectors - v_idx); - q_base = vsi->num_xdp_txq - xdp_rings_rem; - - for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) { - struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id]; - - xdp_ring->q_vector = q_vector; - xdp_ring->next = q_vector->tx.tx_ring; - q_vector->tx.tx_ring = xdp_ring; - } - xdp_rings_rem -= xdp_rings_per_v; - } - - ice_for_each_rxq(vsi, i) { - if (static_key_enabled(&ice_xdp_locking_key)) { - vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq]; - } else { - struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector; - struct ice_tx_ring *ring; - - ice_for_each_tx_ring(ring, q_vector->tx) { - if (ice_ring_is_xdp(ring)) { - vsi->rx_rings[i]->xdp_ring = ring; - break; - } - } - } - ice_tx_xsk_pool(vsi, i); - } - /* omit the scheduler update if in reset path; XDP queues will be * taken into account at the end of ice_vsi_rebuild, where * ice_cfg_vsi_lan is being called */ - if (ice_is_reset_in_progress(pf->state)) + if (cfg_type == ICE_XDP_CFG_PART) return 0; + ice_map_xdp_rings(vsi); + /* tell the Tx scheduler that right now we have * additional queues */ @@ -2775,22 +2865,21 @@ err_map_xdp: /** * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings * @vsi: VSI to remove XDP rings + * @cfg_type: disable XDP permanently or allow it to be restored later * * Detach XDP rings from irq vectors, clean up the PF bitmap and free * resources */ -int ice_destroy_xdp_rings(struct ice_vsi *vsi) +int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_pf *pf = vsi->back; int i, v_idx; /* q_vectors are freed in reset path so there's no point in detaching - * rings; in case of rebuild being triggered not from reset bits - * in pf->state won't be set, so additionally check first q_vector - * against NULL + * rings */ - if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0]) + if (cfg_type == ICE_XDP_CFG_PART) goto free_qmap; ice_for_each_q_vector(vsi, v_idx) { @@ -2831,7 +2920,7 @@ free_qmap: if (static_key_enabled(&ice_xdp_locking_key)) static_branch_dec(&ice_xdp_locking_key); - if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0]) + if (cfg_type == ICE_XDP_CFG_PART) return 0; ice_vsi_assign_bpf_prog(vsi, NULL); @@ -2942,7 +3031,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, if (xdp_ring_err) { NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP"); } else { - xdp_ring_err = ice_prepare_xdp_rings(vsi, prog); + xdp_ring_err = ice_prepare_xdp_rings(vsi, prog, + ICE_XDP_CFG_FULL); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); } @@ -2953,7 +3043,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed"); } else if (ice_is_xdp_ena_vsi(vsi) && !prog) { xdp_features_clear_redirect_target(vsi->netdev); - xdp_ring_err = ice_destroy_xdp_rings(vsi); + xdp_ring_err = ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_FULL); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed"); /* reallocate Rx queues that were used for zero-copy */ @@ -3018,6 +3108,7 @@ static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp) static void ice_ena_misc_vector(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; + u32 pf_intr_start_offset; u32 val; /* Disable anti-spoof detection interrupt to prevent spurious event @@ -3046,6 +3137,47 @@ static void ice_ena_misc_vector(struct ice_pf *pf) /* SW_ITR_IDX = 0, but don't change INTENA */ wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index), GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M); + + if (!pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) + return; + pf_intr_start_offset = rd32(hw, PFINT_ALLOC) & PFINT_ALLOC_FIRST; + wr32(hw, GLINT_DYN_CTL(pf->ll_ts_irq.index + pf_intr_start_offset), + GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M); +} + +/** + * ice_ll_ts_intr - ll_ts interrupt handler + * @irq: interrupt number + * @data: pointer to a q_vector + */ +static irqreturn_t ice_ll_ts_intr(int __always_unused irq, void *data) +{ + struct ice_pf *pf = data; + u32 pf_intr_start_offset; + struct ice_ptp_tx *tx; + unsigned long flags; + struct ice_hw *hw; + u32 val; + u8 idx; + + hw = &pf->hw; + tx = &pf->ptp.port.tx; + spin_lock_irqsave(&tx->lock, flags); + ice_ptp_complete_tx_single_tstamp(tx); + + idx = find_next_bit_wrap(tx->in_use, tx->len, + tx->last_ll_ts_idx_read + 1); + if (idx != tx->len) + ice_ptp_req_tx_single_tstamp(tx, idx); + spin_unlock_irqrestore(&tx->lock, flags); + + val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | + (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S); + pf_intr_start_offset = rd32(hw, PFINT_ALLOC) & PFINT_ALLOC_FIRST; + wr32(hw, GLINT_DYN_CTL(pf->ll_ts_irq.index + pf_intr_start_offset), + val); + + return IRQ_HANDLED; } /** @@ -3056,6 +3188,7 @@ static void ice_ena_misc_vector(struct ice_pf *pf) static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) { struct ice_pf *pf = (struct ice_pf *)data; + irqreturn_t ret = IRQ_HANDLED; struct ice_hw *hw = &pf->hw; struct device *dev; u32 oicr, ena_mask; @@ -3095,8 +3228,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) /* we have a reset warning */ ena_mask &= ~PFINT_OICR_GRST_M; - reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >> - GLGEN_RSTAT_RESET_TYPE_S; + reset = FIELD_GET(GLGEN_RSTAT_RESET_TYPE_M, + rd32(hw, GLGEN_RSTAT)); if (reset == ICE_RESET_CORER) pf->corer_count++; @@ -3137,8 +3270,22 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & PFINT_OICR_TSYN_TX_M) { ena_mask &= ~PFINT_OICR_TSYN_TX_M; - if (!hw->reset_ongoing) + if (ice_pf_state_is_nominal(pf) && + pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) { + struct ice_ptp_tx *tx = &pf->ptp.port.tx; + unsigned long flags; + u8 idx; + + spin_lock_irqsave(&tx->lock, flags); + idx = find_next_bit_wrap(tx->in_use, tx->len, + tx->last_ll_ts_idx_read + 1); + if (idx != tx->len) + ice_ptp_req_tx_single_tstamp(tx, idx); + spin_unlock_irqrestore(&tx->lock, flags); + } else if (ice_ptp_pf_handles_tx_interrupt(pf)) { set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread); + ret = IRQ_WAKE_THREAD; + } } if (oicr & PFINT_OICR_TSYN_EVNT_M) { @@ -3147,14 +3294,14 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) ena_mask &= ~PFINT_OICR_TSYN_EVNT_M; - if (hw->func_caps.ts_func_info.src_tmr_owned) { + if (ice_pf_src_tmr_owned(pf)) { /* Save EVENTs from GLTSYN register */ pf->ptp.ext_ts_irq |= gltsyn_stat & (GLTSYN_STAT_EVENT0_M | GLTSYN_STAT_EVENT1_M | GLTSYN_STAT_EVENT2_M); - set_bit(ICE_MISC_THREAD_EXTTS_EVENT, pf->misc_thread); + ice_ptp_extts_event(pf); } } @@ -3177,8 +3324,11 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) set_bit(ICE_PFR_REQ, pf->state); } } + ice_service_task_schedule(pf); + if (ret == IRQ_HANDLED) + ice_irq_dynamic_ena(hw, NULL, NULL); - return IRQ_WAKE_THREAD; + return ret; } /** @@ -3194,12 +3344,7 @@ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) hw = &pf->hw; if (ice_is_reset_in_progress(pf->state)) - return IRQ_HANDLED; - - ice_service_task_schedule(pf); - - if (test_and_clear_bit(ICE_MISC_THREAD_EXTTS_EVENT, pf->misc_thread)) - ice_ptp_extts_event(pf); + goto skip_irq; if (test_and_clear_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread)) { /* Process outstanding Tx timestamps. If there is more work, @@ -3211,6 +3356,7 @@ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) } } +skip_irq: ice_irq_dynamic_ena(hw, NULL, NULL); return IRQ_HANDLED; @@ -3241,6 +3387,20 @@ static void ice_dis_ctrlq_interrupts(struct ice_hw *hw) } /** + * ice_free_irq_msix_ll_ts- Unroll ll_ts vector setup + * @pf: board private structure + */ +static void ice_free_irq_msix_ll_ts(struct ice_pf *pf) +{ + int irq_num = pf->ll_ts_irq.virq; + + synchronize_irq(irq_num); + devm_free_irq(ice_pf_to_dev(pf), irq_num, pf); + + ice_free_irq(pf, pf->ll_ts_irq); +} + +/** * ice_free_irq_msix_misc - Unroll misc vector setup * @pf: board private structure */ @@ -3259,6 +3419,8 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf) devm_free_irq(ice_pf_to_dev(pf), misc_irq_num, pf); ice_free_irq(pf, pf->oicr_irq); + if (pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) + ice_free_irq_msix_ll_ts(pf); } /** @@ -3284,10 +3446,12 @@ static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx) PFINT_MBX_CTL_CAUSE_ENA_M); wr32(hw, PFINT_MBX_CTL, val); - /* This enables Sideband queue Interrupt causes */ - val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) | - PFINT_SB_CTL_CAUSE_ENA_M); - wr32(hw, PFINT_SB_CTL, val); + if (!hw->dev_caps.ts_dev_info.ts_ll_int_read) { + /* enable Sideband queue Interrupt causes */ + val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) | + PFINT_SB_CTL_CAUSE_ENA_M); + wr32(hw, PFINT_SB_CTL, val); + } ice_flush(hw); } @@ -3304,13 +3468,17 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) { struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - struct msi_map oicr_irq; + u32 pf_intr_start_offset; + struct msi_map irq; int err = 0; if (!pf->int_name[0]) snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc", dev_driver_string(dev), dev_name(dev)); + if (!pf->int_name_ll_ts[0]) + snprintf(pf->int_name_ll_ts, sizeof(pf->int_name_ll_ts) - 1, + "%s-%s:ll_ts", dev_driver_string(dev), dev_name(dev)); /* Do not request IRQ but do enable OICR interrupt since settings are * lost during reset. Note that this function is called only during * rebuild path and not while reset is in progress. @@ -3319,11 +3487,11 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) goto skip_req_irq; /* reserve one vector in irq_tracker for misc interrupts */ - oicr_irq = ice_alloc_irq(pf, false); - if (oicr_irq.index < 0) - return oicr_irq.index; + irq = ice_alloc_irq(pf, false); + if (irq.index < 0) + return irq.index; - pf->oicr_irq = oicr_irq; + pf->oicr_irq = irq; err = devm_request_threaded_irq(dev, pf->oicr_irq.virq, ice_misc_intr, ice_misc_intr_thread_fn, 0, pf->int_name, pf); @@ -3334,10 +3502,34 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) return err; } + /* reserve one vector in irq_tracker for ll_ts interrupt */ + if (!pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) + goto skip_req_irq; + + irq = ice_alloc_irq(pf, false); + if (irq.index < 0) + return irq.index; + + pf->ll_ts_irq = irq; + err = devm_request_irq(dev, pf->ll_ts_irq.virq, ice_ll_ts_intr, 0, + pf->int_name_ll_ts, pf); + if (err) { + dev_err(dev, "devm_request_irq for %s failed: %d\n", + pf->int_name_ll_ts, err); + ice_free_irq(pf, pf->ll_ts_irq); + return err; + } + skip_req_irq: ice_ena_misc_vector(pf); ice_ena_ctrlq_interrupts(hw, pf->oicr_irq.index); + /* This enables LL TS interrupt */ + pf_intr_start_offset = rd32(hw, PFINT_ALLOC) & PFINT_ALLOC_FIRST; + if (pf->hw.dev_caps.ts_dev_info.ts_ll_int_read) + wr32(hw, PFINT_SB_CTL, + ((pf->ll_ts_irq.index + pf_intr_start_offset) & + PFINT_SB_CTL_MSIX_INDX_M) | PFINT_SB_CTL_CAUSE_ENA_M); wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_irq.index), ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S); @@ -3362,9 +3554,11 @@ static void ice_napi_add(struct ice_vsi *vsi) if (!vsi->netdev) return; - ice_for_each_q_vector(vsi, v_idx) + ice_for_each_q_vector(vsi, v_idx) { netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi, ice_napi_poll); + __ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false); + } } /** @@ -3384,6 +3578,7 @@ static void ice_set_ops(struct ice_vsi *vsi) netdev->netdev_ops = &ice_netdev_ops; netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic; + netdev->xdp_metadata_ops = &ice_xdp_md_ops; ice_set_ethtool_ops(netdev); if (vsi->type != ICE_VSI_PF) @@ -3392,6 +3587,7 @@ static void ice_set_ops(struct ice_vsi *vsi) netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_XSK_ZEROCOPY | NETDEV_XDP_ACT_RX_SG; + netdev->xdp_zc_max_segs = ICE_MAX_BUF_TXD; } /** @@ -3512,7 +3708,7 @@ ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) struct ice_vsi_cfg_params params = {}; params.type = ICE_VSI_PF; - params.pi = pi; + params.port_info = pi; params.flags = ICE_VSI_FLAG_INIT; return ice_vsi_setup(pf, ¶ms); @@ -3525,7 +3721,7 @@ ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, struct ice_vsi_cfg_params params = {}; params.type = ICE_VSI_CHNL; - params.pi = pi; + params.port_info = pi; params.ch = ch; params.flags = ICE_VSI_FLAG_INIT; @@ -3546,7 +3742,7 @@ ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) struct ice_vsi_cfg_params params = {}; params.type = ICE_VSI_CTRL; - params.pi = pi; + params.port_info = pi; params.flags = ICE_VSI_FLAG_INIT; return ice_vsi_setup(pf, ¶ms); @@ -3566,7 +3762,7 @@ ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) struct ice_vsi_cfg_params params = {}; params.type = ICE_VSI_LB; - params.pi = pi; + params.port_info = pi; params.flags = ICE_VSI_FLAG_INIT; return ice_vsi_setup(pf, ¶ms); @@ -3794,6 +3990,7 @@ u16 ice_get_avail_rxq_count(struct ice_pf *pf) static void ice_deinit_pf(struct ice_pf *pf) { ice_service_task_stop(pf); + mutex_destroy(&pf->lag_mutex); mutex_destroy(&pf->adev_mutex); mutex_destroy(&pf->sw_mutex); mutex_destroy(&pf->tc_mutex); @@ -3856,7 +4053,8 @@ static void ice_set_pf_caps(struct ice_pf *pf) } clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); - if (func_caps->common_cap.ieee_1588) + if (func_caps->common_cap.ieee_1588 && + !(pf->hw.mac_type == ICE_MAC_E830)) set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); pf->max_pf_txqs = func_caps->common_cap.num_txq; @@ -3874,6 +4072,7 @@ static int ice_init_pf(struct ice_pf *pf) mutex_init(&pf->sw_mutex); mutex_init(&pf->tc_mutex); mutex_init(&pf->adev_mutex); + mutex_init(&pf->lag_mutex); INIT_HLIST_HEAD(&pf->aq_wait_list); spin_lock_init(&pf->aq_wait_lock); @@ -4278,11 +4477,13 @@ static char *ice_get_opt_fw_name(struct ice_pf *pf) /** * ice_request_fw - Device initialization routine * @pf: pointer to the PF instance + * @firmware: double pointer to firmware struct + * + * Return: zero when successful, negative values otherwise. */ -static void ice_request_fw(struct ice_pf *pf) +static int ice_request_fw(struct ice_pf *pf, const struct firmware **firmware) { char *opt_fw_filename = ice_get_opt_fw_name(pf); - const struct firmware *firmware = NULL; struct device *dev = ice_pf_to_dev(pf); int err = 0; @@ -4291,29 +4492,95 @@ static void ice_request_fw(struct ice_pf *pf) * and warning messages for other errors. */ if (opt_fw_filename) { - err = firmware_request_nowarn(&firmware, opt_fw_filename, dev); - if (err) { - kfree(opt_fw_filename); - goto dflt_pkg_load; - } - - /* request for firmware was successful. Download to device */ - ice_load_pkg(firmware, pf); + err = firmware_request_nowarn(firmware, opt_fw_filename, dev); kfree(opt_fw_filename); - release_firmware(firmware); - return; + if (!err) + return err; + } + err = request_firmware(firmware, ICE_DDP_PKG_FILE, dev); + if (err) + dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n"); + + return err; +} + +/** + * ice_init_tx_topology - performs Tx topology initialization + * @hw: pointer to the hardware structure + * @firmware: pointer to firmware structure + * + * Return: zero when init was successful, negative values otherwise. + */ +static int +ice_init_tx_topology(struct ice_hw *hw, const struct firmware *firmware) +{ + u8 num_tx_sched_layers = hw->num_tx_sched_layers; + struct ice_pf *pf = hw->back; + struct device *dev; + u8 *buf_copy; + int err; + + dev = ice_pf_to_dev(pf); + /* ice_cfg_tx_topo buf argument is not a constant, + * so we have to make a copy + */ + buf_copy = kmemdup(firmware->data, firmware->size, GFP_KERNEL); + + err = ice_cfg_tx_topo(hw, buf_copy, firmware->size); + if (!err) { + if (hw->num_tx_sched_layers > num_tx_sched_layers) + dev_info(dev, "Tx scheduling layers switching feature disabled\n"); + else + dev_info(dev, "Tx scheduling layers switching feature enabled\n"); + /* if there was a change in topology ice_cfg_tx_topo triggered + * a CORER and we need to re-init hw + */ + ice_deinit_hw(hw); + err = ice_init_hw(hw); + + return err; + } else if (err == -EIO) { + dev_info(dev, "DDP package does not support Tx scheduling layers switching feature - please update to the latest DDP package and try again\n"); + } + + return 0; +} + +/** + * ice_init_ddp_config - DDP related configuration + * @hw: pointer to the hardware structure + * @pf: pointer to pf structure + * + * This function loads DDP file from the disk, then initializes Tx + * topology. At the end DDP package is loaded on the card. + * + * Return: zero when init was successful, negative values otherwise. + */ +static int ice_init_ddp_config(struct ice_hw *hw, struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + const struct firmware *firmware = NULL; + int err; + + err = ice_request_fw(pf, &firmware); + if (err) { + dev_err(dev, "Fail during requesting FW: %d\n", err); + return err; } -dflt_pkg_load: - err = request_firmware(&firmware, ICE_DDP_PKG_FILE, dev); + err = ice_init_tx_topology(hw, firmware); if (err) { - dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n"); - return; + dev_err(dev, "Fail during initialization of Tx topology: %d\n", + err); + release_firmware(firmware); + return err; } - /* request for firmware was successful. Download to device */ + /* Download firmware to device */ ice_load_pkg(firmware, pf); release_firmware(firmware); + + return 0; } /** @@ -4344,6 +4611,19 @@ static void ice_print_wake_reason(struct ice_pf *pf) } /** + * ice_pf_fwlog_update_module - update 1 module + * @pf: pointer to the PF struct + * @log_level: log_level to use for the @module + * @module: module to update + */ +void ice_pf_fwlog_update_module(struct ice_pf *pf, int log_level, int module) +{ + struct ice_hw *hw = &pf->hw; + + hw->fwlog_cfg.module_entries[module].log_level = log_level; +} + +/** * ice_register_netdev - register netdev * @vsi: pointer to the VSI struct */ @@ -4422,91 +4702,32 @@ static void ice_decfg_netdev(struct ice_vsi *vsi) vsi->netdev = NULL; } -static int ice_start_eth(struct ice_vsi *vsi) -{ - int err; - - err = ice_init_mac_fltr(vsi->back); - if (err) - return err; - - err = ice_vsi_open(vsi); - if (err) - ice_fltr_remove_all(vsi); - - return err; -} - -static void ice_stop_eth(struct ice_vsi *vsi) -{ - ice_fltr_remove_all(vsi); - ice_vsi_close(vsi); -} - -static int ice_init_eth(struct ice_pf *pf) +/** + * ice_wait_for_fw - wait for full FW readiness + * @hw: pointer to the hardware structure + * @timeout: milliseconds that can elapse before timing out + */ +static int ice_wait_for_fw(struct ice_hw *hw, u32 timeout) { - struct ice_vsi *vsi = ice_get_main_vsi(pf); - int err; - - if (!vsi) - return -EINVAL; - - /* init channel list */ - INIT_LIST_HEAD(&vsi->ch_list); + int fw_loading; + u32 elapsed = 0; - err = ice_cfg_netdev(vsi); - if (err) - return err; - /* Setup DCB netlink interface */ - ice_dcbnl_setup(vsi); + while (elapsed <= timeout) { + fw_loading = rd32(hw, GL_MNG_FWSM) & GL_MNG_FWSM_FW_LOADING_M; - err = ice_init_mac_fltr(pf); - if (err) - goto err_init_mac_fltr; - - err = ice_devlink_create_pf_port(pf); - if (err) - goto err_devlink_create_pf_port; - - SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port); - - err = ice_register_netdev(vsi); - if (err) - goto err_register_netdev; - - err = ice_tc_indir_block_register(vsi); - if (err) - goto err_tc_indir_block_register; - - ice_napi_add(vsi); - - return 0; - -err_tc_indir_block_register: - ice_unregister_netdev(vsi); -err_register_netdev: - ice_devlink_destroy_pf_port(pf); -err_devlink_create_pf_port: -err_init_mac_fltr: - ice_decfg_netdev(vsi); - return err; -} - -static void ice_deinit_eth(struct ice_pf *pf) -{ - struct ice_vsi *vsi = ice_get_main_vsi(pf); - - if (!vsi) - return; + /* firmware was not yet loaded, we have to wait more */ + if (fw_loading) { + elapsed += 100; + msleep(100); + continue; + } + return 0; + } - ice_vsi_close(vsi); - ice_unregister_netdev(vsi); - ice_devlink_destroy_pf_port(pf); - ice_tc_indir_block_unregister(vsi); - ice_decfg_netdev(vsi); + return -ETIMEDOUT; } -static int ice_init_dev(struct ice_pf *pf) +int ice_init_dev(struct ice_pf *pf) { struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; @@ -4518,11 +4739,25 @@ static int ice_init_dev(struct ice_pf *pf) return err; } + /* Some cards require longer initialization times + * due to necessity of loading FW from an external source. + * This can take even half a minute. + */ + if (ice_is_pf_c827(hw)) { + err = ice_wait_for_fw(hw, 30000); + if (err) { + dev_err(dev, "ice_wait_for_fw timed out"); + return err; + } + } + ice_init_feature_support(pf); - ice_request_fw(pf); + err = ice_init_ddp_config(hw, pf); + if (err) + return err; - /* if ice_request_fw fails, ICE_FLAG_ADV_FEATURES bit won't be + /* if ice_init_ddp_config fails, ICE_FLAG_ADV_FEATURES bit won't be * set in pf->state, which will cause ice_is_safe_mode to return * true */ @@ -4587,7 +4822,7 @@ err_init_pf: return err; } -static void ice_deinit_dev(struct ice_pf *pf) +void ice_deinit_dev(struct ice_pf *pf) { ice_free_irq_msix_misc(pf); ice_deinit_pf(pf); @@ -4613,6 +4848,10 @@ static void ice_init_features(struct ice_pf *pf) if (ice_is_feature_supported(pf, ICE_F_GNSS)) ice_gnss_init(pf); + if (ice_is_feature_supported(pf, ICE_F_CGU) || + ice_is_feature_supported(pf, ICE_F_PHY_RCLK)) + ice_dpll_init(pf); + /* Note: Flow director init failure is non-fatal to load */ if (ice_init_fdir(pf)) dev_err(dev, "could not initialize flow director\n"); @@ -4627,10 +4866,15 @@ static void ice_init_features(struct ice_pf *pf) if (ice_init_lag(pf)) dev_warn(dev, "Failed to init link aggregation support\n"); + + ice_hwmon_init(pf); } static void ice_deinit_features(struct ice_pf *pf) { + if (ice_is_safe_mode(pf)) + return; + ice_deinit_lag(pf); if (test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)) ice_cfg_lldp_mib_change(&pf->hw, false); @@ -4639,6 +4883,10 @@ static void ice_deinit_features(struct ice_pf *pf) ice_gnss_exit(pf); if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) ice_ptp_release(pf); + if (test_bit(ICE_FLAG_DPLL, pf->flags)) + ice_dpll_deinit(pf); + if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + xa_destroy(&pf->eswitch.reprs); } static void ice_init_wakeup(struct ice_pf *pf) @@ -4879,31 +5127,47 @@ static void ice_deinit(struct ice_pf *pf) /** * ice_load - load pf by init hw and starting VSI * @pf: pointer to the pf instance + * + * This function has to be called under devl_lock. */ int ice_load(struct ice_pf *pf) { - struct ice_vsi_cfg_params params = {}; struct ice_vsi *vsi; int err; - err = ice_init_dev(pf); + devl_assert_locked(priv_to_devlink(pf)); + + vsi = ice_get_main_vsi(pf); + + /* init channel list */ + INIT_LIST_HEAD(&vsi->ch_list); + + err = ice_cfg_netdev(vsi); if (err) return err; - vsi = ice_get_main_vsi(pf); + /* Setup DCB netlink interface */ + ice_dcbnl_setup(vsi); - params = ice_vsi_to_params(vsi); - params.flags = ICE_VSI_FLAG_INIT; + err = ice_init_mac_fltr(pf); + if (err) + goto err_init_mac_fltr; - rtnl_lock(); - err = ice_vsi_cfg(vsi, ¶ms); + err = ice_devlink_create_pf_port(pf); if (err) - goto err_vsi_cfg; + goto err_devlink_create_pf_port; + + SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port); - err = ice_start_eth(ice_get_main_vsi(pf)); + err = ice_register_netdev(vsi); + if (err) + goto err_register_netdev; + + err = ice_tc_indir_block_register(vsi); if (err) - goto err_start_eth; - rtnl_unlock(); + goto err_tc_indir_block_register; + + ice_napi_add(vsi); err = ice_init_rdma(pf); if (err) @@ -4917,29 +5181,35 @@ int ice_load(struct ice_pf *pf) return 0; err_init_rdma: - ice_vsi_close(ice_get_main_vsi(pf)); - rtnl_lock(); -err_start_eth: - ice_vsi_decfg(ice_get_main_vsi(pf)); -err_vsi_cfg: - rtnl_unlock(); - ice_deinit_dev(pf); + ice_tc_indir_block_unregister(vsi); +err_tc_indir_block_register: + ice_unregister_netdev(vsi); +err_register_netdev: + ice_devlink_destroy_pf_port(pf); +err_devlink_create_pf_port: +err_init_mac_fltr: + ice_decfg_netdev(vsi); return err; } /** * ice_unload - unload pf by stopping VSI and deinit hw * @pf: pointer to the pf instance + * + * This function has to be called under devl_lock. */ void ice_unload(struct ice_pf *pf) { + struct ice_vsi *vsi = ice_get_main_vsi(pf); + + devl_assert_locked(priv_to_devlink(pf)); + ice_deinit_features(pf); ice_deinit_rdma(pf); - rtnl_lock(); - ice_stop_eth(ice_get_main_vsi(pf)); - ice_vsi_decfg(ice_get_main_vsi(pf)); - rtnl_unlock(); - ice_deinit_dev(pf); + ice_tc_indir_block_unregister(vsi); + ice_unregister_netdev(vsi); + ice_devlink_destroy_pf_port(pf); + ice_decfg_netdev(vsi); } /** @@ -4953,6 +5223,7 @@ static int ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) { struct device *dev = &pdev->dev; + struct ice_adapter *adapter; struct ice_pf *pf; struct ice_hw *hw; int err; @@ -4962,6 +5233,20 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) return -EINVAL; } + /* when under a kdump kernel initiate a reset before enabling the + * device in order to clear out any pending DMA transactions. These + * transactions can cause some systems to machine check when doing + * the pcim_enable_device() below. + */ + if (is_kdump_kernel()) { + pci_save_state(pdev); + pci_clear_master(pdev); + err = pcie_flr(pdev); + if (err) + return err; + pci_restore_state(pdev); + } + /* this driver uses devres, see * Documentation/driver-api/driver-model/devres.rst */ @@ -4991,7 +5276,12 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) pci_set_master(pdev); + adapter = ice_adapter_get(pdev); + if (IS_ERR(adapter)) + return PTR_ERR(adapter); + pf->pdev = pdev; + pf->adapter = adapter; pci_set_drvdata(pdev, pf); set_bit(ICE_DOWN, pf->state); /* Disable service task until DOWN bit is cleared */ @@ -5023,29 +5313,25 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) if (err) goto err_init; - err = ice_init_eth(pf); - if (err) - goto err_init_eth; - - err = ice_init_rdma(pf); + devl_lock(priv_to_devlink(pf)); + err = ice_load(pf); if (err) - goto err_init_rdma; + goto err_load; err = ice_init_devlink(pf); if (err) goto err_init_devlink; - - ice_init_features(pf); + devl_unlock(priv_to_devlink(pf)); return 0; err_init_devlink: - ice_deinit_rdma(pf); -err_init_rdma: - ice_deinit_eth(pf); -err_init_eth: + ice_unload(pf); +err_load: + devl_unlock(priv_to_devlink(pf)); ice_deinit(pf); err_init: + ice_adapter_put(pdev); pci_disable_device(pdev); return err; } @@ -5131,23 +5417,28 @@ static void ice_remove(struct pci_dev *pdev) ice_free_vfs(pf); } + ice_hwmon_exit(pf); + ice_service_task_stop(pf); ice_aq_cancel_waiting_tasks(pf); set_bit(ICE_DOWN, pf->state); if (!ice_is_safe_mode(pf)) ice_remove_arfs(pf); - ice_deinit_features(pf); + + devl_lock(priv_to_devlink(pf)); ice_deinit_devlink(pf); - ice_deinit_rdma(pf); - ice_deinit_eth(pf); - ice_deinit(pf); + ice_unload(pf); + devl_unlock(priv_to_devlink(pf)); + + ice_deinit(pf); ice_vsi_release_all(pf); ice_setup_mc_magic_wake(pf); ice_set_wake(pf); + ice_adapter_put(pdev); pci_disable_device(pdev); } @@ -5167,7 +5458,6 @@ static void ice_shutdown(struct pci_dev *pdev) } } -#ifdef CONFIG_PM /** * ice_prepare_for_shutdown - prep for PCI shutdown * @pf: board private structure @@ -5229,6 +5519,7 @@ static int ice_reinit_interrupt_scheme(struct ice_pf *pf) if (ret) goto err_reinit; ice_vsi_map_rings_to_vectors(pf->vsi[v]); + ice_vsi_set_napi_queues(pf->vsi[v]); } ret = ice_req_irq_msix_misc(pf); @@ -5255,7 +5546,7 @@ err_reinit: * Power Management callback to quiesce the device and prepare * for D3 transition. */ -static int __maybe_unused ice_suspend(struct device *dev) +static int ice_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct ice_pf *pf; @@ -5276,7 +5567,7 @@ static int __maybe_unused ice_suspend(struct device *dev) */ disabled = ice_service_task_stop(pf); - ice_unplug_aux_dev(pf); + ice_deinit_rdma(pf); /* Already suspended?, then there is nothing to do */ if (test_and_set_bit(ICE_SUSPENDED, pf->state)) { @@ -5322,7 +5613,7 @@ static int __maybe_unused ice_suspend(struct device *dev) * ice_resume - PM callback for waking up from D3 * @dev: generic device information structure */ -static int __maybe_unused ice_resume(struct device *dev) +static int ice_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); enum ice_reset_req reset_type; @@ -5356,6 +5647,11 @@ static int __maybe_unused ice_resume(struct device *dev) if (ret) dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret); + ret = ice_init_rdma(pf); + if (ret) + dev_err(dev, "Reinitialize RDMA during resume failed: %d\n", + ret); + clear_bit(ICE_DOWN, pf->state); /* Now perform PF reset and rebuild */ reset_type = ICE_RESET_PFR; @@ -5373,7 +5669,6 @@ static int __maybe_unused ice_resume(struct device *dev) return 0; } -#endif /* CONFIG_PM */ /** * ice_pci_err_detected - warning that PCI error has been detected @@ -5465,7 +5760,7 @@ static void ice_pci_err_resume(struct pci_dev *pdev) return; } - ice_restore_all_vfs_msi_state(pdev); + ice_restore_all_vfs_msi_state(pf); ice_do_reset(pf, ICE_RESET_PFR); ice_service_task_restart(pf); @@ -5508,38 +5803,52 @@ static void ice_pci_err_reset_done(struct pci_dev *pdev) * Class, Class Mask, private data (not used) } */ static const struct pci_device_id ice_pci_tbl[] = { - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_BACKPLANE), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_QSFP), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_SFP), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_BACKPLANE), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_QSFP), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SFP), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_10G_BASE_T), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SGMII), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_BACKPLANE), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_QSFP), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SFP), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_10G_BASE_T), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SGMII), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_BACKPLANE), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SFP), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_10G_BASE_T), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SGMII), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_BACKPLANE), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_SFP), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822_SI_DFLT), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_BACKPLANE) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_QSFP) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_SFP) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_BACKPLANE) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_QSFP) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SFP) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_10G_BASE_T) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SGMII) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_BACKPLANE) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_QSFP) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SFP) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_10G_BASE_T) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SGMII) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_BACKPLANE) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SFP) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_10G_BASE_T) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SGMII) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_BACKPLANE) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_SFP) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822_SI_DFLT) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_BACKPLANE), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_QSFP), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_SFP), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E825C_SGMII), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_BACKPLANE) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_QSFP56) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_SFP) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830CC_SFP_DD) }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830C_BACKPLANE), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_XXV_BACKPLANE), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830C_QSFP), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_XXV_QSFP), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830C_SFP), }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E830_XXV_SFP), }, /* required last entry */ - { 0, } + {} }; MODULE_DEVICE_TABLE(pci, ice_pci_tbl); -static __maybe_unused SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume); static const struct pci_error_handlers ice_pci_err_handler = { .error_detected = ice_pci_err_detected, @@ -5554,11 +5863,11 @@ static struct pci_driver ice_driver = { .id_table = ice_pci_tbl, .probe = ice_probe, .remove = ice_remove, -#ifdef CONFIG_PM - .driver.pm = &ice_pm_ops, -#endif /* CONFIG_PM */ + .driver.pm = pm_sleep_ptr(&ice_pm_ops), .shutdown = ice_shutdown, .sriov_configure = ice_sriov_configure, + .sriov_get_vf_total_msix = ice_sriov_get_vf_total_msix, + .sriov_set_msix_vec_count = ice_sriov_set_msix_vec_count, .err_handler = &ice_pci_err_handler }; @@ -5570,23 +5879,40 @@ static struct pci_driver ice_driver = { */ static int __init ice_module_init(void) { - int status; + int status = -ENOMEM; pr_info("%s\n", ice_driver_string); pr_info("%s\n", ice_copyright); + ice_adv_lnk_speed_maps_init(); + ice_wq = alloc_workqueue("%s", 0, 0, KBUILD_MODNAME); if (!ice_wq) { pr_err("Failed to create workqueue\n"); - return -ENOMEM; + return status; } + ice_lag_wq = alloc_ordered_workqueue("ice_lag_wq", 0); + if (!ice_lag_wq) { + pr_err("Failed to create LAG workqueue\n"); + goto err_dest_wq; + } + + ice_debugfs_init(); + status = pci_register_driver(&ice_driver); if (status) { pr_err("failed to register PCI driver, err %d\n", status); - destroy_workqueue(ice_wq); + goto err_dest_lag_wq; } + return 0; + +err_dest_lag_wq: + destroy_workqueue(ice_lag_wq); + ice_debugfs_exit(); +err_dest_wq: + destroy_workqueue(ice_wq); return status; } module_init(ice_module_init); @@ -5600,7 +5926,9 @@ module_init(ice_module_init); static void __exit ice_module_exit(void) { pci_unregister_driver(&ice_driver); + ice_debugfs_exit(); destroy_workqueue(ice_wq); + destroy_workqueue(ice_lag_wq); pr_info("module unloaded\n"); } module_exit(ice_module_exit); @@ -5703,7 +6031,7 @@ static void ice_set_rx_mode(struct net_device *netdev) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; - if (!vsi) + if (!vsi || ice_is_switchdev_running(vsi->back)) return; /* Set the flags to synchronize filters @@ -5943,6 +6271,23 @@ ice_fix_features(struct net_device *netdev, netdev_features_t features) } /** + * ice_set_rx_rings_vlan_proto - update rings with new stripped VLAN proto + * @vsi: PF's VSI + * @vlan_ethertype: VLAN ethertype (802.1Q or 802.1ad) in network byte order + * + * Store current stripped VLAN proto in ring packet context, + * so it can be accessed more efficiently by packet processing code. + */ +static void +ice_set_rx_rings_vlan_proto(struct ice_vsi *vsi, __be16 vlan_ethertype) +{ + u16 i; + + ice_for_each_alloc_rxq(vsi, i) + vsi->rx_rings[i]->pkt_ctx.vlan_proto = vlan_ethertype; +} + +/** * ice_set_vlan_offload_features - set VLAN offload features for the PF VSI * @vsi: PF's VSI * @features: features used to determine VLAN offload settings @@ -5984,6 +6329,9 @@ ice_set_vlan_offload_features(struct ice_vsi *vsi, netdev_features_t features) if (strip_err || insert_err) return -EIO; + ice_set_rx_rings_vlan_proto(vsi, enable_stripping ? + htons(vlan_ethertype) : 0); + return 0; } @@ -6255,7 +6603,7 @@ static void ice_tx_dim_work(struct work_struct *work) u16 itr; dim = container_of(work, struct dim, work); - rc = (struct ice_ring_container *)dim->priv; + rc = dim->priv; WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile)); @@ -6275,7 +6623,7 @@ static void ice_rx_dim_work(struct work_struct *work) u16 itr; dim = container_of(work, struct dim, work); - rc = (struct ice_ring_container *)dim->priv; + rc = dim->priv; WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile)); @@ -6474,6 +6822,7 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) { struct rtnl_link_stats64 *net_stats, *stats_prev; struct rtnl_link_stats64 *vsi_stats; + struct ice_pf *pf = vsi->back; u64 pkts, bytes; int i; @@ -6519,21 +6868,18 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) net_stats = &vsi->net_stats; stats_prev = &vsi->net_stats_prev; - /* clear prev counters after reset */ - if (vsi_stats->tx_packets < stats_prev->tx_packets || - vsi_stats->rx_packets < stats_prev->rx_packets) { - stats_prev->tx_packets = 0; - stats_prev->tx_bytes = 0; - stats_prev->rx_packets = 0; - stats_prev->rx_bytes = 0; + /* Update netdev counters, but keep in mind that values could start at + * random value after PF reset. And as we increase the reported stat by + * diff of Prev-Cur, we need to be sure that Prev is valid. If it's not, + * let's skip this round. + */ + if (likely(pf->stat_prev_loaded)) { + net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets; + net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes; + net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets; + net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes; } - /* update netdev counters */ - net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets; - net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes; - net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets; - net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes; - stats_prev->tx_packets = vsi_stats->tx_packets; stats_prev->tx_bytes = vsi_stats->tx_bytes; stats_prev->rx_packets = vsi_stats->rx_packets; @@ -6572,13 +6918,11 @@ void ice_update_vsi_stats(struct ice_vsi *vsi) cur_ns->rx_crc_errors = pf->stats.crc_errors; cur_ns->rx_errors = pf->stats.crc_errors + pf->stats.illegal_bytes + - pf->stats.rx_len_errors + pf->stats.rx_undersize + pf->hw_csum_rx_error + pf->stats.rx_jabber + pf->stats.rx_fragments + pf->stats.rx_oversize; - cur_ns->rx_length_errors = pf->stats.rx_len_errors; /* record drops from the port level */ cur_ns->rx_missed_errors = pf->stats.eth.rx_discards; } @@ -6718,9 +7062,6 @@ void ice_update_pf_stats(struct ice_pf *pf) &prev_ps->mac_remote_faults, &cur_ps->mac_remote_faults); - ice_stat_update32(hw, GLPRT_RLEC(port), pf->stat_prev_loaded, - &prev_ps->rx_len_errors, &cur_ps->rx_len_errors); - ice_stat_update32(hw, GLPRT_RUC(port), pf->stat_prev_loaded, &prev_ps->rx_undersize, &cur_ps->rx_undersize); @@ -6803,6 +7144,50 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) } /** + * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI + * @vsi: the VSI being un-configured + */ +static void ice_vsi_dis_irq(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u32 val; + int i; + + /* disable interrupt causation from each Rx queue; Tx queues are + * handled in ice_vsi_stop_tx_ring() + */ + if (vsi->rx_rings) { + ice_for_each_rxq(vsi, i) { + if (vsi->rx_rings[i]) { + u16 reg; + + reg = vsi->rx_rings[i]->reg_idx; + val = rd32(hw, QINT_RQCTL(reg)); + val &= ~QINT_RQCTL_CAUSE_ENA_M; + wr32(hw, QINT_RQCTL(reg), val); + } + } + } + + /* disable each interrupt */ + ice_for_each_q_vector(vsi, i) { + if (!vsi->q_vectors[i]) + continue; + wr32(hw, GLINT_DYN_CTL(vsi->q_vectors[i]->reg_idx), 0); + } + + ice_flush(hw); + + /* don't call synchronize_irq() for VF's from the host */ + if (vsi->type == ICE_VSI_VF) + return; + + ice_for_each_q_vector(vsi, i) + synchronize_irq(vsi->q_vectors[i]->irq.virq); +} + +/** * ice_down - Shutdown the connection * @vsi: The VSI being stopped * @@ -6814,13 +7199,11 @@ int ice_down(struct ice_vsi *vsi) WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state)); - if (vsi->netdev && vsi->type == ICE_VSI_PF) { + if (vsi->netdev) { vlan_err = ice_vsi_del_vlan_zero(vsi); ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); netif_carrier_off(vsi->netdev); netif_tx_disable(vsi->netdev); - } else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) { - ice_eswitch_stop_all_tx_queues(vsi->back); } ice_vsi_dis_irq(vsi); @@ -7291,7 +7674,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) * fail. */ if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) - ice_ptp_reset(pf); + ice_ptp_rebuild(pf, reset_type); if (ice_is_feature_supported(pf, ICE_F_GNSS)) ice_gnss_init(pf); @@ -7303,15 +7686,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) goto err_vsi_rebuild; } - /* configure PTP timestamping after VSI rebuild */ - if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) - ice_ptp_cfg_timestamp(pf, false); - - err = ice_vsi_rebuild_by_type(pf, ICE_VSI_SWITCHDEV_CTRL); - if (err) { - dev_err(dev, "Switchdev CTRL VSI rebuild failed: %d\n", err); - goto err_vsi_rebuild; - } + ice_eswitch_rebuild(pf); if (reset_type == ICE_RESET_PFR) { err = ice_rebuild_channels(pf); @@ -7356,6 +7731,11 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) clear_bit(ICE_RESET_FAILED, pf->state); ice_plug_aux_dev(pf); + if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) + ice_lag_rebuild(pf); + + /* Restore timestamp mode settings after VSI rebuild */ + ice_ptp_restore_timestamp_mode(pf); return; err_vsi_rebuild: @@ -7424,7 +7804,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) return -EBUSY; } - netdev->mtu = (unsigned int)new_mtu; + WRITE_ONCE(netdev->mtu, (unsigned int)new_mtu); err = ice_down_up(vsi); if (err) return err; @@ -7605,6 +7985,59 @@ int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed) } /** + * ice_set_rss_hfunc - Set RSS HASH function + * @vsi: Pointer to VSI structure + * @hfunc: hash function (ICE_AQ_VSI_Q_OPT_RSS_*) + * + * Returns 0 on success, negative on failure + */ +int ice_set_rss_hfunc(struct ice_vsi *vsi, u8 hfunc) +{ + struct ice_hw *hw = &vsi->back->hw; + struct ice_vsi_ctx *ctx; + bool symm; + int err; + + if (hfunc == vsi->rss_hfunc) + return 0; + + if (hfunc != ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ && + hfunc != ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ) + return -EOPNOTSUPP; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID); + ctx->info.q_opt_rss = vsi->info.q_opt_rss; + ctx->info.q_opt_rss &= ~ICE_AQ_VSI_Q_OPT_RSS_HASH_M; + ctx->info.q_opt_rss |= + FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hfunc); + ctx->info.q_opt_tc = vsi->info.q_opt_tc; + ctx->info.q_opt_flags = vsi->info.q_opt_rss; + + err = ice_update_vsi(hw, vsi->idx, ctx, NULL); + if (err) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to configure RSS hash for VSI %d, error %d\n", + vsi->vsi_num, err); + } else { + vsi->info.q_opt_rss = ctx->info.q_opt_rss; + vsi->rss_hfunc = hfunc; + netdev_info(vsi->netdev, "Hash function set to: %sToeplitz\n", + hfunc == ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ ? + "Symmetric " : ""); + } + kfree(ctx); + if (err) + return err; + + /* Fix the symmetry setting for all existing RSS configurations */ + symm = !!(hfunc == ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ); + return ice_set_rss_cfg_symm(hw, vsi, symm); +} + +/** * ice_bridge_getlink - Get the hardware bridge mode * @skb: skb buff * @pid: process ID @@ -7701,13 +8134,12 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, pf_sw = pf->first_sw; /* find the attribute in the netlink message */ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; - nla_for_each_nested(attr, br_spec, rem) { - __u16 mode; + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { + __u16 mode = nla_get_u16(attr); - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; - mode = nla_get_u16(attr); if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB) return -EINVAL; /* Continue if bridge mode is not being flipped */ @@ -7790,8 +8222,8 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) struct ice_hw *hw = &pf->hw; u32 head, val = 0; - head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue])) & - QTX_COMM_HEAD_HEAD_M) >> QTX_COMM_HEAD_HEAD_S; + head = FIELD_GET(QTX_COMM_HEAD_HEAD_M, + rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue]))); /* Read interrupt register */ val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx)); @@ -8039,13 +8471,12 @@ static int ice_add_vsi_to_fdir(struct ice_pf *pf, struct ice_vsi *vsi) for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) { enum ice_flow_priority prio; - u64 prof_id; /* add this VSI to FDir profile for this flow */ prio = ICE_FLOW_PRIO_NORMAL; prof = hw->fdir_prof[flow]; - prof_id = flow + tun * ICE_FLTR_PTYPE_MAX; - status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, + status = ice_flow_add_entry(hw, ICE_BLK_FD, + prof->prof_id[tun], prof->vsi_h[0], vsi->idx, prio, prof->fdir_seg[tun], &entry_h); @@ -9088,8 +9519,14 @@ int ice_stop(struct net_device *netdev) int link_err = ice_force_phys_link_state(vsi, false); if (link_err) { - netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n", - vsi->vsi_num, link_err); + if (link_err == -ENOMEDIUM) + netdev_info(vsi->netdev, "Skipping link reconfig - no media attached, VSI %d\n", + vsi->vsi_num); + else + netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n", + vsi->vsi_num, link_err); + + ice_vsi_close(vsi); return -EIO; } } |