Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_main.c')
-rw-r--r--	drivers/net/ethernet/intel/ice/ice_main.c	1212
 1 file changed, 747 insertions(+), 465 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index ca2898467dcb..567694bf098b 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -22,6 +22,7 @@
 #include "ice_eswitch.h"
 #include "ice_tc_lib.h"
 #include "ice_vsi_vlan_ops.h"
+#include <net/xdp_sock_drv.h>
 
 #define DRV_SUMMARY	"Intel(R) Ethernet Connection E800 Series Linux Driver"
 
 static const char ice_driver_string[] = DRV_SUMMARY;
@@ -44,7 +45,6 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXX
 MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
 #endif /* !CONFIG_DYNAMIC_DEBUG */
 
-static DEFINE_IDA(ice_aux_ida);
 DEFINE_STATIC_KEY_FALSE(ice_xdp_locking_key);
 EXPORT_SYMBOL(ice_xdp_locking_key);
 
@@ -130,12 +130,17 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf)
 
 	ice_for_each_txq(vsi, i) {
 		struct ice_tx_ring *tx_ring = vsi->tx_rings[i];
+		struct ice_ring_stats *ring_stats;
 
 		if (!tx_ring)
 			continue;
 		if (ice_ring_ch_enabled(tx_ring))
 			continue;
 
+		ring_stats = tx_ring->ring_stats;
+		if (!ring_stats)
+			continue;
+
 		if (tx_ring->desc) {
 			/* If packet counter has not changed the queue is
 			 * likely stalled, so force an interrupt for this
@@ -144,8 +149,8 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf)
 			 * prev_pkt would be negative if there was no
 			 * pending work.
 			 */
-			packets = tx_ring->stats.pkts & INT_MAX;
-			if (tx_ring->tx_stats.prev_pkt == packets) {
+			packets = ring_stats->stats.pkts & INT_MAX;
+			if (ring_stats->tx_stats.prev_pkt == packets) {
 				/* Trigger sw interrupt to revive the queue */
 				ice_trigger_sw_intr(hw, tx_ring->q_vector);
 				continue;
@@ -155,7 +160,7 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf)
 			 * to ice_get_tx_pending()
 			 */
 			smp_rmb();
-			tx_ring->tx_stats.prev_pkt =
+			ring_stats->tx_stats.prev_pkt =
 			    ice_get_tx_pending(tx_ring) ? packets : -1;
 		}
 	}
@@ -270,6 +275,8 @@ static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m)
 	if (status && status != -EEXIST)
 		return status;
 
+	netdev_dbg(vsi->netdev, "set promisc filter bits for VSI %i: 0x%x\n",
+		   vsi->vsi_num, promisc_m);
 	return 0;
 }
 
@@ -295,24 +302,12 @@ static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m)
 						    promisc_m, 0);
 	}
 
+	netdev_dbg(vsi->netdev, "clear promisc filter bits for VSI %i: 0x%x\n",
+		   vsi->vsi_num, promisc_m);
 	return status;
 }
 
 /**
- * ice_get_devlink_port - Get devlink port from netdev
- * @netdev: the netdevice structure
- */
-static struct devlink_port *ice_get_devlink_port(struct net_device *netdev)
-{
-	struct ice_pf *pf = ice_netdev_to_pf(netdev);
-
-	if (!ice_is_switchdev_running(pf))
-		return NULL;
-
-	return &pf->devlink_port;
-}
-
-/**
  * ice_vsi_sync_fltr - Update the VSI filter list to the HW
  * @vsi: ptr to the VSI
  *
@@ -423,6 +418,16 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
 				}
 				err = 0;
 				vlan_ops->dis_rx_filtering(vsi);
+
+				/* promiscuous mode implies allmulticast so
+				 * that VSIs that are in promiscuous mode are
+				 * subscribed to multicast packets coming to
+				 * the port
+				 */
+				err = ice_set_promisc(vsi,
+						      ICE_MCAST_PROMISC_BITS);
+				if (err)
+					goto out_promisc;
 			}
 		} else {
 			/* Clear Rx filter to remove traffic from wire */
@@ -439,6 +444,18 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
 				    NETIF_F_HW_VLAN_CTAG_FILTER)
 					vlan_ops->ena_rx_filtering(vsi);
 			}
+
+			/* disable allmulti here, but only if allmulti is not
+			 * still enabled for the netdev
+			 */
+			if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) {
+				err = ice_clear_promisc(vsi,
+							ICE_MCAST_PROMISC_BITS);
+				if (err) {
+					netdev_err(netdev, "Error %d clearing multicast promiscuous on VSI %i\n",
+						   err, vsi->vsi_num);
+				}
+			}
 		}
 	}
 	goto exit;
@@ -547,7 +564,7 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
 	/* Disable VFs until reset is completed */
 	mutex_lock(&pf->vfs.table_lock);
 	ice_for_each_vf(pf, bkt, vf)
-		ice_set_vf_state_qs_dis(vf);
+		ice_set_vf_state_dis(vf);
 	mutex_unlock(&pf->vfs.table_lock);
 
 	if (ice_is_eswitch_mode_switchdev(pf)) {
@@ -1120,8 +1137,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
 	if (link_up == old_link && link_speed == old_link_speed)
 		return 0;
 
-	if (!ice_is_e810(&pf->hw))
-		ice_ptp_link_change(pf, pf->hw.pf_id, link_up);
+	ice_ptp_link_change(pf, pf->hw.pf_id, link_up);
 
 	if (ice_is_dcb_active(pf)) {
 		if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
@@ -2560,21 +2576,26 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
 
 	ice_for_each_xdp_txq(vsi, i) {
 		u16 xdp_q_idx = vsi->alloc_txq + i;
+		struct ice_ring_stats *ring_stats;
 		struct ice_tx_ring *xdp_ring;
 
 		xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL);
-
 		if (!xdp_ring)
 			goto free_xdp_rings;
 
+		ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL);
+		if (!ring_stats) {
+			ice_free_tx_ring(xdp_ring);
+			goto free_xdp_rings;
+		}
+
+		xdp_ring->ring_stats = ring_stats;
 		xdp_ring->q_index = xdp_q_idx;
 		xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
 		xdp_ring->vsi = vsi;
 		xdp_ring->netdev = NULL;
 		xdp_ring->dev = dev;
 		xdp_ring->count = vsi->num_tx_desc;
-		xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1;
-		xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1;
 		WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
 		if (ice_setup_tx_ring(xdp_ring))
 			goto free_xdp_rings;
@@ -2589,9 +2610,13 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
 	return 0;
 
 free_xdp_rings:
-	for (; i >= 0; i--)
-		if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
+	for (; i >= 0; i--) {
+		if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) {
+			kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu);
+			vsi->xdp_rings[i]->ring_stats = NULL;
 			ice_free_tx_ring(vsi->xdp_rings[i]);
+		}
+	}
 
 	return -ENOMEM;
 }
@@ -2792,6 +2817,8 @@ free_qmap:
 				synchronize_rcu();
 				ice_free_tx_ring(vsi->xdp_rings[i]);
 			}
+			kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu);
+			vsi->xdp_rings[i]->ring_stats = NULL;
 			kfree_rcu(vsi->xdp_rings[i], rcu);
 			vsi->xdp_rings[i] = NULL;
 		}
@@ -2860,6 +2887,18 @@ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi)
 }
 
 /**
+ * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
+ * @vsi: Pointer to VSI structure
+ */
+static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
+{
+	if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+		return ICE_RXBUF_1664;
+	else
+		return ICE_RXBUF_3072;
+}
+
+/**
  * ice_xdp_setup_prog - Add or remove XDP eBPF program
  * @vsi: VSI to setup XDP for
  * @prog: XDP program
@@ -2869,13 +2908,16 @@ static int
 ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
 		   struct netlink_ext_ack *extack)
 {
-	int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
+	unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
 	bool if_running = netif_running(vsi->netdev);
 	int ret = 0, xdp_ring_err = 0;
 
-	if (frame_size > vsi->rx_buf_len) {
-		NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
-		return -EOPNOTSUPP;
+	if (prog && !prog->aux->xdp_has_frags) {
+		if (frame_size > ice_max_xdp_frame_size(vsi)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "MTU is too large for linear frames and XDP prog does not support frags");
+			return -EOPNOTSUPP;
+		}
 	}
 
 	/* need to stop netdev while setting up the program for Rx rings */
@@ -2896,11 +2938,13 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
 			if (xdp_ring_err)
 				NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
 		}
+		xdp_features_set_redirect_target(vsi->netdev, true);
 		/* reallocate Rx queues that are used for zero-copy */
 		xdp_ring_err = ice_realloc_zc_buf(vsi, true);
 		if (xdp_ring_err)
 			NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed");
 	} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
+		xdp_features_clear_redirect_target(vsi->netdev);
 		xdp_ring_err = ice_destroy_xdp_rings(vsi);
 		if (xdp_ring_err)
 			NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
@@ -3315,10 +3359,11 @@ static void ice_napi_add(struct ice_vsi *vsi)
 
 /**
  * ice_set_ops - set netdev and ethtools ops for the given netdev
- * @netdev: netdev instance
+ * @vsi: the VSI associated with the new netdev
  */
-static void ice_set_ops(struct net_device *netdev)
+static void ice_set_ops(struct ice_vsi *vsi)
 {
+	struct net_device *netdev = vsi->netdev;
 	struct ice_pf *pf = ice_netdev_to_pf(netdev);
 
 	if (ice_is_safe_mode(pf)) {
@@ -3330,6 +3375,13 @@ static void ice_set_ops(struct net_device *netdev)
 	netdev->netdev_ops = &ice_netdev_ops;
 	netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
 	ice_set_ethtool_ops(netdev);
+
+	if (vsi->type != ICE_VSI_PF)
+		return;
+
+	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+			       NETDEV_XDP_ACT_XSK_ZEROCOPY |
+			       NETDEV_XDP_ACT_RX_SG;
 }
 
 /**
@@ -3418,53 +3470,8 @@ static void ice_set_netdev_features(struct net_device *netdev)
 	 * be changed at runtime
 	 */
 	netdev->hw_features |= NETIF_F_RXFCS;
-}
 
-/**
- * ice_cfg_netdev - Allocate, configure and register a netdev
- * @vsi: the VSI associated with the new netdev
- *
- * Returns 0 on success, negative value on failure
- */
-static int ice_cfg_netdev(struct ice_vsi *vsi)
-{
-	struct ice_netdev_priv *np;
-	struct net_device *netdev;
-	u8 mac_addr[ETH_ALEN];
-
-	netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq,
-				    vsi->alloc_rxq);
-	if (!netdev)
-		return -ENOMEM;
-
-	set_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
-	vsi->netdev = netdev;
-	np = netdev_priv(netdev);
-	np->vsi = vsi;
-
-	ice_set_netdev_features(netdev);
-
-	ice_set_ops(netdev);
-
-	if (vsi->type == ICE_VSI_PF) {
-		SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back));
-		ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
-		eth_hw_addr_set(netdev, mac_addr);
-		ether_addr_copy(netdev->perm_addr, mac_addr);
-	}
-
-	netdev->priv_flags |= IFF_UNICAST_FLT;
-
-	/* Setup netdev TC information */
-	ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc);
-
-	/* setup watchdog timeout value to be 5 second */
-	netdev->watchdog_timeo = 5 * HZ;
-
-	netdev->min_mtu = ETH_MIN_MTU;
-	netdev->max_mtu = ICE_MAX_MTU;
-
-	return 0;
+	netif_set_tso_max_size(netdev, ICE_MAX_TSO_SIZE);
 }
 
 /**
@@ -3492,14 +3499,27 @@ void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
 static struct ice_vsi *
 ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
 {
-	return ice_vsi_setup(pf, pi, ICE_VSI_PF, NULL, NULL);
+	struct ice_vsi_cfg_params params = {};
+
+	params.type = ICE_VSI_PF;
+	params.pi = pi;
+	params.flags = ICE_VSI_FLAG_INIT;
+
+	return ice_vsi_setup(pf, &params);
 }
 
 static struct ice_vsi *
 ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 		   struct ice_channel *ch)
 {
-	return ice_vsi_setup(pf, pi, ICE_VSI_CHNL, NULL, ch);
+	struct ice_vsi_cfg_params params = {};
+
+	params.type = ICE_VSI_CHNL;
+	params.pi = pi;
+	params.ch = ch;
+	params.flags = ICE_VSI_FLAG_INIT;
+
+	return ice_vsi_setup(pf, &params);
 }
 
 /**
@@ -3513,7 +3533,13 @@ ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 static struct ice_vsi *
 ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
 {
-	return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, NULL, NULL);
+	struct ice_vsi_cfg_params params = {};
+
+	params.type = ICE_VSI_CTRL;
+	params.pi = pi;
+	params.flags = ICE_VSI_FLAG_INIT;
+
+	return ice_vsi_setup(pf, &params);
 }
 
 /**
@@ -3527,7 +3553,13 @@ ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
 struct ice_vsi *
 ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
 {
-	return ice_vsi_setup(pf, pi, ICE_VSI_LB, NULL, NULL);
+	struct ice_vsi_cfg_params params = {};
+
+	params.type = ICE_VSI_LB;
+	params.pi = pi;
+	params.flags = ICE_VSI_FLAG_INIT;
+
+	return ice_vsi_setup(pf, &params);
 }
 
 /**
@@ -3687,20 +3719,6 @@ static void ice_tc_indir_block_unregister(struct ice_vsi *vsi)
 }
 
 /**
- * ice_tc_indir_block_remove - clean indirect TC block notifications
- * @pf: PF structure
- */
-static void ice_tc_indir_block_remove(struct ice_pf *pf)
-{
-	struct ice_vsi *pf_vsi = ice_get_main_vsi(pf);
-
-	if (!pf_vsi)
-		return;
-
-	ice_tc_indir_block_unregister(pf_vsi);
-}
-
-/**
  * ice_tc_indir_block_register - Register TC indirect block notifications
  * @vsi: VSI struct which has the netdev
  *
@@ -3720,78 +3738,6 @@ static int ice_tc_indir_block_register(struct ice_vsi *vsi)
 }
 
 /**
- * ice_setup_pf_sw - Setup the HW switch on startup or after reset
- * @pf: board private structure
- *
- * Returns 0 on success, negative value on failure
- */
-static int ice_setup_pf_sw(struct ice_pf *pf)
-{
-	struct device *dev = ice_pf_to_dev(pf);
-	bool dvm = ice_is_dvm_ena(&pf->hw);
-	struct ice_vsi *vsi;
-	int status;
-
-	if (ice_is_reset_in_progress(pf->state))
-		return -EBUSY;
-
-	status = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL);
-	if (status)
-		return -EIO;
-
-	vsi = ice_pf_vsi_setup(pf, pf->hw.port_info);
-	if (!vsi)
-		return -ENOMEM;
-
-	/* init channel list */
-	INIT_LIST_HEAD(&vsi->ch_list);
-
-	status = ice_cfg_netdev(vsi);
-	if (status)
-		goto unroll_vsi_setup;
-	/* netdev has to be configured before setting frame size */
-	ice_vsi_cfg_frame_size(vsi);
-
-	/* init indirect block notifications */
-	status = ice_tc_indir_block_register(vsi);
-	if (status) {
-		dev_err(dev, "Failed to register netdev notifier\n");
-		goto unroll_cfg_netdev;
-	}
-
-	/* Setup DCB netlink interface */
-	ice_dcbnl_setup(vsi);
-
-	/* registering the NAPI handler requires both the queues and
-	 * netdev to be created, which are done in ice_pf_vsi_setup()
-	 * and ice_cfg_netdev() respectively
-	 */
-	ice_napi_add(vsi);
-
-	status = ice_init_mac_fltr(pf);
-	if (status)
-		goto unroll_napi_add;
-
-	return 0;
-
-unroll_napi_add:
-	ice_tc_indir_block_unregister(vsi);
-unroll_cfg_netdev:
-	if (vsi) {
-		ice_napi_del(vsi);
-		if (vsi->netdev) {
-			clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
-			free_netdev(vsi->netdev);
-			vsi->netdev = NULL;
-		}
-	}
-
-unroll_vsi_setup:
-	ice_vsi_release(vsi);
-	return status;
-}
-
-/**
  * ice_get_avail_q_count - Get count of queues in use
  * @pf_qmap: bitmap to get queue use count from
  * @lock: pointer to a mutex that protects access to pf_qmap
@@ -4192,12 +4138,13 @@ bool ice_is_wol_supported(struct ice_hw *hw)
  * @vsi: VSI being changed
  * @new_rx: new number of Rx queues
  * @new_tx: new number of Tx queues
+ * @locked: is adev device_lock held
  *
  * Only change the number of queues if new_tx, or new_rx is non-0.
  *
  * Returns 0 on success.
  */
-int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx)
+int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked)
 {
 	struct ice_pf *pf = vsi->back;
 	int err = 0, timeout = 50;
@@ -4219,14 +4166,14 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx)
 
 	/* set for the next time the netdev is started */
 	if (!netif_running(vsi->netdev)) {
-		ice_vsi_rebuild(vsi, false);
+		ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
 		dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n");
 		goto done;
 	}
 
 	ice_vsi_close(vsi);
-	ice_vsi_rebuild(vsi, false);
-	ice_pf_dcb_recfg(pf);
+	ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
+	ice_pf_dcb_recfg(pf, locked);
 	ice_vsi_open(vsi);
 done:
 	clear_bit(ICE_CFG_BUSY, pf->state);
@@ -4488,6 +4435,23 @@ err_vsi_open:
 	return err;
 }
 
+static void ice_deinit_fdir(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi = ice_get_ctrl_vsi(pf);
+
+	if (!vsi)
+		return;
+
+	ice_vsi_manage_fdir(vsi, false);
+	ice_vsi_release(vsi);
+	if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
+		pf->vsi[pf->ctrl_vsi_idx] = NULL;
+		pf->ctrl_vsi_idx = ICE_NO_VSI;
+	}
+
+	mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
+}
+
 /**
  * ice_get_opt_fw_name - return optional firmware file name or NULL
  * @pf: pointer to the PF instance
@@ -4587,124 +4551,172 @@ static void ice_print_wake_reason(struct ice_pf *pf)
 }
 
 /**
- * ice_register_netdev - register netdev and devlink port
- * @pf: pointer to the PF struct
+ * ice_register_netdev - register netdev
+ * @vsi: pointer to the VSI struct
  */
-static int ice_register_netdev(struct ice_pf *pf)
+static int ice_register_netdev(struct ice_vsi *vsi)
 {
-	struct ice_vsi *vsi;
-	int err = 0;
+	int err;
 
-	vsi = ice_get_main_vsi(pf);
 	if (!vsi || !vsi->netdev)
 		return -EIO;
 
-	err = ice_devlink_create_pf_port(pf);
-	if (err)
-		goto err_devlink_create;
-
 	err = register_netdev(vsi->netdev);
 	if (err)
-		goto err_register_netdev;
+		return err;
 
 	set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
 	netif_carrier_off(vsi->netdev);
 	netif_tx_stop_all_queues(vsi->netdev);
-	devlink_port_type_eth_set(&pf->devlink_port, vsi->netdev);
 
 	return 0;
-err_register_netdev:
-	ice_devlink_destroy_pf_port(pf);
-err_devlink_create:
-	free_netdev(vsi->netdev);
-	vsi->netdev = NULL;
-	clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
-	return err;
+}
+
+static void ice_unregister_netdev(struct ice_vsi *vsi)
+{
+	if (!vsi || !vsi->netdev)
+		return;
+
+	unregister_netdev(vsi->netdev);
+	clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
 }
 
 /**
- * ice_probe - Device initialization routine
- * @pdev: PCI device information struct
- * @ent: entry in ice_pci_tbl
+ * ice_cfg_netdev - Allocate, configure and register a netdev
+ * @vsi: the VSI associated with the new netdev
  *
- * Returns 0 on success, negative on failure
+ * Returns 0 on success, negative value on failure
  */
-static int
-ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
+static int ice_cfg_netdev(struct ice_vsi *vsi)
 {
-	struct device *dev = &pdev->dev;
-	struct ice_pf *pf;
-	struct ice_hw *hw;
-	int i, err;
+	struct ice_netdev_priv *np;
+	struct net_device *netdev;
+	u8 mac_addr[ETH_ALEN];
 
-	if (pdev->is_virtfn) {
-		dev_err(dev, "can't probe a virtual function\n");
-		return -EINVAL;
+	netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq,
+				    vsi->alloc_rxq);
+	if (!netdev)
+		return -ENOMEM;
+
+	set_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
+	vsi->netdev = netdev;
+	np = netdev_priv(netdev);
+	np->vsi = vsi;
+
+	ice_set_netdev_features(netdev);
+	ice_set_ops(vsi);
+
+	if (vsi->type == ICE_VSI_PF) {
+		SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back));
+		ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
+		eth_hw_addr_set(netdev, mac_addr);
 	}
 
-	/* this driver uses devres, see
-	 * Documentation/driver-api/driver-model/devres.rst
-	 */
-	err = pcim_enable_device(pdev);
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+
+	/* Setup netdev TC information */
+	ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc);
+
+	netdev->max_mtu = ICE_MAX_MTU;
+
+	return 0;
+}
+
+static void ice_decfg_netdev(struct ice_vsi *vsi)
+{
+	clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
+	free_netdev(vsi->netdev);
+	vsi->netdev = NULL;
+}
+
+static int ice_start_eth(struct ice_vsi *vsi)
+{
+	int err;
+
+	err = ice_init_mac_fltr(vsi->back);
 	if (err)
 		return err;
 
-	err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), dev_driver_string(dev));
-	if (err) {
-		dev_err(dev, "BAR0 I/O map error %d\n", err);
-		return err;
-	}
+	rtnl_lock();
+	err = ice_vsi_open(vsi);
+	rtnl_unlock();
 
-	pf = ice_allocate_pf(dev);
-	if (!pf)
-		return -ENOMEM;
+	return err;
+}
 
-	/* initialize Auxiliary index to invalid value */
-	pf->aux_idx = -1;
+static int ice_init_eth(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi = ice_get_main_vsi(pf);
+	int err;
 
-	/* set up for high or low DMA */
-	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
-	if (err) {
-		dev_err(dev, "DMA configuration failed: 0x%x\n", err);
+	if (!vsi)
+		return -EINVAL;
+
+	/* init channel list */
+	INIT_LIST_HEAD(&vsi->ch_list);
+
+	err = ice_cfg_netdev(vsi);
+	if (err)
 		return err;
-	}
+	/* Setup DCB netlink interface */
+	ice_dcbnl_setup(vsi);
 
-	pci_enable_pcie_error_reporting(pdev);
-	pci_set_master(pdev);
+	err = ice_init_mac_fltr(pf);
+	if (err)
+		goto err_init_mac_fltr;
 
-	pf->pdev = pdev;
-	pci_set_drvdata(pdev, pf);
-	set_bit(ICE_DOWN, pf->state);
-	/* Disable service task until DOWN bit is cleared */
-	set_bit(ICE_SERVICE_DIS, pf->state);
+	err = ice_devlink_create_pf_port(pf);
+	if (err)
+		goto err_devlink_create_pf_port;
 
-	hw = &pf->hw;
-	hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
-	pci_save_state(pdev);
+	SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port);
 
-	hw->back = pf;
-	hw->vendor_id = pdev->vendor;
-	hw->device_id = pdev->device;
-	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
-	hw->subsystem_vendor_id = pdev->subsystem_vendor;
-	hw->subsystem_device_id = pdev->subsystem_device;
-	hw->bus.device = PCI_SLOT(pdev->devfn);
-	hw->bus.func = PCI_FUNC(pdev->devfn);
-	ice_set_ctrlq_len(hw);
+	err = ice_register_netdev(vsi);
+	if (err)
+		goto err_register_netdev;
 
-	pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M);
+	err = ice_tc_indir_block_register(vsi);
+	if (err)
+		goto err_tc_indir_block_register;
 
-#ifndef CONFIG_DYNAMIC_DEBUG
-	if (debug < -1)
-		hw->debug_mask = debug;
-#endif
+	ice_napi_add(vsi);
+
+	return 0;
+
+err_tc_indir_block_register:
+	ice_unregister_netdev(vsi);
+err_register_netdev:
+	ice_devlink_destroy_pf_port(pf);
+err_devlink_create_pf_port:
+err_init_mac_fltr:
+	ice_decfg_netdev(vsi);
+	return err;
+}
+
+static void ice_deinit_eth(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi = ice_get_main_vsi(pf);
+
+	if (!vsi)
+		return;
+
+	ice_vsi_close(vsi);
+	ice_unregister_netdev(vsi);
+	ice_devlink_destroy_pf_port(pf);
+	ice_tc_indir_block_unregister(vsi);
+	ice_decfg_netdev(vsi);
+}
+
+static int ice_init_dev(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	int err;
 
 	err = ice_init_hw(hw);
 	if (err) {
 		dev_err(dev, "ice_init_hw failed: %d\n", err);
-		err = -EIO;
-		goto err_exit_unroll;
+		return err;
 	}
 
 	ice_init_feature_support(pf);
@@ -4727,55 +4739,31 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 	err = ice_init_pf(pf);
 	if (err) {
 		dev_err(dev, "ice_init_pf failed: %d\n", err);
-		goto err_init_pf_unroll;
+		goto err_init_pf;
 	}
 
-	ice_devlink_init_regions(pf);
-
 	pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port;
 	pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port;
 	pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
 	pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared;
-	i = 0;
 	if (pf->hw.tnl.valid_count[TNL_VXLAN]) {
-		pf->hw.udp_tunnel_nic.tables[i].n_entries =
+		pf->hw.udp_tunnel_nic.tables[0].n_entries =
 			pf->hw.tnl.valid_count[TNL_VXLAN];
-		pf->hw.udp_tunnel_nic.tables[i].tunnel_types =
+		pf->hw.udp_tunnel_nic.tables[0].tunnel_types =
 			UDP_TUNNEL_TYPE_VXLAN;
-		i++;
 	}
 	if (pf->hw.tnl.valid_count[TNL_GENEVE]) {
-		pf->hw.udp_tunnel_nic.tables[i].n_entries =
+		pf->hw.udp_tunnel_nic.tables[1].n_entries =
 			pf->hw.tnl.valid_count[TNL_GENEVE];
-		pf->hw.udp_tunnel_nic.tables[i].tunnel_types =
+		pf->hw.udp_tunnel_nic.tables[1].tunnel_types =
 			UDP_TUNNEL_TYPE_GENEVE;
-		i++;
-	}
-
-	pf->num_alloc_vsi = hw->func_caps.guar_num_vsi;
-	if (!pf->num_alloc_vsi) {
-		err = -EIO;
-		goto err_init_pf_unroll;
-	}
-	if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
-		dev_warn(&pf->pdev->dev,
-			 "limiting the VSI count due to UDP tunnel limitation %d > %d\n",
-			 pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
-		pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
-	}
-
-	pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi),
-			       GFP_KERNEL);
-	if (!pf->vsi) {
-		err = -ENOMEM;
-		goto err_init_pf_unroll;
 	}
 
 	err = ice_init_interrupt_scheme(pf);
 	if (err) {
 		dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err);
 		err = -EIO;
-		goto err_init_vsi_unroll;
+		goto err_init_interrupt_scheme;
 	}
 
 	/* In case of MSIX we are going to setup the misc vector right here
@@ -4786,49 +4774,94 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 	err = ice_req_irq_msix_misc(pf);
 	if (err) {
 		dev_err(dev, "setup of misc vector failed: %d\n", err);
-		goto err_init_interrupt_unroll;
+		goto err_req_irq_msix_misc;
 	}
 
-	/* create switch struct for the switch element created by FW on boot */
-	pf->first_sw = devm_kzalloc(dev, sizeof(*pf->first_sw), GFP_KERNEL);
-	if (!pf->first_sw) {
-		err = -ENOMEM;
-		goto err_msix_misc_unroll;
-	}
+	return 0;
 
-	if (hw->evb_veb)
-		pf->first_sw->bridge_mode = BRIDGE_MODE_VEB;
-	else
-		pf->first_sw->bridge_mode = BRIDGE_MODE_VEPA;
+err_req_irq_msix_misc:
+	ice_clear_interrupt_scheme(pf);
+err_init_interrupt_scheme:
+	ice_deinit_pf(pf);
+err_init_pf:
+	ice_deinit_hw(hw);
+	return err;
+}
 
-	pf->first_sw->pf = pf;
+static void ice_deinit_dev(struct ice_pf *pf)
+{
+	ice_free_irq_msix_misc(pf);
+	ice_clear_interrupt_scheme(pf);
+	ice_deinit_pf(pf);
+	ice_deinit_hw(&pf->hw);
+}
 
-	/* record the sw_id available for later use */
-	pf->first_sw->sw_id = hw->port_info->sw_id;
+static void ice_init_features(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
 
-	err = ice_setup_pf_sw(pf);
-	if (err) {
-		dev_err(dev, "probe failed due to setup PF switch: %d\n", err);
-		goto err_alloc_sw_unroll;
-	}
+	if (ice_is_safe_mode(pf))
+		return;
 
-	clear_bit(ICE_SERVICE_DIS, pf->state);
+	/* initialize DDP driven features */
+	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+		ice_ptp_init(pf);
 
-	/* tell the firmware we are up */
-	err = ice_send_version(pf);
-	if (err) {
-		dev_err(dev, "probe failed sending driver version %s. error: %d\n",
-			UTS_RELEASE, err);
-		goto err_send_version_unroll;
+	if (ice_is_feature_supported(pf, ICE_F_GNSS))
+		ice_gnss_init(pf);
+
+	/* Note: Flow director init failure is non-fatal to load */
+	if (ice_init_fdir(pf))
+		dev_err(dev, "could not initialize flow director\n");
+
+	/* Note: DCB init failure is non-fatal to load */
+	if (ice_init_pf_dcb(pf, false)) {
+		clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	} else {
+		ice_cfg_lldp_mib_change(&pf->hw, true);
 	}
 
-	/* since everything is good, start the service timer */
-	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
+	if (ice_init_lag(pf))
+		dev_warn(dev, "Failed to init link aggregation support\n");
+}
+
+static void ice_deinit_features(struct ice_pf *pf)
+{
+	ice_deinit_lag(pf);
+	if (test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
+		ice_cfg_lldp_mib_change(&pf->hw, false);
+	ice_deinit_fdir(pf);
+	if (ice_is_feature_supported(pf, ICE_F_GNSS))
+		ice_gnss_exit(pf);
+	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+		ice_ptp_release(pf);
+}
+
+static void ice_init_wakeup(struct ice_pf *pf)
+{
+	/* Save wakeup reason register for later use */
+	pf->wakeup_reason = rd32(&pf->hw, PFPM_WUS);
+
+	/* check for a power management event */
+	ice_print_wake_reason(pf);
+
+	/* clear wake status, all bits */
+	wr32(&pf->hw, PFPM_WUS, U32_MAX);
+
+	/* Disable WoL at init, wait for user to enable */
+	device_set_wakeup_enable(ice_pf_to_dev(pf), false);
+}
+
+static int ice_init_link(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	int err;
 
 	err = ice_init_link_events(pf->hw.port_info);
 	if (err) {
 		dev_err(dev, "ice_init_link_events failed: %d\n", err);
-		goto err_send_version_unroll;
+		return err;
 	}
 
 	/* not a fatal error if this fails */
@@ -4864,106 +4897,350 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
 	}
 
-	ice_verify_cacheline_size(pf);
+	return err;
+}
 
-	/* Save wakeup reason register for later use */
-	pf->wakeup_reason = rd32(hw, PFPM_WUS);
+static int ice_init_pf_sw(struct ice_pf *pf)
+{
+	bool dvm = ice_is_dvm_ena(&pf->hw);
+	struct ice_vsi *vsi;
+	int err;
 
-	/* check for a power management event */
-	ice_print_wake_reason(pf);
+	/* create switch struct for the switch element created by FW on boot */
+	pf->first_sw = kzalloc(sizeof(*pf->first_sw), GFP_KERNEL);
+	if (!pf->first_sw)
+		return -ENOMEM;
 
-	/* clear wake status, all bits */
-	wr32(hw, PFPM_WUS, U32_MAX);
+	if (pf->hw.evb_veb)
+		pf->first_sw->bridge_mode = BRIDGE_MODE_VEB;
+	else
+		pf->first_sw->bridge_mode = BRIDGE_MODE_VEPA;
 
-	/* Disable WoL at init, wait for user to enable */
-	device_set_wakeup_enable(dev, false);
+	pf->first_sw->pf = pf;
 
-	if (ice_is_safe_mode(pf)) {
-		ice_set_safe_mode_vlan_cfg(pf);
-		goto probe_done;
+	/* record the sw_id available for later use */
+	pf->first_sw->sw_id = pf->hw.port_info->sw_id;
+
+	err = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL);
+	if (err)
+		goto err_aq_set_port_params;
+
+	vsi = ice_pf_vsi_setup(pf, pf->hw.port_info);
+	if (!vsi) {
+		err = -ENOMEM;
+		goto err_pf_vsi_setup;
 	}
 
-	/* initialize DDP driven features */
-	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
-		ice_ptp_init(pf);
+	return 0;
 
-	if (ice_is_feature_supported(pf, ICE_F_GNSS))
-		ice_gnss_init(pf);
+err_pf_vsi_setup:
+err_aq_set_port_params:
+	kfree(pf->first_sw);
+	return err;
+}
 
-	/* Note: Flow director init failure is non-fatal to load */
-	if (ice_init_fdir(pf))
-		dev_err(dev, "could not initialize flow director\n");
+static void ice_deinit_pf_sw(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi = ice_get_main_vsi(pf);
 
-	/* Note: DCB init failure is non-fatal to load */
-	if (ice_init_pf_dcb(pf, false)) {
-		clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
-		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
-	} else {
-		ice_cfg_lldp_mib_change(&pf->hw, true);
+	if (!vsi)
+		return;
+
+	ice_vsi_release(vsi);
+	kfree(pf->first_sw);
+}
+
+static int ice_alloc_vsis(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+
+	pf->num_alloc_vsi = pf->hw.func_caps.guar_num_vsi;
+	if (!pf->num_alloc_vsi)
+		return -EIO;
+
+	if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
+		dev_warn(dev,
+			 "limiting the VSI count due to UDP tunnel limitation %d > %d\n",
+			 pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
+		pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
 	}
 
-	if (ice_init_lag(pf))
-		dev_warn(dev, "Failed to init link aggregation support\n");
+	pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi),
+			       GFP_KERNEL);
+	if (!pf->vsi)
+		return -ENOMEM;
+
+	pf->vsi_stats = devm_kcalloc(dev, pf->num_alloc_vsi,
+				     sizeof(*pf->vsi_stats), GFP_KERNEL);
+	if (!pf->vsi_stats) {
+		devm_kfree(dev, pf->vsi);
+		return -ENOMEM;
+	}
 
-	/* print PCI link speed and width */
-	pcie_print_link_status(pf->pdev);
+	return 0;
+}
 
-probe_done:
-	err = ice_register_netdev(pf);
-	if (err)
-		goto err_netdev_reg;
+static void ice_dealloc_vsis(struct ice_pf *pf)
+{
+	devm_kfree(ice_pf_to_dev(pf), pf->vsi_stats);
+	pf->vsi_stats = NULL;
+
+	pf->num_alloc_vsi = 0;
+	devm_kfree(ice_pf_to_dev(pf), pf->vsi);
+	pf->vsi = NULL;
+}
+
+static int ice_init_devlink(struct ice_pf *pf)
+{
+	int err;
 
 	err = ice_devlink_register_params(pf);
 	if (err)
-		goto err_netdev_reg;
+		return err;
+
+	ice_devlink_init_regions(pf);
+	ice_devlink_register(pf);
+
+	return 0;
+}
+
+static void ice_deinit_devlink(struct ice_pf *pf)
+{
+	ice_devlink_unregister(pf);
+	ice_devlink_destroy_regions(pf);
+	ice_devlink_unregister_params(pf);
+}
+
+static int ice_init(struct ice_pf *pf)
+{
+	int err;
+
+	err = ice_init_dev(pf);
+	if (err)
+		return err;
+
+	err = ice_alloc_vsis(pf);
+	if (err)
+		goto err_alloc_vsis;
+
+	err = ice_init_pf_sw(pf);
+	if (err)
+		goto err_init_pf_sw;
+
+	ice_init_wakeup(pf);
+
+	err = ice_init_link(pf);
+	if (err)
+		goto err_init_link;
+
+	err = ice_send_version(pf);
+	if (err)
+		goto err_init_link;
+
+	ice_verify_cacheline_size(pf);
+
+	if (ice_is_safe_mode(pf))
+		ice_set_safe_mode_vlan_cfg(pf);
+	else
+		/* print PCI link speed and width */
+		pcie_print_link_status(pf->pdev);
 
 	/* ready to go, so clear down state bit */
 	clear_bit(ICE_DOWN, pf->state);
 
-	if (ice_is_rdma_ena(pf)) {
-		pf->aux_idx = ida_alloc(&ice_aux_ida, GFP_KERNEL);
-		if (pf->aux_idx < 0) {
-			dev_err(dev, "Failed to allocate device ID for AUX driver\n");
-			err = -ENOMEM;
-			goto err_devlink_reg_param;
-		}
+	clear_bit(ICE_SERVICE_DIS, pf->state);
 
-		err = ice_init_rdma(pf);
-		if (err) {
-			dev_err(dev, "Failed to initialize RDMA: %d\n", err);
-			err = -EIO;
-			goto err_init_aux_unroll;
-		}
-	} else {
-		dev_warn(dev, "RDMA is not supported on this device\n");
-	}
+	/* since everything is good, start the service timer */
+	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
 
-	ice_devlink_register(pf);
 	return 0;
 
-err_init_aux_unroll:
-	pf->adev = NULL;
-	ida_free(&ice_aux_ida, pf->aux_idx);
-err_devlink_reg_param:
-	ice_devlink_unregister_params(pf);
-err_netdev_reg:
-err_send_version_unroll:
-	ice_vsi_release_all(pf);
-err_alloc_sw_unroll:
+err_init_link:
+	ice_deinit_pf_sw(pf);
+err_init_pf_sw:
+	ice_dealloc_vsis(pf);
+err_alloc_vsis:
+	ice_deinit_dev(pf);
+	return err;
+}
+
+static void ice_deinit(struct ice_pf *pf)
+{
 	set_bit(ICE_SERVICE_DIS, pf->state);
 	set_bit(ICE_DOWN, pf->state);
-	devm_kfree(dev, pf->first_sw);
-err_msix_misc_unroll:
-	ice_free_irq_msix_misc(pf);
-err_init_interrupt_unroll:
-	ice_clear_interrupt_scheme(pf);
-err_init_vsi_unroll:
-	devm_kfree(dev, pf->vsi);
-err_init_pf_unroll:
-	ice_deinit_pf(pf);
-	ice_devlink_destroy_regions(pf);
-	ice_deinit_hw(hw);
-err_exit_unroll:
-	pci_disable_pcie_error_reporting(pdev);
+
+	ice_deinit_pf_sw(pf);
+	ice_dealloc_vsis(pf);
+	ice_deinit_dev(pf);
+}
+
+/**
+ * ice_load - load pf by init hw and starting VSI
+ * @pf: pointer to the pf instance
+ */
+int ice_load(struct ice_pf *pf)
+{
+	struct ice_vsi_cfg_params params = {};
+	struct ice_vsi *vsi;
+	int err;
+
+	err = ice_reset(&pf->hw, ICE_RESET_PFR);
+	if (err)
+		return err;
+
+	err = ice_init_dev(pf);
+	if (err)
+		return err;
+
+	vsi = ice_get_main_vsi(pf);
+
+	params = ice_vsi_to_params(vsi);
+	params.flags = ICE_VSI_FLAG_INIT;
+
+	err = ice_vsi_cfg(vsi, &params);
+	if (err)
+		goto err_vsi_cfg;
+
+	err = ice_start_eth(ice_get_main_vsi(pf));
+	if (err)
+		goto err_start_eth;
+
+	err = ice_init_rdma(pf);
+	if (err)
+		goto err_init_rdma;
+
+	ice_init_features(pf);
+	ice_service_task_restart(pf);
+
+	clear_bit(ICE_DOWN, pf->state);
+
+	return 0;
+
+err_init_rdma:
+	ice_vsi_close(ice_get_main_vsi(pf));
+err_start_eth:
+	ice_vsi_decfg(ice_get_main_vsi(pf));
+err_vsi_cfg:
+	ice_deinit_dev(pf);
+	return err;
+}
+
+/**
+ * ice_unload - unload pf by stopping VSI and deinit hw
+ * @pf: pointer to the pf instance
+ */
+void ice_unload(struct ice_pf *pf)
+{
+	ice_deinit_features(pf);
+	ice_deinit_rdma(pf);
+	ice_vsi_close(ice_get_main_vsi(pf));
+	ice_vsi_decfg(ice_get_main_vsi(pf));
+	ice_deinit_dev(pf);
+}
+
+/**
+ * ice_probe - Device initialization routine
+ * @pdev: PCI device information struct
+ * @ent: entry in ice_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
+{
+	struct device *dev = &pdev->dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int err;
+
+	if (pdev->is_virtfn) {
+		dev_err(dev, "can't probe a virtual function\n");
+		return -EINVAL;
+	}
+
+	/* this driver uses devres, see
+	 * Documentation/driver-api/driver-model/devres.rst
+	 */
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), dev_driver_string(dev));
+	if (err) {
+		dev_err(dev, "BAR0 I/O map error %d\n", err);
+		return err;
+	}
+
+	pf = ice_allocate_pf(dev);
+	if (!pf)
+		return -ENOMEM;
+
+	/* initialize Auxiliary index to invalid value */
+	pf->aux_idx = -1;
+
+	/* set up for high or low DMA */
+	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_err(dev, "DMA configuration failed: 0x%x\n", err);
+		return err;
+	}
+
+	pci_set_master(pdev);
+
+	pf->pdev = pdev;
+	pci_set_drvdata(pdev, pf);
+	set_bit(ICE_DOWN, pf->state);
+	/* Disable service task until DOWN bit is cleared */
+	set_bit(ICE_SERVICE_DIS, pf->state);
+
+	hw = &pf->hw;
+	hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
+	pci_save_state(pdev);
+
+	hw->back = pf;
+	hw->port_info = NULL;
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+	hw->bus.device = PCI_SLOT(pdev->devfn);
+	hw->bus.func = PCI_FUNC(pdev->devfn);
+	ice_set_ctrlq_len(hw);
+
+	pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M);
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+	if (debug < -1)
+		hw->debug_mask = debug;
+#endif
+
+	err = ice_init(pf);
+	if (err)
+		goto err_init;
+
+	err = ice_init_eth(pf);
+	if (err)
+		goto err_init_eth;
+
+	err = ice_init_rdma(pf);
+	if (err)
+		goto err_init_rdma;
+
+	err = ice_init_devlink(pf);
+	if (err)
+		goto err_init_devlink;
+
+	ice_init_features(pf);
+
+	return 0;
+
+err_init_devlink:
+	ice_deinit_rdma(pf);
+err_init_rdma:
+	ice_deinit_eth(pf);
+err_init_eth:
+	ice_deinit(pf);
+err_init:
 	pci_disable_device(pdev);
 	return err;
 }
@@ -5038,49 +5315,33 @@ static void ice_remove(struct pci_dev *pdev)
 	struct ice_pf *pf = pci_get_drvdata(pdev);
 	int i;
 
-	ice_devlink_unregister(pf);
 	for (i = 0; i < ICE_MAX_RESET_WAIT; i++) {
 		if (!ice_is_reset_in_progress(pf->state))
 			break;
 		msleep(100);
 	}
 
-	ice_tc_indir_block_remove(pf);
-
 	if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) {
 		set_bit(ICE_VF_RESETS_DISABLED, pf->state);
 		ice_free_vfs(pf);
 	}
 
 	ice_service_task_stop(pf);
-
 	ice_aq_cancel_waiting_tasks(pf);
-	ice_unplug_aux_dev(pf);
-	if (pf->aux_idx >= 0)
-		ida_free(&ice_aux_ida, pf->aux_idx);
-	ice_devlink_unregister_params(pf);
 	set_bit(ICE_DOWN, pf->state);
 
-	ice_deinit_lag(pf);
-	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
-		ice_ptp_release(pf);
-	if (ice_is_feature_supported(pf, ICE_F_GNSS))
-		ice_gnss_exit(pf);
 	if (!ice_is_safe_mode(pf))
 		ice_remove_arfs(pf);
-	ice_setup_mc_magic_wake(pf);
+
+	ice_deinit_features(pf);
+	ice_deinit_devlink(pf);
+	ice_deinit_rdma(pf);
+	ice_deinit_eth(pf);
+	ice_deinit(pf);
+
 	ice_vsi_release_all(pf);
-	mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
+
+	ice_setup_mc_magic_wake(pf);
 	ice_set_wake(pf);
-	ice_free_irq_msix_misc(pf);
-	ice_for_each_vsi(pf, i) {
-		if (!pf->vsi[i])
-			continue;
-		ice_vsi_free_q_vectors(pf->vsi[i]);
-	}
-	ice_deinit_pf(pf);
-	ice_devlink_destroy_regions(pf);
-	ice_deinit_hw(&pf->hw);
 
 	/* Issue a PFR as part of the prescribed driver unload flow. Do not
 	 * do it via ice_schedule_reset() since there is no need to rebuild
@@ -5088,8 +5349,6 @@ static void ice_remove(struct pci_dev *pdev)
 	 */
 	ice_reset(&pf->hw, ICE_RESET_PFR);
 	pci_wait_for_pending_transaction(pdev);
-	ice_clear_interrupt_scheme(pf);
-	pci_disable_pcie_error_reporting(pdev);
 	pci_disable_device(pdev);
 }
 
@@ -5517,7 +5776,7 @@ static int __init ice_module_init(void)
 	pr_info("%s\n", ice_driver_string);
 	pr_info("%s\n", ice_copyright);
 
-	ice_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, KBUILD_MODNAME);
+	ice_wq = alloc_workqueue("%s", 0, 0, KBUILD_MODNAME);
 	if (!ice_wq) {
 		pr_err("Failed to create workqueue\n");
 		return -ENOMEM;
@@ -6123,24 +6382,21 @@ static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
 }
 
 /**
- * ice_vsi_cfg - Setup the VSI
+ * ice_vsi_cfg_lan - Setup the VSI lan related config
  * @vsi: the VSI being configured
  *
 * Return 0 on success and negative value on error
 */
-int ice_vsi_cfg(struct ice_vsi *vsi)
+int ice_vsi_cfg_lan(struct ice_vsi *vsi)
 {
 	int err;
 
-	if (vsi->netdev) {
+	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
 		ice_set_rx_mode(vsi->netdev);
 
-		if (vsi->type != ICE_VSI_LB) {
-			err = ice_vsi_vlan_setup(vsi);
-
-			if (err)
-				return err;
-		}
+		err = ice_vsi_vlan_setup(vsi);
+		if (err)
+			return err;
 	}
 
 	ice_vsi_cfg_dcb_rings(vsi);
@@ -6321,19 +6577,20 @@ static int ice_up_complete(struct ice_vsi *vsi)
 
 	if (vsi->port_info &&
 	    (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) &&
-	    vsi->netdev) {
+	    vsi->netdev && vsi->type == ICE_VSI_PF) {
 		ice_print_link_msg(vsi, true);
 		netif_tx_start_all_queues(vsi->netdev);
 		netif_carrier_on(vsi->netdev);
-		if (!ice_is_e810(&pf->hw))
-			ice_ptp_link_change(pf, pf->hw.pf_id, true);
+		ice_ptp_link_change(pf, pf->hw.pf_id, true);
 	}
 
 	/* Perform an initial read of the statistics registers now to
 	 * set the baseline so counters are ready when interface is up
 	 */
 	ice_update_eth_stats(vsi);
-	ice_service_task_schedule(pf);
+
+	if (vsi->type == ICE_VSI_PF)
+		ice_service_task_schedule(pf);
 
 	return 0;
 }
@@ -6346,7 +6603,7 @@ int ice_up(struct ice_vsi *vsi)
 {
 	int err;
 
-	err = ice_vsi_cfg(vsi);
+	err = ice_vsi_cfg_lan(vsi);
 	if (!err)
 		err = ice_up_complete(vsi);
 
@@ -6370,10 +6627,10 @@ ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp,
 	unsigned int start;
 
 	do {
-		start = u64_stats_fetch_begin_irq(syncp);
+		start = u64_stats_fetch_begin(syncp);
 		*pkts = stats.pkts;
 		*bytes = stats.bytes;
-	} while (u64_stats_fetch_retry_irq(syncp, start));
+	} while (u64_stats_fetch_retry(syncp, start));
 }
 
 /**
@@ -6395,14 +6652,16 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi,
 		u64 pkts = 0, bytes = 0;
 
 		ring = READ_ONCE(rings[i]);
-		if (!ring)
+		if (!ring || !ring->ring_stats)
 			continue;
-		ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes);
+		ice_fetch_u64_stats_per_ring(&ring->ring_stats->syncp,
+					     ring->ring_stats->stats, &pkts,
+					     &bytes);
 		vsi_stats->tx_packets += pkts;
 		vsi_stats->tx_bytes += bytes;
-		vsi->tx_restart += ring->tx_stats.restart_q;
-		vsi->tx_busy += ring->tx_stats.tx_busy;
-		vsi->tx_linearize += ring->tx_stats.tx_linearize;
+		vsi->tx_restart += ring->ring_stats->tx_stats.restart_q;
+		vsi->tx_busy += ring->ring_stats->tx_stats.tx_busy;
+		vsi->tx_linearize += ring->ring_stats->tx_stats.tx_linearize;
 	}
 }
 
@@ -6412,6 +6671,7 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi,
 */
 static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
 {
+	struct rtnl_link_stats64 *net_stats, *stats_prev;
 	struct rtnl_link_stats64 *vsi_stats;
 	u64 pkts, bytes;
 	int i;
@@ -6436,12 +6696,16 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
 	/* update Rx rings counters */
 	ice_for_each_rxq(vsi, i) {
 		struct ice_rx_ring *ring = READ_ONCE(vsi->rx_rings[i]);
+		struct ice_ring_stats *ring_stats;
 
-		ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes);
+		ring_stats = ring->ring_stats;
+		ice_fetch_u64_stats_per_ring(&ring_stats->syncp,
+					     ring_stats->stats, &pkts,
+					     &bytes);
 		vsi_stats->rx_packets += pkts;
 		vsi_stats->rx_bytes += bytes;
-		vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed;
-		vsi->rx_page_failed += ring->rx_stats.alloc_page_failed;
+		vsi->rx_buf_failed += ring_stats->rx_stats.alloc_buf_failed;
+		vsi->rx_page_failed += ring_stats->rx_stats.alloc_page_failed;
 	}
 
 	/* update XDP Tx rings counters */
@@ -6451,10 +6715,28 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
 
 	rcu_read_unlock();
 
-	vsi->net_stats.tx_packets = vsi_stats->tx_packets;
-	vsi->net_stats.tx_bytes = vsi_stats->tx_bytes;
-	vsi->net_stats.rx_packets = vsi_stats->rx_packets;
-	vsi->net_stats.rx_bytes = vsi_stats->rx_bytes;
+	net_stats = &vsi->net_stats;
+	stats_prev = &vsi->net_stats_prev;
+
+	/* clear prev counters after reset */
+	if (vsi_stats->tx_packets < stats_prev->tx_packets ||
+	    vsi_stats->rx_packets < stats_prev->rx_packets) {
+		stats_prev->tx_packets = 0;
+		stats_prev->tx_bytes = 0;
+		stats_prev->rx_packets = 0;
+		stats_prev->rx_bytes = 0;
+	}
+
+	/* update netdev counters */
+	net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets;
+	net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes;
+	net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets;
+	net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes;
+
+	stats_prev->tx_packets = vsi_stats->tx_packets;
+	stats_prev->tx_bytes = vsi_stats->tx_bytes;
+	stats_prev->rx_packets = vsi_stats->rx_packets;
+	stats_prev->rx_bytes = vsi_stats->rx_bytes;
 
 	kfree(vsi_stats);
 }
@@ -6516,6 +6798,9 @@ void ice_update_pf_stats(struct ice_pf *pf)
 	prev_ps = &pf->stats_prev;
 	cur_ps = &pf->stats;
 
+	if (ice_is_reset_in_progress(pf->state))
+		pf->stat_prev_loaded = false;
+
 	ice_stat_update40(hw, GLPRT_GORCL(port), pf->stat_prev_loaded,
 			  &prev_ps->eth.rx_bytes,
 			  &cur_ps->eth.rx_bytes);
@@ -6730,8 +7015,7 @@ int ice_down(struct ice_vsi *vsi)
 
 	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
 		vlan_err = ice_vsi_del_vlan_zero(vsi);
-		if (!ice_is_e810(&vsi->back->hw))
-			ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false);
+		ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false);
 		netif_carrier_off(vsi->netdev);
 		netif_tx_disable(vsi->netdev);
 	} else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) {
@@ -6887,7 +7171,7 @@ int ice_vsi_open_ctrl(struct ice_vsi *vsi)
 	if (err)
 		goto err_setup_rx;
 
-	err = ice_vsi_cfg(vsi);
+	err = ice_vsi_cfg_lan(vsi);
 	if (err)
 		goto err_setup_rx;
 
@@ -6941,7 +7225,7 @@ int ice_vsi_open(struct ice_vsi *vsi)
 	if (err)
 		goto err_setup_rx;
 
-	err = ice_vsi_cfg(vsi);
+	err = ice_vsi_cfg_lan(vsi);
 	if (err)
 		goto err_setup_rx;
 
@@ -7026,7 +7310,7 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
 			continue;
 
 		/* rebuild the VSI */
-		err = ice_vsi_rebuild(vsi, true);
+		err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_INIT);
 		if (err) {
 			dev_err(dev, "rebuild VSI failed, err %d, VSI index %d, type %s\n",
 				err, vsi->idx, ice_vsi_type_str(type));
@@ -7282,18 +7566,6 @@ clear_recovery:
 }
 
 /**
- * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
- * @vsi: Pointer to VSI structure
- */
-static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
-{
-	if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
-		return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM;
-	else
-		return ICE_RXBUF_3072;
-}
-
-/**
 * ice_change_mtu - NDO callback to change the MTU
 * @netdev: network interface device structure
 * @new_mtu: new value for maximum frame size
@@ -7305,6 +7577,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_pf *pf = vsi->back;
+	struct bpf_prog *prog;
 	u8 count = 0;
 	int err = 0;
 
@@ -7313,7 +7586,8 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 		return 0;
 	}
 
-	if (ice_is_xdp_ena_vsi(vsi)) {
+	prog = vsi->xdp_prog;
+	if (prog && !prog->aux->xdp_has_frags) {
 		int frame_size = ice_max_xdp_frame_size(vsi);
 
 		if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
@@ -7321,6 +7595,12 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 				   frame_size - ICE_ETH_PKT_HDR_PAD);
 			return -EINVAL;
 		}
+	} else if (test_bit(ICE_FLAG_LEGACY_RX, pf->flags)) {
+		if (new_mtu + ICE_ETH_PKT_HDR_PAD > ICE_MAX_FRAME_LEGACY_RX) {
+			netdev_err(netdev, "Too big MTU for legacy-rx; Max is %d\n",
+				   ICE_MAX_FRAME_LEGACY_RX - ICE_ETH_PKT_HDR_PAD);
+			return -EINVAL;
+		}
 	}
 
 	/* if a reset is in progress, wait for some time for it to complete */
@@ -8283,7 +8563,7 @@ static void ice_rem_all_chnl_fltrs(struct ice_pf *pf)
 
 		rule.rid = fltr->rid;
 		rule.rule_id = fltr->rule_id;
-		rule.vsi_handle = fltr->dest_id;
+		rule.vsi_handle = fltr->dest_vsi_handle;
 		status = ice_rem_adv_rule_by_id(&pf->hw, &rule);
 		if (status) {
 			if (status == -ENOENT)
@@ -8371,12 +8651,9 @@ static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_fltr)
 		/* clear the VSI from scheduler tree */
 		ice_rm_vsi_lan_cfg(ch->ch_vsi->port_info, ch->ch_vsi->idx);
 
-		/* Delete VSI from FW */
+		/* Delete VSI from FW, PF and HW VSI arrays */
 		ice_vsi_delete(ch->ch_vsi);
 
-		/* Delete VSI from PF and HW VSI arrays */
-		ice_vsi_clear(ch->ch_vsi);
-
 		/* free the channel */
 		kfree(ch);
 	}
@@ -8435,7 +8712,7 @@ static int ice_rebuild_channels(struct ice_pf *pf)
 		type = vsi->type;
 
 		/* rebuild ADQ VSI */
-		err = ice_vsi_rebuild(vsi, true);
+		err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_INIT);
 		if (err) {
 			dev_err(dev, "VSI (type:%s) at index %d rebuild failed, err %d\n",
 				ice_vsi_type_str(type), vsi->idx, err);
@@ -8595,6 +8872,12 @@ static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data)
 
 	switch (mode) {
 	case TC_MQPRIO_MODE_CHANNEL:
+		if (pf->hw.port_info->is_custom_tx_enabled) {
+			dev_err(dev, "Custom Tx scheduler feature enabled, can't configure ADQ\n");
+			return -EBUSY;
+		}
+		ice_tear_down_devlink_rate_tree(pf);
+
 		ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt);
 		if (ret) {
 			netdev_err(netdev, "failed to validate_mqprio_qopt(), ret %d\n",
@@ -8661,14 +8944,14 @@ config_tcf:
 	cur_rxq = vsi->num_rxq;
 
 	/* proceed with rebuild main VSI using correct number of queues */
-	ret = ice_vsi_rebuild(vsi, false);
+	ret = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
 	if (ret) {
 		/* fallback to current number of queues */
 		dev_info(dev, "Rebuild failed with new queues, try with current number of queues\n");
 		vsi->req_txq = cur_txq;
 		vsi->req_rxq = cur_rxq;
 		clear_bit(ICE_RESET_FAILED, pf->state);
-		if (ice_vsi_rebuild(vsi, false)) {
+		if (ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT)) {
 			dev_err(dev, "Rebuild of main VSI failed again\n");
 			return ret;
 		}
@@ -9108,5 +9391,4 @@ static const struct net_device_ops ice_netdev_ops = {
 	.ndo_bpf = ice_xdp,
 	.ndo_xdp_xmit = ice_xdp_xmit,
 	.ndo_xsk_wakeup = ice_xsk_wakeup,
-	.ndo_get_devlink_port = ice_get_devlink_port,
 };
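Note on the stats read path: the hunks around ice_fetch_u64_stats_per_ring() move the counters behind the new ice_ring_stats indirection and switch from the removed u64_stats_fetch_begin_irq()/u64_stats_fetch_retry_irq() to the plain variants. A minimal, self-contained sketch of the read side of that pattern follows; my_ring_stats and my_fetch_stats are illustrative names, not ice symbols, while the u64_stats_* primitives are the real kernel API.

/* Sketch only: lockless retry-based read of a 64-bit counter pair.
 * On 64-bit kernels the sync object compiles away; on 32-bit it is a
 * seqcount that makes the reader retry if a writer raced with it.
 */
#include <linux/u64_stats_sync.h>

struct my_ring_stats {			/* stands in for struct ice_ring_stats */
	struct u64_stats_sync syncp;
	u64 pkts;
	u64 bytes;
};

static void my_fetch_stats(const struct my_ring_stats *s, u64 *pkts, u64 *bytes)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&s->syncp);
		*pkts = s->pkts;	/* both reads must land in one window */
		*bytes = s->bytes;
	} while (u64_stats_fetch_retry(&s->syncp, start));
}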
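The new net_stats_prev logic in ice_update_vsi_ring_stats() makes the netdev counters monotonic across VSI rebuilds: ring totals restart from zero after a rebuild, so the function accumulates deltas against a snapshot and zeroes the snapshot whenever the fresh totals go backwards. A hedged sketch of that scheme, with the hypothetical helper name my_accumulate_stats (field handling mirrors the patch):

#include <linux/netdevice.h>

/* 'cur' holds totals summed from the (possibly reallocated) rings,
 * 'prev' is the snapshot from the previous pass, 'net' is what gets
 * reported to the stack and must never decrease.
 */
static void my_accumulate_stats(struct rtnl_link_stats64 *net,
				struct rtnl_link_stats64 *prev,
				const struct rtnl_link_stats64 *cur)
{
	/* rings were torn down and recreated: totals restarted from
	 * zero, so a stale snapshot would produce negative deltas
	 */
	if (cur->tx_packets < prev->tx_packets ||
	    cur->rx_packets < prev->rx_packets) {
		prev->tx_packets = 0;
		prev->tx_bytes = 0;
		prev->rx_packets = 0;
		prev->rx_bytes = 0;
	}

	net->tx_packets += cur->tx_packets - prev->tx_packets;
	net->tx_bytes += cur->tx_bytes - prev->tx_bytes;
	net->rx_packets += cur->rx_packets - prev->rx_packets;
	net->rx_bytes += cur->rx_bytes - prev->rx_bytes;

	prev->tx_packets = cur->tx_packets;
	prev->tx_bytes = cur->tx_bytes;
	prev->rx_packets = cur->rx_packets;
	prev->rx_bytes = cur->rx_bytes;
}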
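Several hunks free the new per-ring stats with kfree_rcu() rather than kfree(), and clear the pointer first: ice_update_vsi_ring_stats() walks the rings under rcu_read_lock(), so a stats block reached through a READ_ONCE()'d ring pointer must remain valid until a grace period elapses. A sketch of the general pattern, with hypothetical names (my_stats, my_teardown):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_stats {		/* stands in for struct ice_ring_stats */
	struct rcu_head rcu;	/* storage kfree_rcu() needs */
	u64 pkts;
};

/* Unpublish the pointer, then defer the free past an RCU grace period
 * so readers inside rcu_read_lock() never touch freed memory.
 */
static void my_teardown(struct my_stats __rcu **slot)
{
	struct my_stats *s = rcu_dereference_protected(*slot, true);

	RCU_INIT_POINTER(*slot, NULL);
	if (s)
		kfree_rcu(s, rcu);
}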
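Finally, ice_xdp_setup_prog() and ice_change_mtu() now enforce the linear-frame size cap only when the attached program has not declared xdp_has_frags; multi-buffer-aware programs may receive frames spanning several Rx buffers. A condensed sketch of that admission check under stated assumptions: my_xdp_mtu_ok() is hypothetical, the two limits mirror ICE_RXBUF_3072 and ICE_RXBUF_1664 from the driver headers, and the L2 overhead approximates ICE_ETH_PKT_HDR_PAD.

#include <linux/bpf.h>
#include <linux/if_vlan.h>
#include <linux/netdevice.h>

/* Reject single-buffer XDP when MTU plus L2 overhead cannot fit one
 * Rx buffer; programs that support frags skip the check entirely.
 */
static bool my_xdp_mtu_ok(struct net_device *dev, struct bpf_prog *prog,
			  bool legacy_rx)
{
	unsigned int max_frame = legacy_rx ? 1664 : 3072;
	/* Ethernet header, FCS and two VLAN tags on top of the MTU */
	unsigned int frame = dev->mtu + ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN;

	if (!prog || prog->aux->xdp_has_frags)
		return true;

	return frame <= max_frame;
}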