diff options
Diffstat (limited to 'drivers/net/ethernet/intel')
38 files changed, 4059 insertions, 2918 deletions
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c index 736a9f087bc9..c58a5377a287 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2017 Intel Corporation. + * Copyright(c) 2013 - 2018 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -262,6 +262,7 @@ s32 fm10k_stop_hw_generic(struct fm10k_hw *hw) * fm10k_read_hw_stats_32b - Reads value of 32-bit registers * @hw: pointer to the hardware structure * @addr: address of register containing a 32-bit value + * @stat: pointer to structure holding hw stat information * * Function reads the content of the register and returns the delta * between the base and the current value. @@ -281,6 +282,7 @@ u32 fm10k_read_hw_stats_32b(struct fm10k_hw *hw, u32 addr, * fm10k_read_hw_stats_48b - Reads value of 48-bit registers * @hw: pointer to the hardware structure * @addr: address of register containing the lower 32-bit value + * @stat: pointer to structure holding hw stat information * * Function reads the content of 2 registers, combined to represent a 48-bit * statistical value. Extra processing is required to handle overflowing. @@ -461,7 +463,6 @@ void fm10k_update_hw_stats_q(struct fm10k_hw *hw, struct fm10k_hw_stats_q *q, /** * fm10k_unbind_hw_stats_q - Unbind the queue counters from their queues - * @hw: pointer to the hardware structure * @q: pointer to the ring of hardware statistics queue * @idx: index pointing to the start of the ring iteration * @count: number of queues to iterate over diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 8e12aae065d8..2c93d719438f 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -28,13 +28,13 @@ #include "fm10k.h" -#define DRV_VERSION "0.22.1-k" +#define DRV_VERSION "0.23.4-k" #define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver" const char fm10k_driver_version[] = DRV_VERSION; char fm10k_driver_name[] = "fm10k"; static const char fm10k_driver_string[] = DRV_SUMMARY; static const char fm10k_copyright[] = - "Copyright(c) 2013 - 2017 Intel Corporation."; + "Copyright(c) 2013 - 2018 Intel Corporation."; MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); MODULE_DESCRIPTION(DRV_SUMMARY); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index a38ae5c54da3..75c99aed3c41 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2017 Intel Corporation. + * Copyright(c) 2013 - 2018 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -486,7 +486,7 @@ static void fm10k_insert_tunnel_port(struct list_head *ports, /** * fm10k_udp_tunnel_add - * @netdev: network interface device structure + * @dev: network interface device structure * @ti: Tunnel endpoint information * * This function is called when a new UDP tunnel port has been added. @@ -518,8 +518,8 @@ static void fm10k_udp_tunnel_add(struct net_device *dev, /** * fm10k_udp_tunnel_del - * @netdev: network interface device structure - * @ti: Tunnel endpoint information + * @dev: network interface device structure + * @ti: Tunnel end point information * * This function is called when a new UDP tunnel port is deleted. The freed * port will be removed from the list, then we reprogram the offloaded port @@ -803,7 +803,7 @@ int fm10k_queue_vlan_request(struct fm10k_intfc *interface, * @glort: the target glort for this update * @addr: the address to update * @vid: the vid to update - * @sync: whether to add or remove + * @set: whether to add or remove * * This function queues up a MAC request for sending to the switch manager. * A separate thread monitors the queue and sends updates to the switch diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index a434fecfdfeb..50f53e403ef5 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2017 Intel Corporation. + * Copyright(c) 2013 - 2018 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -29,7 +29,7 @@ static const struct fm10k_info *fm10k_info_tbl[] = { [fm10k_device_vf] = &fm10k_vf_info, }; -/** +/* * fm10k_pci_tbl - PCI Device ID Table * * Wildcard entries (PCI_ANY_ID) should come last @@ -211,7 +211,7 @@ static void fm10k_start_service_event(struct fm10k_intfc *interface) /** * fm10k_service_timer - Timer Call-back - * @data: pointer to interface cast into an unsigned long + * @t: pointer to timer data **/ static void fm10k_service_timer(struct timer_list *t) { @@ -649,7 +649,7 @@ void fm10k_update_stats(struct fm10k_intfc *interface) /** * fm10k_watchdog_flush_tx - flush queues on host not ready - * @interface - pointer to the device interface structure + * @interface: pointer to the device interface structure **/ static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface) { @@ -679,7 +679,7 @@ static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface) /** * fm10k_watchdog_subtask - check and bring link up - * @interface - pointer to the device interface structure + * @interface: pointer to the device interface structure **/ static void fm10k_watchdog_subtask(struct fm10k_intfc *interface) { @@ -703,7 +703,7 @@ static void fm10k_watchdog_subtask(struct fm10k_intfc *interface) /** * fm10k_check_hang_subtask - check for hung queues and dropped interrupts - * @interface - pointer to the device interface structure + * @interface: pointer to the device interface structure * * This function serves two purposes. First it strobes the interrupt lines * in order to make certain interrupts are occurring. Secondly it sets the @@ -1995,6 +1995,7 @@ skip_tx_dma_drain: /** * fm10k_sw_init - Initialize general software structures * @interface: host interface private structure to initialize + * @ent: PCI device ID entry * * fm10k_sw_init initializes the interface private data structure. * Fields are initialized based on PCI device information and diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index d6406fc31ffb..bee192fe2ffb 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2017 Intel Corporation. + * Copyright(c) 2013 - 2018 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -1180,7 +1180,7 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results, /** * fm10k_iov_select_vid - Select correct default VLAN ID - * @hw: Pointer to hardware structure + * @vf_info: pointer to VF information structure * @vid: VLAN ID to correct * * Will report an error if the VLAN ID is out of range. For VID = 0, it will diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c index f8e87bf086b9..9d0d31da426b 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2018 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -120,6 +120,7 @@ static s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string) * @msg: Pointer to message block * @attr_id: Attribute ID * @mac_addr: MAC address to be stored + * @vlan: VLAN to be stored * * This function will reorder a MAC address to be CPU endian and store it * in the attribute buffer. It will return success if provided with a @@ -155,8 +156,8 @@ s32 fm10k_tlv_attr_put_mac_vlan(u32 *msg, u16 attr_id, /** * fm10k_tlv_attr_get_mac_vlan - Get MAC/VLAN stored in attribute * @attr: Pointer to attribute - * @attr_id: Attribute ID * @mac_addr: location of buffer to store MAC address + * @vlan: location of buffer to store VLAN * * This function pulls the MAC address back out of the attribute and will * place it in the array pointed by by mac_addr. It will return success @@ -549,7 +550,7 @@ static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results, * @hw: Pointer to hardware structure * @msg: Pointer to message * @mbx: Pointer to mailbox information structure - * @func: Function array containing list of message handling functions + * @data: Pointer to message handler data structure * * This function should be the first function called upon receiving a * message. The handler will identify the message type and call the correct diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 46e9f4e0a02c..271ab1a861b7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -507,6 +507,7 @@ struct i40e_pf { #define I40E_HW_STOP_FW_LLDP BIT(16) #define I40E_HW_PORT_ID_VALID BIT(17) #define I40E_HW_RESTART_AUTONEG BIT(18) +#define I40E_HW_STOPPABLE_FW_LLDP BIT(19) u64 flags; #define I40E_FLAG_RX_CSUM_ENABLED BIT_ULL(0) @@ -824,6 +825,7 @@ struct i40e_q_vector { struct i40e_ring_container rx; struct i40e_ring_container tx; + u8 itr_countdown; /* when 0 should adjust adaptive ITR */ u8 num_ringpairs; /* total number of ring pairs in vector */ cpumask_t affinity_mask; @@ -832,8 +834,6 @@ struct i40e_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; -#define ITR_COUNTDOWN_START 100 - u8 itr_countdown; /* when 0 should adjust ITR */ } ____cacheline_internodealigned_in_smp; /* lan device */ @@ -1041,6 +1041,7 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi); void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset); void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs); void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id); +void i40e_client_update_msix_info(struct i40e_pf *pf); int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id); /** * i40e_irq_dynamic_enable - Enable default interrupt generation settings @@ -1109,4 +1110,10 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch); int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate); +int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add); +int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add); #endif /* _I40E_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index a852775d3059..0dfc52772c45 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1914,6 +1914,43 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_DEFAULT = 0xFF, }; +#define I40E_PHY_TYPES_BITMASK (BIT_ULL(I40E_PHY_TYPE_SGMII) | \ + BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) | \ + BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) | \ + BIT_ULL(I40E_PHY_TYPE_XAUI) | \ + BIT_ULL(I40E_PHY_TYPE_XFI) | \ + BIT_ULL(I40E_PHY_TYPE_SFI) | \ + BIT_ULL(I40E_PHY_TYPE_XLAUI) | \ + BIT_ULL(I40E_PHY_TYPE_XLPPI) | \ + BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) | \ + BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) | \ + BIT_ULL(I40E_PHY_TYPE_UNRECOGNIZED) | \ + BIT_ULL(I40E_PHY_TYPE_UNSUPPORTED) | \ + BIT_ULL(I40E_PHY_TYPE_100BASE_TX) | \ + BIT_ULL(I40E_PHY_TYPE_1000BASE_T) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_T) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) | \ + BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) | \ + BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) | \ + BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) | \ + BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) | \ + BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) | \ + BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) | \ + BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) | \ + BIT_ULL(I40E_PHY_TYPE_25GBASE_KR) | \ + BIT_ULL(I40E_PHY_TYPE_25GBASE_CR) | \ + BIT_ULL(I40E_PHY_TYPE_25GBASE_SR) | \ + BIT_ULL(I40E_PHY_TYPE_25GBASE_LR) | \ + BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC) | \ + BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC)) + #define I40E_LINK_SPEED_100MB_SHIFT 0x1 #define I40E_LINK_SPEED_1000MB_SHIFT 0x2 #define I40E_LINK_SPEED_10GB_SHIFT 0x3 diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 0de9610c1d8d..704695a61645 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -287,6 +287,17 @@ out: return capable; } +void i40e_client_update_msix_info(struct i40e_pf *pf) +{ + struct i40e_client_instance *cdev = pf->cinst; + + if (!cdev || !cdev->client) + return; + + cdev->lan_info.msix_count = pf->num_iwarp_msix; + cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector]; +} + /** * i40e_client_add_instance - add a client instance struct to the instance list * @pf: pointer to the board struct @@ -328,9 +339,6 @@ static void i40e_client_add_instance(struct i40e_pf *pf) return; } - cdev->lan_info.msix_count = pf->num_iwarp_msix; - cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector]; - mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list, struct netdev_hw_addr, list); if (mac) @@ -340,6 +348,8 @@ static void i40e_client_add_instance(struct i40e_pf *pf) cdev->client = registered_client; pf->cinst = cdev; + + i40e_client_update_msix_info(pf); } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index ef5a868aae46..4fa31d87d9d2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1208,6 +1208,29 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw) return media; } +/** + * i40e_poll_globr - Poll for Global Reset completion + * @hw: pointer to the hardware structure + * @retry_limit: how many times to retry before failure + **/ +static i40e_status i40e_poll_globr(struct i40e_hw *hw, + u32 retry_limit) +{ + u32 cnt, reg = 0; + + for (cnt = 0; cnt < retry_limit; cnt++) { + reg = rd32(hw, I40E_GLGEN_RSTAT); + if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK)) + return 0; + msleep(100); + } + + hw_dbg(hw, "Global reset failed.\n"); + hw_dbg(hw, "I40E_GLGEN_RSTAT = 0x%x\n", reg); + + return I40E_ERR_RESET_FAILED; +} + #define I40E_PF_RESET_WAIT_COUNT_A0 200 #define I40E_PF_RESET_WAIT_COUNT 200 /** @@ -1284,14 +1307,14 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw) if (!(reg & I40E_PFGEN_CTRL_PFSWR_MASK)) break; reg2 = rd32(hw, I40E_GLGEN_RSTAT); - if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK) { - hw_dbg(hw, "Core reset upcoming. Skipping PF reset request.\n"); - hw_dbg(hw, "I40E_GLGEN_RSTAT = 0x%x\n", reg2); - return I40E_ERR_NOT_READY; - } + if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK) + break; usleep_range(1000, 2000); } - if (reg & I40E_PFGEN_CTRL_PFSWR_MASK) { + if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK) { + if (i40e_poll_globr(hw, grst_del)) + return I40E_ERR_RESET_FAILED; + } else if (reg & I40E_PFGEN_CTRL_PFSWR_MASK) { hw_dbg(hw, "PF reset polling failed to complete.\n"); return I40E_ERR_RESET_FAILED; } @@ -2415,6 +2438,7 @@ i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw, * i40e_aq_set_switch_config * @hw: pointer to the hardware structure * @flags: bit flag values to set + * @mode: cloud filter mode * @valid_flags: which bit flags to set * @mode: cloud filter mode * @cmd_details: pointer to command details structure or NULL @@ -3200,9 +3224,10 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, u32 valid_functions, num_functions; u32 number, logical_id, phys_id; struct i40e_hw_capabilities *p; + u16 id, ocp_cfg_word0; + i40e_status status; u8 major_rev; u32 i = 0; - u16 id; cap = (struct i40e_aqc_list_capabilities_element_resp *) buff; @@ -3389,6 +3414,26 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, hw->num_ports++; } + /* OCP cards case: if a mezz is removed the Ethernet port is at + * disabled state in PRTGEN_CNF register. Additional NVM read is + * needed in order to check if we are dealing with OCP card. + * Those cards have 4 PFs at minimum, so using PRTGEN_CNF for counting + * physical ports results in wrong partition id calculation and thus + * not supporting WoL. + */ + if (hw->mac.type == I40E_MAC_X722) { + if (!i40e_acquire_nvm(hw, I40E_RESOURCE_READ)) { + status = i40e_aq_read_nvm(hw, I40E_SR_EMP_MODULE_PTR, + 2 * I40E_SR_OCP_CFG_WORD0, + sizeof(ocp_cfg_word0), + &ocp_cfg_word0, true, NULL); + if (!status && + (ocp_cfg_word0 & I40E_SR_OCP_ENABLED)) + hw->num_ports = 4; + i40e_release_nvm(hw); + } + } + valid_functions = p->valid_functions; num_functions = 0; while (valid_functions) { @@ -5531,7 +5576,7 @@ i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 seid, * function. * **/ -i40e_status +enum i40e_status_code i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid, struct i40e_aqc_cloud_filters_element_bb *filters, u8 filter_count) @@ -5625,7 +5670,7 @@ i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 seid, * function. * **/ -i40e_status +enum i40e_status_code i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid, struct i40e_aqc_cloud_filters_element_bb *filters, u8 filter_count) diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 4c3b4243cf65..b829fd365693 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -155,8 +155,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) dev_info(&pf->pdev->dev, " vlan_features = 0x%08lx\n", (unsigned long int)nd->vlan_features); } - dev_info(&pf->pdev->dev, - " vlgrp: & = %p\n", vsi->active_vlans); + dev_info(&pf->pdev->dev, " active_vlans is %s\n", + vsi->active_vlans ? "<valid>" : "<null>"); dev_info(&pf->pdev->dev, " flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n", vsi->flags, vsi->netdev_registered, vsi->current_netdev_flags); @@ -270,14 +270,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) continue; dev_info(&pf->pdev->dev, - " rx_rings[%i]: desc = %p\n", - i, rx_ring->desc); - dev_info(&pf->pdev->dev, - " rx_rings[%i]: dev = %p, netdev = %p, rx_bi = %p\n", - i, rx_ring->dev, - rx_ring->netdev, - rx_ring->rx_bi); - dev_info(&pf->pdev->dev, " rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n", i, *rx_ring->state, rx_ring->queue_index, @@ -307,17 +299,12 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) rx_ring->rx_stats.realloc_count, rx_ring->rx_stats.page_reuse_count); dev_info(&pf->pdev->dev, - " rx_rings[%i]: size = %i, dma = 0x%08lx\n", - i, rx_ring->size, - (unsigned long int)rx_ring->dma); - dev_info(&pf->pdev->dev, - " rx_rings[%i]: vsi = %p, q_vector = %p\n", - i, rx_ring->vsi, - rx_ring->q_vector); + " rx_rings[%i]: size = %i\n", + i, rx_ring->size); dev_info(&pf->pdev->dev, - " rx_rings[%i]: rx_itr_setting = %d (%s)\n", - i, rx_ring->rx_itr_setting, - ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed"); + " rx_rings[%i]: itr_setting = %d (%s)\n", + i, rx_ring->itr_setting, + ITR_IS_DYNAMIC(rx_ring->itr_setting) ? "dynamic" : "fixed"); } for (i = 0; i < vsi->num_queue_pairs; i++) { struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]); @@ -326,14 +313,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) continue; dev_info(&pf->pdev->dev, - " tx_rings[%i]: desc = %p\n", - i, tx_ring->desc); - dev_info(&pf->pdev->dev, - " tx_rings[%i]: dev = %p, netdev = %p, tx_bi = %p\n", - i, tx_ring->dev, - tx_ring->netdev, - tx_ring->tx_bi); - dev_info(&pf->pdev->dev, " tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n", i, *tx_ring->state, tx_ring->queue_index, @@ -355,20 +334,15 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) tx_ring->tx_stats.tx_busy, tx_ring->tx_stats.tx_done_old); dev_info(&pf->pdev->dev, - " tx_rings[%i]: size = %i, dma = 0x%08lx\n", - i, tx_ring->size, - (unsigned long int)tx_ring->dma); - dev_info(&pf->pdev->dev, - " tx_rings[%i]: vsi = %p, q_vector = %p\n", - i, tx_ring->vsi, - tx_ring->q_vector); + " tx_rings[%i]: size = %i\n", + i, tx_ring->size); dev_info(&pf->pdev->dev, " tx_rings[%i]: DCB tc = %d\n", i, tx_ring->dcb_tc); dev_info(&pf->pdev->dev, - " tx_rings[%i]: tx_itr_setting = %d (%s)\n", - i, tx_ring->tx_itr_setting, - ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed"); + " tx_rings[%i]: itr_setting = %d (%s)\n", + i, tx_ring->itr_setting, + ITR_IS_DYNAMIC(tx_ring->itr_setting) ? "dynamic" : "fixed"); } rcu_read_unlock(); dev_info(&pf->pdev->dev, @@ -466,8 +440,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) vsi->info.resp_reserved[6], vsi->info.resp_reserved[7], vsi->info.resp_reserved[8], vsi->info.resp_reserved[9], vsi->info.resp_reserved[10], vsi->info.resp_reserved[11]); - if (vsi->back) - dev_info(&pf->pdev->dev, " PF = %p\n", vsi->back); dev_info(&pf->pdev->dev, " idx = %d\n", vsi->idx); dev_info(&pf->pdev->dev, " tc_config: numtc = %d, enabled_tc = 0x%x\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 2f5bee713fef..0c7e7de595d3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -230,6 +230,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = { I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0), I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0), I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0), + I40E_PRIV_FLAG("link-down-on-close", + I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED, 0), I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0), I40E_PRIV_FLAG("disable-source-pruning", I40E_FLAG_SOURCE_PRUNING_DISABLED, 0), @@ -857,7 +859,9 @@ static int i40e_set_link_ksettings(struct net_device *netdev, if (hw->device_id == I40E_DEV_ID_KX_B || hw->device_id == I40E_DEV_ID_KX_C || hw->device_id == I40E_DEV_ID_20G_KR2 || - hw->device_id == I40E_DEV_ID_20G_KR2_A) { + hw->device_id == I40E_DEV_ID_20G_KR2_A || + hw->device_id == I40E_DEV_ID_25G_B || + hw->device_id == I40E_DEV_ID_KX_X722) { netdev_info(netdev, "Changing settings is not supported on backplane.\n"); return -EOPNOTSUPP; } @@ -868,23 +872,21 @@ static int i40e_set_link_ksettings(struct net_device *netdev, /* save autoneg out of ksettings */ autoneg = copy_ks.base.autoneg; - memset(&safe_ks, 0, sizeof(safe_ks)); + /* get our own copy of the bits to check against */ + memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings)); + safe_ks.base.cmd = copy_ks.base.cmd; + safe_ks.base.link_mode_masks_nwords = + copy_ks.base.link_mode_masks_nwords; + i40e_get_link_ksettings(netdev, &safe_ks); + /* Get link modes supported by hardware and check against modes * requested by the user. Return an error if unsupported mode was set. */ - i40e_phy_type_to_ethtool(pf, &safe_ks); if (!bitmap_subset(copy_ks.link_modes.advertising, safe_ks.link_modes.supported, __ETHTOOL_LINK_MODE_MASK_NBITS)) return -EINVAL; - /* get our own copy of the bits to check against */ - memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings)); - safe_ks.base.cmd = copy_ks.base.cmd; - safe_ks.base.link_mode_masks_nwords = - copy_ks.base.link_mode_masks_nwords; - i40e_get_link_ksettings(netdev, &safe_ks); - /* set autoneg back to what it currently is */ copy_ks.base.autoneg = safe_ks.base.autoneg; @@ -2244,14 +2246,14 @@ static int __i40e_get_coalesce(struct net_device *netdev, rx_ring = vsi->rx_rings[queue]; tx_ring = vsi->tx_rings[queue]; - if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) + if (ITR_IS_DYNAMIC(rx_ring->itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) + if (ITR_IS_DYNAMIC(tx_ring->itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC; /* we use the _usecs_high to store/set the interrupt rate limit * that the hardware supports, that almost but not quite @@ -2311,34 +2313,35 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_q_vector *q_vector; - u16 vector, intrl; + u16 intrl; intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit); - rx_ring->rx_itr_setting = ec->rx_coalesce_usecs; - tx_ring->tx_itr_setting = ec->tx_coalesce_usecs; + rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); + tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); if (ec->use_adaptive_rx_coalesce) - rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC; + rx_ring->itr_setting |= I40E_ITR_DYNAMIC; else - rx_ring->rx_itr_setting &= ~I40E_ITR_DYNAMIC; + rx_ring->itr_setting &= ~I40E_ITR_DYNAMIC; if (ec->use_adaptive_tx_coalesce) - tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC; + tx_ring->itr_setting |= I40E_ITR_DYNAMIC; else - tx_ring->tx_itr_setting &= ~I40E_ITR_DYNAMIC; + tx_ring->itr_setting &= ~I40E_ITR_DYNAMIC; q_vector = rx_ring->q_vector; - q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); q_vector = tx_ring->q_vector; - q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); - wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl); + /* The interrupt handler itself will take care of programming + * the Tx and Rx ITR values based on the values we have entered + * into the q_vector, no need to write the values now. + */ + + wr32(hw, I40E_PFINT_RATEN(q_vector->reg_idx), intrl); i40e_flush(hw); } @@ -2364,11 +2367,11 @@ static int __i40e_set_coalesce(struct net_device *netdev, vsi->work_limit = ec->tx_max_coalesced_frames_irq; if (queue < 0) { - cur_rx_itr = vsi->rx_rings[0]->rx_itr_setting; - cur_tx_itr = vsi->tx_rings[0]->tx_itr_setting; + cur_rx_itr = vsi->rx_rings[0]->itr_setting; + cur_tx_itr = vsi->tx_rings[0]->itr_setting; } else if (queue < vsi->num_queue_pairs) { - cur_rx_itr = vsi->rx_rings[queue]->rx_itr_setting; - cur_tx_itr = vsi->tx_rings[queue]->tx_itr_setting; + cur_rx_itr = vsi->rx_rings[queue]->itr_setting; + cur_tx_itr = vsi->tx_rings[queue]->itr_setting; } else { netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", vsi->num_queue_pairs - 1); @@ -2396,7 +2399,7 @@ static int __i40e_set_coalesce(struct net_device *netdev, return -EINVAL; } - if (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)) { + if (ec->rx_coalesce_usecs > I40E_MAX_ITR) { netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); return -EINVAL; } @@ -2407,16 +2410,16 @@ static int __i40e_set_coalesce(struct net_device *netdev, return -EINVAL; } - if (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)) { + if (ec->tx_coalesce_usecs > I40E_MAX_ITR) { netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); return -EINVAL; } if (ec->use_adaptive_rx_coalesce && !cur_rx_itr) - ec->rx_coalesce_usecs = I40E_MIN_ITR << 1; + ec->rx_coalesce_usecs = I40E_MIN_ITR; if (ec->use_adaptive_tx_coalesce && !cur_tx_itr) - ec->tx_coalesce_usecs = I40E_MIN_ITR << 1; + ec->tx_coalesce_usecs = I40E_MIN_ITR; intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high); vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg); @@ -4406,6 +4409,8 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) } flags_complete: + changed_flags = orig_flags ^ new_flags; + /* Before we finalize any flag changes, we need to perform some * checks to ensure that the changes are supported and safe. */ @@ -4415,21 +4420,17 @@ flags_complete: !(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE)) return -EOPNOTSUPP; - /* Disable FW LLDP not supported if NPAR active or if FW - * API version < 1.7 + /* If the driver detected FW LLDP was disabled on init, this flag could + * be set, however we do not support _changing_ the flag if NPAR is + * enabled or FW API version < 1.7. There are situations where older + * FW versions/NPAR enabled PFs could disable LLDP, however we _must_ + * not allow the user to enable/disable LLDP with this flag on + * unsupported FW versions. */ - if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) { - if (pf->hw.func_caps.npar_enable) { - dev_warn(&pf->pdev->dev, - "Unable to stop FW LLDP if NPAR active\n"); - return -EOPNOTSUPP; - } - - if (pf->hw.aq.api_maj_ver < 1 || - (pf->hw.aq.api_maj_ver == 1 && - pf->hw.aq.api_min_ver < 7)) { + if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { + if (!(pf->hw_features & I40E_HW_STOPPABLE_FW_LLDP)) { dev_warn(&pf->pdev->dev, - "FW ver does not support stopping FW LLDP\n"); + "Device does not support changing FW LLDP\n"); return -EOPNOTSUPP; } } @@ -4439,6 +4440,10 @@ flags_complete: * something else has modified the flags variable since we copied it * originally. We'll just punt with an error and log something in the * message buffer. + * + * This is the point of no return for this function. We need to have + * checked any discrepancies or misconfigurations and returned + * EOPNOTSUPP before updating pf->flags here. */ if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) { dev_warn(&pf->pdev->dev, @@ -4446,8 +4451,6 @@ flags_complete: return -EAGAIN; } - changed_flags = orig_flags ^ new_flags; - /* Process any additional changes needed as a result of flag changes. * The changed_flags value reflects the list of bits that were * changed in the code above. @@ -4479,6 +4482,12 @@ flags_complete: } } + if ((changed_flags & pf->flags & + I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) && + (pf->flags & I40E_FLAG_MFP_ENABLED)) + dev_warn(&pf->pdev->dev, + "Turning on link-down-on-close flag may affect other partitions\n"); + if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) { struct i40e_dcbx_config *dcbcfg; diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c deleted file mode 100644 index 2d1253c5b7a1..000000000000 --- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c +++ /dev/null @@ -1,1571 +0,0 @@ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ - -#include <linux/if_ether.h> -#include <scsi/scsi_cmnd.h> -#include <scsi/scsi_device.h> -#include <scsi/fc/fc_fs.h> -#include <scsi/fc/fc_fip.h> -#include <scsi/fc/fc_fcoe.h> -#include <scsi/libfc.h> -#include <scsi/libfcoe.h> -#include <uapi/linux/dcbnl.h> - -#include "i40e.h" -#include "i40e_fcoe.h" - -/** - * i40e_fcoe_sof_is_class2 - returns true if this is a FC Class 2 SOF - * @sof: the FCoE start of frame delimiter - **/ -static inline bool i40e_fcoe_sof_is_class2(u8 sof) -{ - return (sof == FC_SOF_I2) || (sof == FC_SOF_N2); -} - -/** - * i40e_fcoe_sof_is_class3 - returns true if this is a FC Class 3 SOF - * @sof: the FCoE start of frame delimiter - **/ -static inline bool i40e_fcoe_sof_is_class3(u8 sof) -{ - return (sof == FC_SOF_I3) || (sof == FC_SOF_N3); -} - -/** - * i40e_fcoe_sof_is_supported - returns true if the FC SOF is supported by HW - * @sof: the input SOF value from the frame - **/ -static inline bool i40e_fcoe_sof_is_supported(u8 sof) -{ - return i40e_fcoe_sof_is_class2(sof) || - i40e_fcoe_sof_is_class3(sof); -} - -/** - * i40e_fcoe_fc_sof - pull the SOF from FCoE header in the frame - * @skb: the frame whose EOF is to be pulled from - **/ -static inline int i40e_fcoe_fc_sof(struct sk_buff *skb, u8 *sof) -{ - *sof = ((struct fcoe_hdr *)skb_network_header(skb))->fcoe_sof; - - if (!i40e_fcoe_sof_is_supported(*sof)) - return -EINVAL; - return 0; -} - -/** - * i40e_fcoe_eof_is_supported - returns true if the EOF is supported by HW - * @eof: the input EOF value from the frame - **/ -static inline bool i40e_fcoe_eof_is_supported(u8 eof) -{ - return (eof == FC_EOF_N) || (eof == FC_EOF_T) || - (eof == FC_EOF_NI) || (eof == FC_EOF_A); -} - -/** - * i40e_fcoe_fc_eof - pull EOF from FCoE trailer in the frame - * @skb: the frame whose EOF is to be pulled from - **/ -static inline int i40e_fcoe_fc_eof(struct sk_buff *skb, u8 *eof) -{ - /* the first byte of the last dword is EOF */ - skb_copy_bits(skb, skb->len - 4, eof, 1); - - if (!i40e_fcoe_eof_is_supported(*eof)) - return -EINVAL; - return 0; -} - -/** - * i40e_fcoe_ctxt_eof - convert input FC EOF for descriptor programming - * @eof: the input eof value from the frame - * - * The FC EOF is converted to the value understood by HW for descriptor - * programming. Never call this w/o calling i40e_fcoe_eof_is_supported() - * first and that already checks for all supported valid eof values. - **/ -static inline u32 i40e_fcoe_ctxt_eof(u8 eof) -{ - switch (eof) { - case FC_EOF_N: - return I40E_TX_DESC_CMD_L4T_EOFT_EOF_N; - case FC_EOF_T: - return I40E_TX_DESC_CMD_L4T_EOFT_EOF_T; - case FC_EOF_NI: - return I40E_TX_DESC_CMD_L4T_EOFT_EOF_NI; - case FC_EOF_A: - return I40E_TX_DESC_CMD_L4T_EOFT_EOF_A; - default: - /* Supported valid eof shall be already checked by - * calling i40e_fcoe_eof_is_supported() first, - * therefore this default case shall never hit. - */ - WARN_ON(1); - return -EINVAL; - } -} - -/** - * i40e_fcoe_xid_is_valid - returns true if the exchange id is valid - * @xid: the exchange id - **/ -static inline bool i40e_fcoe_xid_is_valid(u16 xid) -{ - return (xid != FC_XID_UNKNOWN) && (xid < I40E_FCOE_DDP_MAX); -} - -/** - * i40e_fcoe_ddp_unmap - unmap the mapped sglist associated - * @pf: pointer to PF - * @ddp: sw DDP context - * - * Unmap the scatter-gather list associated with the given SW DDP context - * - * Returns: data length already ddp-ed in bytes - * - **/ -static inline void i40e_fcoe_ddp_unmap(struct i40e_pf *pf, - struct i40e_fcoe_ddp *ddp) -{ - if (test_and_set_bit(__I40E_FCOE_DDP_UNMAPPED, &ddp->flags)) - return; - - if (ddp->sgl) { - dma_unmap_sg(&pf->pdev->dev, ddp->sgl, ddp->sgc, - DMA_FROM_DEVICE); - ddp->sgl = NULL; - ddp->sgc = 0; - } - - if (ddp->pool) { - dma_pool_free(ddp->pool, ddp->udl, ddp->udp); - ddp->pool = NULL; - } -} - -/** - * i40e_fcoe_ddp_clear - clear the given SW DDP context - * @ddp - SW DDP context - **/ -static inline void i40e_fcoe_ddp_clear(struct i40e_fcoe_ddp *ddp) -{ - memset(ddp, 0, sizeof(struct i40e_fcoe_ddp)); - ddp->xid = FC_XID_UNKNOWN; - ddp->flags = __I40E_FCOE_DDP_NONE; -} - -/** - * i40e_fcoe_progid_is_fcoe - check if the prog_id is for FCoE - * @id: the prog id for the programming status Rx descriptor write-back - **/ -static inline bool i40e_fcoe_progid_is_fcoe(u8 id) -{ - return (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) || - (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS); -} - -/** - * i40e_fcoe_fc_get_xid - get xid from the frame header - * @fh: the fc frame header - * - * In case the incoming frame's exchange is originated from - * the initiator, then received frame's exchange id is ANDed - * with fc_cpu_mask bits to get the same cpu on which exchange - * was originated, otherwise just use the current cpu. - * - * Returns ox_id if exchange originator, rx_id if responder - **/ -static inline u16 i40e_fcoe_fc_get_xid(struct fc_frame_header *fh) -{ - u32 f_ctl = ntoh24(fh->fh_f_ctl); - - return (f_ctl & FC_FC_EX_CTX) ? - be16_to_cpu(fh->fh_ox_id) : - be16_to_cpu(fh->fh_rx_id); -} - -/** - * i40e_fcoe_fc_frame_header - get fc frame header from skb - * @skb: packet - * - * This checks if there is a VLAN header and returns the data - * pointer to the start of the fc_frame_header. - * - * Returns pointer to the fc_frame_header - **/ -static inline struct fc_frame_header *i40e_fcoe_fc_frame_header( - struct sk_buff *skb) -{ - void *fh = skb->data + sizeof(struct fcoe_hdr); - - if (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)) - fh += sizeof(struct vlan_hdr); - - return (struct fc_frame_header *)fh; -} - -/** - * i40e_fcoe_ddp_put - release the DDP context for a given exchange id - * @netdev: the corresponding net_device - * @xid: the exchange id that corresponding DDP context will be released - * - * This is the implementation of net_device_ops.ndo_fcoe_ddp_done - * and it is expected to be called by ULD, i.e., FCP layer of libfc - * to release the corresponding ddp context when the I/O is done. - * - * Returns : data length already ddp-ed in bytes - **/ -static int i40e_fcoe_ddp_put(struct net_device *netdev, u16 xid) -{ - struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_pf *pf = np->vsi->back; - struct i40e_fcoe *fcoe = &pf->fcoe; - int len = 0; - struct i40e_fcoe_ddp *ddp = &fcoe->ddp[xid]; - - if (!fcoe || !ddp) - goto out; - - if (test_bit(__I40E_FCOE_DDP_DONE, &ddp->flags)) - len = ddp->len; - i40e_fcoe_ddp_unmap(pf, ddp); -out: - return len; -} - -/** - * i40e_fcoe_sw_init - sets up the HW for FCoE - * @pf: pointer to PF - **/ -void i40e_init_pf_fcoe(struct i40e_pf *pf) -{ - struct i40e_hw *hw = &pf->hw; - u32 val; - - pf->flags &= ~I40E_FLAG_FCOE_ENABLED; - pf->num_fcoe_qps = 0; - pf->fcoe_hmc_cntx_num = 0; - pf->fcoe_hmc_filt_num = 0; - - if (!pf->hw.func_caps.fcoe) { - dev_dbg(&pf->pdev->dev, "FCoE capability is disabled\n"); - return; - } - - if (!pf->hw.func_caps.dcb) { - dev_warn(&pf->pdev->dev, - "Hardware is not DCB capable not enabling FCoE.\n"); - return; - } - - /* enable FCoE hash filter */ - val = i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)); - val |= BIT(I40E_FILTER_PCTYPE_FCOE_OX - 32); - val |= BIT(I40E_FILTER_PCTYPE_FCOE_RX - 32); - val &= I40E_PFQF_HENA_PTYPE_ENA_MASK; - i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), val); - - /* enable flag */ - pf->flags |= I40E_FLAG_FCOE_ENABLED; - pf->num_fcoe_qps = I40E_DEFAULT_FCOE; - - /* Reserve 4K DDP contexts and 20K filter size for FCoE */ - pf->fcoe_hmc_cntx_num = BIT(I40E_DMA_CNTX_SIZE_4K) * - I40E_DMA_CNTX_BASE_SIZE; - pf->fcoe_hmc_filt_num = pf->fcoe_hmc_cntx_num + - BIT(I40E_HASH_FILTER_SIZE_16K) * - I40E_HASH_FILTER_BASE_SIZE; - - /* FCoE object: max 16K filter buckets and 4K DMA contexts */ - pf->filter_settings.fcoe_filt_num = I40E_HASH_FILTER_SIZE_16K; - pf->filter_settings.fcoe_cntx_num = I40E_DMA_CNTX_SIZE_4K; - - /* Setup max frame with FCoE_MTU plus L2 overheads */ - val = i40e_read_rx_ctl(hw, I40E_GLFCOE_RCTL); - val &= ~I40E_GLFCOE_RCTL_MAX_SIZE_MASK; - val |= ((FCOE_MTU + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) - << I40E_GLFCOE_RCTL_MAX_SIZE_SHIFT); - i40e_write_rx_ctl(hw, I40E_GLFCOE_RCTL, val); - - dev_info(&pf->pdev->dev, "FCoE is supported.\n"); -} - -/** - * i40e_get_fcoe_tc_map - Return TC map for FCoE APP - * @pf: pointer to PF - * - **/ -u8 i40e_get_fcoe_tc_map(struct i40e_pf *pf) -{ - struct i40e_dcb_app_priority_table app; - struct i40e_hw *hw = &pf->hw; - u8 enabled_tc = 0; - u8 tc, i; - /* Get the FCoE APP TLV */ - struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config; - - for (i = 0; i < dcbcfg->numapps; i++) { - app = dcbcfg->app[i]; - if (app.selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE && - app.protocolid == ETH_P_FCOE) { - tc = dcbcfg->etscfg.prioritytable[app.priority]; - enabled_tc |= BIT(tc); - break; - } - } - - /* TC0 if there is no TC defined for FCoE APP TLV */ - enabled_tc = enabled_tc ? enabled_tc : 0x1; - - return enabled_tc; -} - -/** - * i40e_fcoe_vsi_init - prepares the VSI context for creating a FCoE VSI - * @vsi: pointer to the associated VSI struct - * @ctxt: pointer to the associated VSI context to be passed to HW - * - * Returns 0 on success or < 0 on error - **/ -int i40e_fcoe_vsi_init(struct i40e_vsi *vsi, struct i40e_vsi_context *ctxt) -{ - struct i40e_aqc_vsi_properties_data *info = &ctxt->info; - struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - u8 enabled_tc = 0; - - if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) { - dev_err(&pf->pdev->dev, - "FCoE is not enabled for this device\n"); - return -EPERM; - } - - /* initialize the hardware for FCoE */ - ctxt->pf_num = hw->pf_id; - ctxt->vf_num = 0; - ctxt->uplink_seid = vsi->uplink_seid; - ctxt->connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; - ctxt->flags = I40E_AQ_VSI_TYPE_PF; - - /* FCoE VSI would need the following sections */ - info->valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); - - /* FCoE VSI does not need these sections */ - info->valid_sections &= cpu_to_le16(~(I40E_AQ_VSI_PROP_SECURITY_VALID | - I40E_AQ_VSI_PROP_VLAN_VALID | - I40E_AQ_VSI_PROP_CAS_PV_VALID | - I40E_AQ_VSI_PROP_INGRESS_UP_VALID | - I40E_AQ_VSI_PROP_EGRESS_UP_VALID)); - - if (i40e_is_vsi_uplink_mode_veb(vsi)) { - info->valid_sections |= - cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); - info->switch_id = - cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); - } - enabled_tc = i40e_get_fcoe_tc_map(pf); - i40e_vsi_setup_queue_map(vsi, ctxt, enabled_tc, true); - - /* set up queue option section: only enable FCoE */ - info->queueing_opt_flags = I40E_AQ_VSI_QUE_OPT_FCOE_ENA; - - return 0; -} - -/** - * i40e_fcoe_enable - this is the implementation of ndo_fcoe_enable, - * indicating the upper FCoE protocol stack is ready to use FCoE - * offload features. - * - * @netdev: pointer to the netdev that FCoE is created on - * - * Returns 0 on success - * - * in RTNL - * - **/ -int i40e_fcoe_enable(struct net_device *netdev) -{ - struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - struct i40e_fcoe *fcoe = &pf->fcoe; - - if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) { - netdev_err(netdev, "HW does not support FCoE.\n"); - return -ENODEV; - } - - if (vsi->type != I40E_VSI_FCOE) { - netdev_err(netdev, "interface does not support FCoE.\n"); - return -EBUSY; - } - - atomic_inc(&fcoe->refcnt); - - return 0; -} - -/** - * i40e_fcoe_disable- disables FCoE for upper FCoE protocol stack. - * @dev: pointer to the netdev that FCoE is created on - * - * Returns 0 on success - * - **/ -int i40e_fcoe_disable(struct net_device *netdev) -{ - struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - struct i40e_fcoe *fcoe = &pf->fcoe; - - if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) { - netdev_err(netdev, "device does not support FCoE\n"); - return -ENODEV; - } - if (vsi->type != I40E_VSI_FCOE) - return -EBUSY; - - if (!atomic_dec_and_test(&fcoe->refcnt)) - return -EINVAL; - - netdev_info(netdev, "FCoE disabled\n"); - - return 0; -} - -/** - * i40e_fcoe_dma_pool_free - free the per cpu pool for FCoE DDP - * @fcoe: the FCoE sw object - * @dev: the device that the pool is associated with - * @cpu: the cpu for this pool - * - **/ -static void i40e_fcoe_dma_pool_free(struct i40e_fcoe *fcoe, - struct device *dev, - unsigned int cpu) -{ - struct i40e_fcoe_ddp_pool *ddp_pool; - - ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu); - if (!ddp_pool->pool) { - dev_warn(dev, "DDP pool already freed for cpu %d\n", cpu); - return; - } - dma_pool_destroy(ddp_pool->pool); - ddp_pool->pool = NULL; -} - -/** - * i40e_fcoe_dma_pool_create - per cpu pool for FCoE DDP - * @fcoe: the FCoE sw object - * @dev: the device that the pool is associated with - * @cpu: the cpu for this pool - * - * Returns 0 on successful or non zero on failure - * - **/ -static int i40e_fcoe_dma_pool_create(struct i40e_fcoe *fcoe, - struct device *dev, - unsigned int cpu) -{ - struct i40e_fcoe_ddp_pool *ddp_pool; - struct dma_pool *pool; - char pool_name[32]; - - ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu); - if (ddp_pool && ddp_pool->pool) { - dev_warn(dev, "DDP pool already allocated for cpu %d\n", cpu); - return 0; - } - snprintf(pool_name, sizeof(pool_name), "i40e_fcoe_ddp_%d", cpu); - pool = dma_pool_create(pool_name, dev, I40E_FCOE_DDP_PTR_MAX, - I40E_FCOE_DDP_PTR_ALIGN, PAGE_SIZE); - if (!pool) { - dev_err(dev, "dma_pool_create %s failed\n", pool_name); - return -ENOMEM; - } - ddp_pool->pool = pool; - return 0; -} - -/** - * i40e_fcoe_free_ddp_resources - release FCoE DDP resources - * @vsi: the vsi FCoE is associated with - * - **/ -void i40e_fcoe_free_ddp_resources(struct i40e_vsi *vsi) -{ - struct i40e_pf *pf = vsi->back; - struct i40e_fcoe *fcoe = &pf->fcoe; - int cpu, i; - - /* do nothing if not FCoE VSI */ - if (vsi->type != I40E_VSI_FCOE) - return; - - /* do nothing if no DDP pools were allocated */ - if (!fcoe->ddp_pool) - return; - - for (i = 0; i < I40E_FCOE_DDP_MAX; i++) - i40e_fcoe_ddp_put(vsi->netdev, i); - - for_each_possible_cpu(cpu) - i40e_fcoe_dma_pool_free(fcoe, &pf->pdev->dev, cpu); - - free_percpu(fcoe->ddp_pool); - fcoe->ddp_pool = NULL; - - netdev_info(vsi->netdev, "VSI %d,%d FCoE DDP resources released\n", - vsi->id, vsi->seid); -} - -/** - * i40e_fcoe_setup_ddp_resources - allocate per cpu DDP resources - * @vsi: the VSI FCoE is associated with - * - * Returns 0 on successful or non zero on failure - * - **/ -int i40e_fcoe_setup_ddp_resources(struct i40e_vsi *vsi) -{ - struct i40e_pf *pf = vsi->back; - struct device *dev = &pf->pdev->dev; - struct i40e_fcoe *fcoe = &pf->fcoe; - unsigned int cpu; - int i; - - if (vsi->type != I40E_VSI_FCOE) - return -ENODEV; - - /* do nothing if no DDP pools were allocated */ - if (fcoe->ddp_pool) - return -EEXIST; - - /* allocate per CPU memory to track DDP pools */ - fcoe->ddp_pool = alloc_percpu(struct i40e_fcoe_ddp_pool); - if (!fcoe->ddp_pool) { - dev_err(&pf->pdev->dev, "failed to allocate percpu DDP\n"); - return -ENOMEM; - } - - /* allocate pci pool for each cpu */ - for_each_possible_cpu(cpu) { - if (!i40e_fcoe_dma_pool_create(fcoe, dev, cpu)) - continue; - - dev_err(dev, "failed to alloc DDP pool on cpu:%d\n", cpu); - i40e_fcoe_free_ddp_resources(vsi); - return -ENOMEM; - } - - /* initialize the sw context */ - for (i = 0; i < I40E_FCOE_DDP_MAX; i++) - i40e_fcoe_ddp_clear(&fcoe->ddp[i]); - - netdev_info(vsi->netdev, "VSI %d,%d FCoE DDP resources allocated\n", - vsi->id, vsi->seid); - - return 0; -} - -/** - * i40e_fcoe_handle_status - check the Programming Status for FCoE - * @rx_ring: the Rx ring for this descriptor - * @rx_desc: the Rx descriptor for Programming Status, not a packet descriptor. - * - * Check if this is the Rx Programming Status descriptor write-back for FCoE. - * This is used to verify if the context/filter programming or invalidation - * requested by SW to the HW is successful or not and take actions accordingly. - **/ -void i40e_fcoe_handle_status(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc, u8 prog_id) -{ - struct i40e_pf *pf = rx_ring->vsi->back; - struct i40e_fcoe *fcoe = &pf->fcoe; - struct i40e_fcoe_ddp *ddp; - u32 error; - u16 xid; - u64 qw; - - /* we only care for FCoE here */ - if (!i40e_fcoe_progid_is_fcoe(prog_id)) - return; - - xid = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fcoe_param) & - (I40E_FCOE_DDP_MAX - 1); - - if (!i40e_fcoe_xid_is_valid(xid)) - return; - - ddp = &fcoe->ddp[xid]; - WARN_ON(xid != ddp->xid); - - qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >> - I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT; - - /* DDP context programming status: failure or success */ - if (prog_id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) { - if (I40E_RX_PROG_FCOE_ERROR_TBL_FULL(error)) { - dev_err(&pf->pdev->dev, "xid %x ddp->xid %x TABLE FULL\n", - xid, ddp->xid); - ddp->prerr |= I40E_RX_PROG_FCOE_ERROR_TBL_FULL_BIT; - } - if (I40E_RX_PROG_FCOE_ERROR_CONFLICT(error)) { - dev_err(&pf->pdev->dev, "xid %x ddp->xid %x CONFLICT\n", - xid, ddp->xid); - ddp->prerr |= I40E_RX_PROG_FCOE_ERROR_CONFLICT_BIT; - } - } - - /* DDP context invalidation status: failure or success */ - if (prog_id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS) { - if (I40E_RX_PROG_FCOE_ERROR_INVLFAIL(error)) { - dev_err(&pf->pdev->dev, "xid %x ddp->xid %x INVALIDATION FAILURE\n", - xid, ddp->xid); - ddp->prerr |= I40E_RX_PROG_FCOE_ERROR_INVLFAIL_BIT; - } - /* clear the flag so we can retry invalidation */ - clear_bit(__I40E_FCOE_DDP_ABORTED, &ddp->flags); - } - - /* unmap DMA */ - i40e_fcoe_ddp_unmap(pf, ddp); - i40e_fcoe_ddp_clear(ddp); -} - -/** - * i40e_fcoe_handle_offload - check ddp status and mark it done - * @adapter: i40e adapter - * @rx_desc: advanced rx descriptor - * @skb: the skb holding the received data - * - * This checks ddp status. - * - * Returns : < 0 indicates an error or not a FCOE ddp, 0 indicates - * not passing the skb to ULD, > 0 indicates is the length of data - * being ddped. - * - **/ -int i40e_fcoe_handle_offload(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc, - struct sk_buff *skb) -{ - struct i40e_pf *pf = rx_ring->vsi->back; - struct i40e_fcoe *fcoe = &pf->fcoe; - struct fc_frame_header *fh = NULL; - struct i40e_fcoe_ddp *ddp = NULL; - u32 status, fltstat; - u32 error, fcerr; - int rc = -EINVAL; - u16 ptype; - u16 xid; - u64 qw; - - /* check this rxd is for programming status */ - qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - /* packet descriptor, check packet type */ - ptype = (qw & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; - if (!i40e_rx_is_fcoe(ptype)) - goto out_no_ddp; - - error = (qw & I40E_RXD_QW1_ERROR_MASK) >> I40E_RXD_QW1_ERROR_SHIFT; - fcerr = (error >> I40E_RX_DESC_ERROR_L3L4E_SHIFT) & - I40E_RX_DESC_FCOE_ERROR_MASK; - - /* check stateless offload error */ - if (unlikely(fcerr == I40E_RX_DESC_ERROR_L3L4E_PROT)) { - dev_err(&pf->pdev->dev, "Protocol Error\n"); - skb->ip_summed = CHECKSUM_NONE; - } else { - skb->ip_summed = CHECKSUM_UNNECESSARY; - } - - /* check hw status on ddp */ - status = (qw & I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT; - fltstat = (status >> I40E_RX_DESC_STATUS_FLTSTAT_SHIFT) & - I40E_RX_DESC_FLTSTAT_FCMASK; - - /* now we are ready to check DDP */ - fh = i40e_fcoe_fc_frame_header(skb); - xid = i40e_fcoe_fc_get_xid(fh); - if (!i40e_fcoe_xid_is_valid(xid)) - goto out_no_ddp; - - /* non DDP normal receive, return to the protocol stack */ - if (fltstat == I40E_RX_DESC_FLTSTAT_NOMTCH) - goto out_no_ddp; - - /* do we have a sw ddp context setup ? */ - ddp = &fcoe->ddp[xid]; - if (!ddp->sgl) - goto out_no_ddp; - - /* fetch xid from hw rxd wb, which should match up the sw ctxt */ - xid = le16_to_cpu(rx_desc->wb.qword0.lo_dword.mirr_fcoe.fcoe_ctx_id); - if (ddp->xid != xid) { - dev_err(&pf->pdev->dev, "xid 0x%x does not match ctx_xid 0x%x\n", - ddp->xid, xid); - goto out_put_ddp; - } - - /* the same exchange has already errored out */ - if (ddp->fcerr) { - dev_err(&pf->pdev->dev, "xid 0x%x fcerr 0x%x reported fcer 0x%x\n", - xid, ddp->fcerr, fcerr); - goto out_put_ddp; - } - - /* fcoe param is valid by now with correct DDPed length */ - ddp->len = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fcoe_param); - ddp->fcerr = fcerr; - /* header posting only, useful only for target mode and debugging */ - if (fltstat == I40E_RX_DESC_FLTSTAT_DDP) { - /* For target mode, we get header of the last packet but it - * does not have the FCoE trailer field, i.e., CRC and EOF - * Ordered Set since they are offloaded by the HW, so fill - * it up correspondingly to allow the packet to pass through - * to the upper protocol stack. - */ - u32 f_ctl = ntoh24(fh->fh_f_ctl); - - if ((f_ctl & FC_FC_END_SEQ) && - (fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA)) { - struct fcoe_crc_eof *crc = NULL; - - crc = skb_put(skb, sizeof(*crc)); - crc->fcoe_eof = FC_EOF_T; - } else { - /* otherwise, drop the header only frame */ - rc = 0; - goto out_no_ddp; - } - } - -out_put_ddp: - /* either we got RSP or we have an error, unmap DMA in both cases */ - i40e_fcoe_ddp_unmap(pf, ddp); - if (ddp->len && !ddp->fcerr) { - int pkts; - - rc = ddp->len; - i40e_fcoe_ddp_clear(ddp); - ddp->len = rc; - pkts = DIV_ROUND_UP(rc, 2048); - rx_ring->stats.bytes += rc; - rx_ring->stats.packets += pkts; - rx_ring->q_vector->rx.total_bytes += rc; - rx_ring->q_vector->rx.total_packets += pkts; - set_bit(__I40E_FCOE_DDP_DONE, &ddp->flags); - } - -out_no_ddp: - return rc; -} - -/** - * i40e_fcoe_ddp_setup - called to set up ddp context - * @netdev: the corresponding net_device - * @xid: the exchange id requesting ddp - * @sgl: the scatter-gather list for this request - * @sgc: the number of scatter-gather items - * @target_mode: indicates this is a DDP request for target - * - * Returns : 1 for success and 0 for no DDP on this I/O - **/ -static int i40e_fcoe_ddp_setup(struct net_device *netdev, u16 xid, - struct scatterlist *sgl, unsigned int sgc, - int target_mode) -{ - static const unsigned int bufflen = I40E_FCOE_DDP_BUF_MIN; - struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_fcoe_ddp_pool *ddp_pool; - struct i40e_pf *pf = np->vsi->back; - struct i40e_fcoe *fcoe = &pf->fcoe; - unsigned int i, j, dmacount; - struct i40e_fcoe_ddp *ddp; - unsigned int firstoff = 0; - unsigned int thisoff = 0; - unsigned int thislen = 0; - struct scatterlist *sg; - dma_addr_t addr = 0; - unsigned int len; - - if (xid >= I40E_FCOE_DDP_MAX) { - dev_warn(&pf->pdev->dev, "xid=0x%x out-of-range\n", xid); - return 0; - } - - /* no DDP if we are already down or resetting */ - if (test_bit(__I40E_DOWN, &pf->state) || - test_bit(__I40E_NEEDS_RESTART, &pf->state)) { - dev_info(&pf->pdev->dev, "xid=0x%x device in reset/down\n", - xid); - return 0; - } - - ddp = &fcoe->ddp[xid]; - if (ddp->sgl) { - dev_info(&pf->pdev->dev, "xid 0x%x w/ non-null sgl=%p nents=%d\n", - xid, ddp->sgl, ddp->sgc); - return 0; - } - i40e_fcoe_ddp_clear(ddp); - - if (!fcoe->ddp_pool) { - dev_info(&pf->pdev->dev, "No DDP pool, xid 0x%x\n", xid); - return 0; - } - - ddp_pool = per_cpu_ptr(fcoe->ddp_pool, get_cpu()); - if (!ddp_pool->pool) { - dev_info(&pf->pdev->dev, "No percpu ddp pool, xid 0x%x\n", xid); - goto out_noddp; - } - - /* setup dma from scsi command sgl */ - dmacount = dma_map_sg(&pf->pdev->dev, sgl, sgc, DMA_FROM_DEVICE); - if (dmacount == 0) { - dev_info(&pf->pdev->dev, "dma_map_sg for sgl %p, sgc %d failed\n", - sgl, sgc); - goto out_noddp_unmap; - } - - /* alloc the udl from our ddp pool */ - ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_ATOMIC, &ddp->udp); - if (!ddp->udl) { - dev_info(&pf->pdev->dev, - "Failed allocated ddp context, xid 0x%x\n", xid); - goto out_noddp_unmap; - } - - j = 0; - ddp->len = 0; - for_each_sg(sgl, sg, dmacount, i) { - addr = sg_dma_address(sg); - len = sg_dma_len(sg); - ddp->len += len; - while (len) { - /* max number of buffers allowed in one DDP context */ - if (j >= I40E_FCOE_DDP_BUFFCNT_MAX) { - dev_info(&pf->pdev->dev, - "xid=%x:%d,%d,%d:addr=%llx not enough descriptors\n", - xid, i, j, dmacount, (u64)addr); - goto out_noddp_free; - } - - /* get the offset of length of current buffer */ - thisoff = addr & ((dma_addr_t)bufflen - 1); - thislen = min_t(unsigned int, (bufflen - thisoff), len); - /* all but the 1st buffer (j == 0) - * must be aligned on bufflen - */ - if ((j != 0) && (thisoff)) - goto out_noddp_free; - - /* all but the last buffer - * ((i == (dmacount - 1)) && (thislen == len)) - * must end at bufflen - */ - if (((i != (dmacount - 1)) || (thislen != len)) && - ((thislen + thisoff) != bufflen)) - goto out_noddp_free; - - ddp->udl[j] = (u64)(addr - thisoff); - /* only the first buffer may have none-zero offset */ - if (j == 0) - firstoff = thisoff; - len -= thislen; - addr += thislen; - j++; - } - } - /* only the last buffer may have non-full bufflen */ - ddp->lastsize = thisoff + thislen; - ddp->firstoff = firstoff; - ddp->list_len = j; - ddp->pool = ddp_pool->pool; - ddp->sgl = sgl; - ddp->sgc = sgc; - ddp->xid = xid; - if (target_mode) - set_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags); - set_bit(__I40E_FCOE_DDP_INITALIZED, &ddp->flags); - - put_cpu(); - return 1; /* Success */ - -out_noddp_free: - dma_pool_free(ddp->pool, ddp->udl, ddp->udp); - i40e_fcoe_ddp_clear(ddp); - -out_noddp_unmap: - dma_unmap_sg(&pf->pdev->dev, sgl, sgc, DMA_FROM_DEVICE); -out_noddp: - put_cpu(); - return 0; -} - -/** - * i40e_fcoe_ddp_get - called to set up ddp context in initiator mode - * @netdev: the corresponding net_device - * @xid: the exchange id requesting ddp - * @sgl: the scatter-gather list for this request - * @sgc: the number of scatter-gather items - * - * This is the implementation of net_device_ops.ndo_fcoe_ddp_setup - * and is expected to be called from ULD, e.g., FCP layer of libfc - * to set up ddp for the corresponding xid of the given sglist for - * the corresponding I/O. - * - * Returns : 1 for success and 0 for no ddp - **/ -static int i40e_fcoe_ddp_get(struct net_device *netdev, u16 xid, - struct scatterlist *sgl, unsigned int sgc) -{ - return i40e_fcoe_ddp_setup(netdev, xid, sgl, sgc, 0); -} - -/** - * i40e_fcoe_ddp_target - called to set up ddp context in target mode - * @netdev: the corresponding net_device - * @xid: the exchange id requesting ddp - * @sgl: the scatter-gather list for this request - * @sgc: the number of scatter-gather items - * - * This is the implementation of net_device_ops.ndo_fcoe_ddp_target - * and is expected to be called from ULD, e.g., FCP layer of libfc - * to set up ddp for the corresponding xid of the given sglist for - * the corresponding I/O. The DDP in target mode is a write I/O request - * from the initiator. - * - * Returns : 1 for success and 0 for no ddp - **/ -static int i40e_fcoe_ddp_target(struct net_device *netdev, u16 xid, - struct scatterlist *sgl, unsigned int sgc) -{ - return i40e_fcoe_ddp_setup(netdev, xid, sgl, sgc, 1); -} - -/** - * i40e_fcoe_program_ddp - programs the HW DDP related descriptors - * @tx_ring: transmit ring for this packet - * @skb: the packet to be sent out - * @sof: the SOF to indicate class of service - * - * Determine if it is READ/WRITE command, and finds out if there is - * a matching SW DDP context for this command. DDP is applicable - * only in case of READ if initiator or WRITE in case of - * responder (via checking XFER_RDY). - * - * Note: caller checks sof and ddp sw context - * - * Returns : none - * - **/ -static void i40e_fcoe_program_ddp(struct i40e_ring *tx_ring, - struct sk_buff *skb, - struct i40e_fcoe_ddp *ddp, u8 sof) -{ - struct i40e_fcoe_filter_context_desc *filter_desc = NULL; - struct i40e_fcoe_queue_context_desc *queue_desc = NULL; - struct i40e_fcoe_ddp_context_desc *ddp_desc = NULL; - struct i40e_pf *pf = tx_ring->vsi->back; - u16 i = tx_ring->next_to_use; - struct fc_frame_header *fh; - u64 flags_rsvd_lanq = 0; - bool target_mode; - - /* check if abort is still pending */ - if (test_bit(__I40E_FCOE_DDP_ABORTED, &ddp->flags)) { - dev_warn(&pf->pdev->dev, - "DDP abort is still pending xid:%hx and ddp->flags:%lx:\n", - ddp->xid, ddp->flags); - return; - } - - /* set the flag to indicate this is programmed */ - if (test_and_set_bit(__I40E_FCOE_DDP_PROGRAMMED, &ddp->flags)) { - dev_warn(&pf->pdev->dev, - "DDP is already programmed for xid:%hx and ddp->flags:%lx:\n", - ddp->xid, ddp->flags); - return; - } - - /* Prepare the DDP context descriptor */ - ddp_desc = I40E_DDP_CONTEXT_DESC(tx_ring, i); - i++; - if (i == tx_ring->count) - i = 0; - - ddp_desc->type_cmd_foff_lsize = - cpu_to_le64(I40E_TX_DESC_DTYPE_DDP_CTX | - ((u64)I40E_FCOE_DDP_CTX_DESC_BSIZE_4K << - I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT) | - ((u64)ddp->firstoff << - I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT) | - ((u64)ddp->lastsize << - I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT)); - ddp_desc->rsvd = cpu_to_le64(0); - - /* target mode needs last packet in the sequence */ - target_mode = test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags); - if (target_mode) - ddp_desc->type_cmd_foff_lsize |= - cpu_to_le64(I40E_FCOE_DDP_CTX_DESC_LASTSEQH); - - /* Prepare queue_context descriptor */ - queue_desc = I40E_QUEUE_CONTEXT_DESC(tx_ring, i++); - if (i == tx_ring->count) - i = 0; - queue_desc->dmaindx_fbase = cpu_to_le64(ddp->xid | ((u64)ddp->udp)); - queue_desc->flen_tph = cpu_to_le64(ddp->list_len | - ((u64)(I40E_FCOE_QUEUE_CTX_DESC_TPHRDESC | - I40E_FCOE_QUEUE_CTX_DESC_TPHDATA) << - I40E_FCOE_QUEUE_CTX_QW1_TPH_SHIFT)); - - /* Prepare filter_context_desc */ - filter_desc = I40E_FILTER_CONTEXT_DESC(tx_ring, i); - i++; - if (i == tx_ring->count) - i = 0; - - fh = (struct fc_frame_header *)skb_transport_header(skb); - filter_desc->param = cpu_to_le32(ntohl(fh->fh_parm_offset)); - filter_desc->seqn = cpu_to_le16(ntohs(fh->fh_seq_cnt)); - filter_desc->rsvd_dmaindx = cpu_to_le16(ddp->xid << - I40E_FCOE_FILTER_CTX_QW0_DMAINDX_SHIFT); - - flags_rsvd_lanq = I40E_FCOE_FILTER_CTX_DESC_CTYP_DDP; - flags_rsvd_lanq |= (u64)(target_mode ? - I40E_FCOE_FILTER_CTX_DESC_ENODE_RSP : - I40E_FCOE_FILTER_CTX_DESC_ENODE_INIT); - - flags_rsvd_lanq |= (u64)((sof == FC_SOF_I2 || sof == FC_SOF_N2) ? - I40E_FCOE_FILTER_CTX_DESC_FC_CLASS2 : - I40E_FCOE_FILTER_CTX_DESC_FC_CLASS3); - - flags_rsvd_lanq |= ((u64)skb->queue_mapping << - I40E_FCOE_FILTER_CTX_QW1_LANQINDX_SHIFT); - filter_desc->flags_rsvd_lanq = cpu_to_le64(flags_rsvd_lanq); - - /* By this time, all offload related descriptors has been programmed */ - tx_ring->next_to_use = i; -} - -/** - * i40e_fcoe_invalidate_ddp - invalidates DDP in case of abort - * @tx_ring: transmit ring for this packet - * @skb: the packet associated w/ this DDP invalidation, i.e., ABTS - * @ddp: the SW DDP context for this DDP - * - * Programs the Tx context descriptor to do DDP invalidation. - **/ -static void i40e_fcoe_invalidate_ddp(struct i40e_ring *tx_ring, - struct sk_buff *skb, - struct i40e_fcoe_ddp *ddp) -{ - struct i40e_tx_context_desc *context_desc; - int i; - - if (test_and_set_bit(__I40E_FCOE_DDP_ABORTED, &ddp->flags)) - return; - - i = tx_ring->next_to_use; - context_desc = I40E_TX_CTXTDESC(tx_ring, i); - i++; - if (i == tx_ring->count) - i = 0; - - context_desc->tunneling_params = cpu_to_le32(0); - context_desc->l2tag2 = cpu_to_le16(0); - context_desc->rsvd = cpu_to_le16(0); - context_desc->type_cmd_tso_mss = cpu_to_le64( - I40E_TX_DESC_DTYPE_FCOE_CTX | - (I40E_FCOE_TX_CTX_DESC_OPCODE_DDP_CTX_INVL << - I40E_TXD_CTX_QW1_CMD_SHIFT) | - (I40E_FCOE_TX_CTX_DESC_OPCODE_SINGLE_SEND << - I40E_TXD_CTX_QW1_CMD_SHIFT)); - tx_ring->next_to_use = i; -} - -/** - * i40e_fcoe_handle_ddp - check we should setup or invalidate DDP - * @tx_ring: transmit ring for this packet - * @skb: the packet to be sent out - * @sof: the SOF to indicate class of service - * - * Determine if it is ABTS/READ/XFER_RDY, and finds out if there is - * a matching SW DDP context for this command. DDP is applicable - * only in case of READ if initiator or WRITE in case of - * responder (via checking XFER_RDY). In case this is an ABTS, send - * just invalidate the context. - **/ -static void i40e_fcoe_handle_ddp(struct i40e_ring *tx_ring, - struct sk_buff *skb, u8 sof) -{ - struct i40e_pf *pf = tx_ring->vsi->back; - struct i40e_fcoe *fcoe = &pf->fcoe; - struct fc_frame_header *fh; - struct i40e_fcoe_ddp *ddp; - u32 f_ctl; - u8 r_ctl; - u16 xid; - - fh = (struct fc_frame_header *)skb_transport_header(skb); - f_ctl = ntoh24(fh->fh_f_ctl); - r_ctl = fh->fh_r_ctl; - ddp = NULL; - - if ((r_ctl == FC_RCTL_DD_DATA_DESC) && (f_ctl & FC_FC_EX_CTX)) { - /* exchange responder? if so, XFER_RDY for write */ - xid = ntohs(fh->fh_rx_id); - if (i40e_fcoe_xid_is_valid(xid)) { - ddp = &fcoe->ddp[xid]; - if ((ddp->xid == xid) && - (test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags))) - i40e_fcoe_program_ddp(tx_ring, skb, ddp, sof); - } - } else if (r_ctl == FC_RCTL_DD_UNSOL_CMD) { - /* exchange originator, check READ cmd */ - xid = ntohs(fh->fh_ox_id); - if (i40e_fcoe_xid_is_valid(xid)) { - ddp = &fcoe->ddp[xid]; - if ((ddp->xid == xid) && - (!test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags))) - i40e_fcoe_program_ddp(tx_ring, skb, ddp, sof); - } - } else if (r_ctl == FC_RCTL_BA_ABTS) { - /* exchange originator, check ABTS */ - xid = ntohs(fh->fh_ox_id); - if (i40e_fcoe_xid_is_valid(xid)) { - ddp = &fcoe->ddp[xid]; - if ((ddp->xid == xid) && - (!test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags))) - i40e_fcoe_invalidate_ddp(tx_ring, skb, ddp); - } - } -} - -/** - * i40e_fcoe_tso - set up FCoE TSO - * @tx_ring: ring to send buffer on - * @skb: send buffer - * @tx_flags: collected send information - * @hdr_len: the tso header length - * @sof: the SOF to indicate class of service - * - * Note must already have sof checked to be either class 2 or class 3 before - * calling this function. - * - * Returns 1 to indicate sequence segmentation offload is properly setup - * or returns 0 to indicate no tso is needed, otherwise returns error - * code to drop the frame. - **/ -static int i40e_fcoe_tso(struct i40e_ring *tx_ring, - struct sk_buff *skb, - u32 tx_flags, u8 *hdr_len, u8 sof) -{ - struct i40e_tx_context_desc *context_desc; - u32 cd_type, cd_cmd, cd_tso_len, cd_mss; - struct fc_frame_header *fh; - u64 cd_type_cmd_tso_mss; - - /* must match gso type as FCoE */ - if (!skb_is_gso(skb)) - return 0; - - /* is it the expected gso type for FCoE ?*/ - if (skb_shinfo(skb)->gso_type != SKB_GSO_FCOE) { - netdev_err(skb->dev, - "wrong gso type %d:expecting SKB_GSO_FCOE\n", - skb_shinfo(skb)->gso_type); - return -EINVAL; - } - - /* header and trailer are inserted by hw */ - *hdr_len = skb_transport_offset(skb) + sizeof(struct fc_frame_header) + - sizeof(struct fcoe_crc_eof); - - /* check sof to decide a class 2 or 3 TSO */ - if (likely(i40e_fcoe_sof_is_class3(sof))) - cd_cmd = I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS3; - else - cd_cmd = I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS2; - - /* param field valid? */ - fh = (struct fc_frame_header *)skb_transport_header(skb); - if (fh->fh_f_ctl[2] & FC_FC_REL_OFF) - cd_cmd |= I40E_FCOE_TX_CTX_DESC_RELOFF; - - /* fill the field values */ - cd_type = I40E_TX_DESC_DTYPE_FCOE_CTX; - cd_tso_len = skb->len - *hdr_len; - cd_mss = skb_shinfo(skb)->gso_size; - cd_type_cmd_tso_mss = - ((u64)cd_type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) | - ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | - ((u64)cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | - ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); - - /* grab the next descriptor */ - context_desc = I40E_TX_CTXTDESC(tx_ring, tx_ring->next_to_use); - tx_ring->next_to_use++; - if (tx_ring->next_to_use == tx_ring->count) - tx_ring->next_to_use = 0; - - context_desc->tunneling_params = 0; - context_desc->l2tag2 = cpu_to_le16((tx_flags & I40E_TX_FLAGS_VLAN_MASK) - >> I40E_TX_FLAGS_VLAN_SHIFT); - context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss); - - return 1; -} - -/** - * i40e_fcoe_tx_map - build the tx descriptor - * @tx_ring: ring to send buffer on - * @skb: send buffer - * @first: first buffer info buffer to use - * @tx_flags: collected send information - * @hdr_len: ptr to the size of the packet header - * @eof: the frame eof value - * - * Note, for FCoE, sof and eof are already checked - **/ -static void i40e_fcoe_tx_map(struct i40e_ring *tx_ring, - struct sk_buff *skb, - struct i40e_tx_buffer *first, - u32 tx_flags, u8 hdr_len, u8 eof) -{ - u32 td_offset = 0; - u32 td_cmd = 0; - u32 maclen; - - /* insert CRC */ - td_cmd = I40E_TX_DESC_CMD_ICRC; - - /* setup MACLEN */ - maclen = skb_network_offset(skb); - if (tx_flags & I40E_TX_FLAGS_SW_VLAN) - maclen += sizeof(struct vlan_hdr); - - if (skb->protocol == htons(ETH_P_FCOE)) { - /* for FCoE, maclen should exclude ether type */ - maclen -= 2; - /* setup type as FCoE and EOF insertion */ - td_cmd |= (I40E_TX_DESC_CMD_FCOET | i40e_fcoe_ctxt_eof(eof)); - /* setup FCoELEN and FCLEN */ - td_offset |= ((((sizeof(struct fcoe_hdr) + 2) >> 2) << - I40E_TX_DESC_LENGTH_IPLEN_SHIFT) | - ((sizeof(struct fc_frame_header) >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT)); - /* trim to exclude trailer */ - pskb_trim(skb, skb->len - sizeof(struct fcoe_crc_eof)); - } - - /* MACLEN is ether header length in words not bytes */ - td_offset |= (maclen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; - - i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, td_cmd, td_offset); -} - -/** - * i40e_fcoe_set_skb_header - adjust skb header point for FIP/FCoE/FC - * @skb: the skb to be adjusted - * - * Returns true if this skb is a FCoE/FIP or VLAN carried FCoE/FIP and then - * adjusts the skb header pointers correspondingly. Otherwise, returns false. - **/ -static inline int i40e_fcoe_set_skb_header(struct sk_buff *skb) -{ - __be16 protocol = skb->protocol; - - skb_reset_mac_header(skb); - skb->mac_len = sizeof(struct ethhdr); - if (protocol == htons(ETH_P_8021Q)) { - struct vlan_ethhdr *veth = (struct vlan_ethhdr *)eth_hdr(skb); - - protocol = veth->h_vlan_encapsulated_proto; - skb->mac_len += sizeof(struct vlan_hdr); - } - - /* FCoE or FIP only */ - if ((protocol != htons(ETH_P_FIP)) && - (protocol != htons(ETH_P_FCOE))) - return -EINVAL; - - /* set header to L2 of FCoE/FIP */ - skb_set_network_header(skb, skb->mac_len); - if (protocol == htons(ETH_P_FIP)) - return 0; - - /* set header to L3 of FC */ - skb_set_transport_header(skb, skb->mac_len + sizeof(struct fcoe_hdr)); - return 0; -} - -/** - * i40e_fcoe_xmit_frame - transmit buffer - * @skb: send buffer - * @netdev: the fcoe netdev - * - * Returns 0 if sent, else an error code - **/ -static netdev_tx_t i40e_fcoe_xmit_frame(struct sk_buff *skb, - struct net_device *netdev) -{ - struct i40e_netdev_priv *np = netdev_priv(skb->dev); - struct i40e_vsi *vsi = np->vsi; - struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping]; - struct i40e_tx_buffer *first; - u32 tx_flags = 0; - int fso, count; - u8 hdr_len = 0; - u8 sof = 0; - u8 eof = 0; - - if (i40e_fcoe_set_skb_header(skb)) - goto out_drop; - - count = i40e_xmit_descriptor_count(skb); - if (i40e_chk_linearize(skb, count)) { - if (__skb_linearize(skb)) - goto out_drop; - count = i40e_txd_use_count(skb->len); - tx_ring->tx_stats.tx_linearize++; - } - - /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, - * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, - * + 4 desc gap to avoid the cache line where head is, - * + 1 desc for context descriptor, - * otherwise try next time - */ - if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { - tx_ring->tx_stats.tx_busy++; - return NETDEV_TX_BUSY; - } - - /* prepare the xmit flags */ - if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) - goto out_drop; - - /* record the location of the first descriptor for this packet */ - first = &tx_ring->tx_bi[tx_ring->next_to_use]; - - /* FIP is a regular L2 traffic w/o offload */ - if (skb->protocol == htons(ETH_P_FIP)) - goto out_send; - - /* check sof and eof, only supports FC Class 2 or 3 */ - if (i40e_fcoe_fc_sof(skb, &sof) || i40e_fcoe_fc_eof(skb, &eof)) { - netdev_err(netdev, "SOF/EOF error:%02x - %02x\n", sof, eof); - goto out_drop; - } - - /* always do FCCRC for FCoE */ - tx_flags |= I40E_TX_FLAGS_FCCRC; - - /* check we should do sequence offload */ - fso = i40e_fcoe_tso(tx_ring, skb, tx_flags, &hdr_len, sof); - if (fso < 0) - goto out_drop; - else if (fso) - tx_flags |= I40E_TX_FLAGS_FSO; - else - i40e_fcoe_handle_ddp(tx_ring, skb, sof); - -out_send: - /* send out the packet */ - i40e_fcoe_tx_map(tx_ring, skb, first, tx_flags, hdr_len, eof); - - i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); - return NETDEV_TX_OK; - -out_drop: - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; -} - -/** - * i40e_fcoe_change_mtu - NDO callback to change the Maximum Transfer Unit - * @netdev: network interface device structure - * @new_mtu: new value for maximum frame size - * - * Returns error as operation not permitted - * - **/ -static int i40e_fcoe_change_mtu(struct net_device *netdev, int new_mtu) -{ - netdev_warn(netdev, "MTU change is not supported on FCoE interfaces\n"); - return -EPERM; -} - -/** - * i40e_fcoe_set_features - set the netdev feature flags - * @netdev: ptr to the netdev being adjusted - * @features: the feature set that the stack is suggesting - * - **/ -static int i40e_fcoe_set_features(struct net_device *netdev, - netdev_features_t features) -{ - struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_vsi *vsi = np->vsi; - - if (features & NETIF_F_HW_VLAN_CTAG_RX) - i40e_vlan_stripping_enable(vsi); - else - i40e_vlan_stripping_disable(vsi); - - return 0; -} - -static const struct net_device_ops i40e_fcoe_netdev_ops = { - .ndo_open = i40e_open, - .ndo_stop = i40e_close, - .ndo_get_stats64 = i40e_get_netdev_stats_struct, - .ndo_set_rx_mode = i40e_set_rx_mode, - .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = i40e_set_mac, - .ndo_change_mtu = i40e_fcoe_change_mtu, - .ndo_do_ioctl = i40e_ioctl, - .ndo_tx_timeout = i40e_tx_timeout, - .ndo_vlan_rx_add_vid = i40e_vlan_rx_add_vid, - .ndo_vlan_rx_kill_vid = i40e_vlan_rx_kill_vid, - .ndo_setup_tc = __i40e_setup_tc, - -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = i40e_netpoll, -#endif - .ndo_start_xmit = i40e_fcoe_xmit_frame, - .ndo_fcoe_enable = i40e_fcoe_enable, - .ndo_fcoe_disable = i40e_fcoe_disable, - .ndo_fcoe_ddp_setup = i40e_fcoe_ddp_get, - .ndo_fcoe_ddp_done = i40e_fcoe_ddp_put, - .ndo_fcoe_ddp_target = i40e_fcoe_ddp_target, - .ndo_set_features = i40e_fcoe_set_features, -}; - -/* fcoe network device type */ -static struct device_type fcoe_netdev_type = { - .name = "fcoe", -}; - -/** - * i40e_fcoe_config_netdev - prepares the VSI context for creating a FCoE VSI - * @vsi: pointer to the associated VSI struct - * @ctxt: pointer to the associated VSI context to be passed to HW - * - * Returns 0 on success or < 0 on error - **/ -void i40e_fcoe_config_netdev(struct net_device *netdev, struct i40e_vsi *vsi) -{ - struct i40e_hw *hw = &vsi->back->hw; - struct i40e_pf *pf = vsi->back; - - if (vsi->type != I40E_VSI_FCOE) - return; - - netdev->features = (NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_FILTER); - - netdev->vlan_features = netdev->features; - netdev->vlan_features &= ~(NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_FILTER); - netdev->fcoe_ddp_xid = I40E_FCOE_DDP_MAX - 1; - netdev->features |= NETIF_F_ALL_FCOE; - netdev->vlan_features |= NETIF_F_ALL_FCOE; - netdev->hw_features |= netdev->features; - netdev->priv_flags |= IFF_UNICAST_FLT; - netdev->priv_flags |= IFF_SUPP_NOFCS; - - strlcpy(netdev->name, "fcoe%d", IFNAMSIZ-1); - netdev->mtu = FCOE_MTU; - SET_NETDEV_DEV(netdev, &pf->pdev->dev); - SET_NETDEV_DEVTYPE(netdev, &fcoe_netdev_type); - /* set different dev_port value 1 for FCoE netdev than the default - * zero dev_port value for PF netdev, this helps biosdevname user - * tool to differentiate them correctly while both attached to the - * same PCI function. - */ - netdev->dev_port = 1; - spin_lock_bh(&vsi->mac_filter_hash_lock); - i40e_add_filter(vsi, hw->mac.san_addr, 0); - i40e_add_filter(vsi, (u8[6]) FC_FCOE_FLOGI_MAC, 0); - i40e_add_filter(vsi, FIP_ALL_FCOE_MACS, 0); - i40e_add_filter(vsi, FIP_ALL_ENODE_MACS, 0); - spin_unlock_bh(&vsi->mac_filter_hash_lock); - - /* use san mac */ - ether_addr_copy(netdev->dev_addr, hw->mac.san_addr); - ether_addr_copy(netdev->perm_addr, hw->mac.san_addr); - /* fcoe netdev ops */ - netdev->netdev_ops = &i40e_fcoe_netdev_ops; -} - -/** - * i40e_fcoe_vsi_setup - allocate and set up FCoE VSI - * @pf: the PF that VSI is associated with - * - **/ -void i40e_fcoe_vsi_setup(struct i40e_pf *pf) -{ - struct i40e_vsi *vsi; - u16 seid; - int i; - - if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) - return; - - for (i = 0; i < pf->num_alloc_vsi; i++) { - vsi = pf->vsi[i]; - if (vsi && vsi->type == I40E_VSI_FCOE) { - dev_warn(&pf->pdev->dev, - "FCoE VSI already created\n"); - return; - } - } - - seid = pf->vsi[pf->lan_vsi]->seid; - vsi = i40e_vsi_setup(pf, I40E_VSI_FCOE, seid, 0); - if (vsi) { - dev_dbg(&pf->pdev->dev, - "Successfully created FCoE VSI seid %d id %d uplink_seid %d PF seid %d\n", - vsi->seid, vsi->id, vsi->uplink_seid, seid); - } else { - dev_info(&pf->pdev->dev, "Failed to create FCoE VSI\n"); - } -} diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.h b/drivers/net/ethernet/intel/i40e/i40e_fcoe.h deleted file mode 100644 index a93174ddeaba..000000000000 --- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.h +++ /dev/null @@ -1,127 +0,0 @@ -/******************************************************************************* - * - * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see <http://www.gnu.org/licenses/>. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Contact Information: - * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - * - ******************************************************************************/ - -#ifndef _I40E_FCOE_H_ -#define _I40E_FCOE_H_ - -/* FCoE HW context helper macros */ -#define I40E_DDP_CONTEXT_DESC(R, i) \ - (&(((struct i40e_fcoe_ddp_context_desc *)((R)->desc))[i])) - -#define I40E_QUEUE_CONTEXT_DESC(R, i) \ - (&(((struct i40e_fcoe_queue_context_desc *)((R)->desc))[i])) - -#define I40E_FILTER_CONTEXT_DESC(R, i) \ - (&(((struct i40e_fcoe_filter_context_desc *)((R)->desc))[i])) - -/* receive queue descriptor filter status for FCoE */ -#define I40E_RX_DESC_FLTSTAT_FCMASK 0x3 -#define I40E_RX_DESC_FLTSTAT_NOMTCH 0x0 /* no ddp context match */ -#define I40E_RX_DESC_FLTSTAT_NODDP 0x1 /* no ddp due to error */ -#define I40E_RX_DESC_FLTSTAT_DDP 0x2 /* DDPed payload, post header */ -#define I40E_RX_DESC_FLTSTAT_FCPRSP 0x3 /* FCP_RSP */ - -/* receive queue descriptor error codes for FCoE */ -#define I40E_RX_DESC_FCOE_ERROR_MASK \ - (I40E_RX_DESC_ERROR_L3L4E_PROT | \ - I40E_RX_DESC_ERROR_L3L4E_FC | \ - I40E_RX_DESC_ERROR_L3L4E_DMAC_ERR | \ - I40E_RX_DESC_ERROR_L3L4E_DMAC_WARN) - -/* receive queue descriptor programming error */ -#define I40E_RX_PROG_FCOE_ERROR_TBL_FULL(e) \ - (((e) >> I40E_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT) & 0x1) - -#define I40E_RX_PROG_FCOE_ERROR_CONFLICT(e) \ - (((e) >> I40E_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT) & 0x1) - -#define I40E_RX_PROG_FCOE_ERROR_TBL_FULL_BIT \ - BIT(I40E_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT) -#define I40E_RX_PROG_FCOE_ERROR_CONFLICT_BIT \ - BIT(I40E_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT) - -#define I40E_RX_PROG_FCOE_ERROR_INVLFAIL(e) \ - I40E_RX_PROG_FCOE_ERROR_CONFLICT(e) -#define I40E_RX_PROG_FCOE_ERROR_INVLFAIL_BIT \ - I40E_RX_PROG_FCOE_ERROR_CONFLICT_BIT - -/* FCoE DDP related definitions */ -#define I40E_FCOE_MIN_XID 0x0000 /* the min xid supported by fcoe_sw */ -#define I40E_FCOE_MAX_XID 0x0FFF /* the max xid supported by fcoe_sw */ -#define I40E_FCOE_DDP_BUFFCNT_MAX 512 /* 9 bits bufcnt */ -#define I40E_FCOE_DDP_PTR_ALIGN 16 -#define I40E_FCOE_DDP_PTR_MAX (I40E_FCOE_DDP_BUFFCNT_MAX * sizeof(dma_addr_t)) -#define I40E_FCOE_DDP_BUF_MIN 4096 -#define I40E_FCOE_DDP_MAX 2048 -#define I40E_FCOE_FILTER_CTX_QW1_PCTYPE_SHIFT 8 - -/* supported netdev features for FCoE */ -#define I40E_FCOE_NETIF_FEATURES (NETIF_F_ALL_FCOE | \ - NETIF_F_HW_VLAN_CTAG_TX | \ - NETIF_F_HW_VLAN_CTAG_RX | \ - NETIF_F_HW_VLAN_CTAG_FILTER) - -/* DDP context flags */ -enum i40e_fcoe_ddp_flags { - __I40E_FCOE_DDP_NONE = 1, - __I40E_FCOE_DDP_TARGET, - __I40E_FCOE_DDP_INITALIZED, - __I40E_FCOE_DDP_PROGRAMMED, - __I40E_FCOE_DDP_DONE, - __I40E_FCOE_DDP_ABORTED, - __I40E_FCOE_DDP_UNMAPPED, -}; - -/* DDP SW context struct */ -struct i40e_fcoe_ddp { - int len; - u16 xid; - u16 firstoff; - u16 lastsize; - u16 list_len; - u8 fcerr; - u8 prerr; - unsigned long flags; - unsigned int sgc; - struct scatterlist *sgl; - dma_addr_t udp; - u64 *udl; - struct dma_pool *pool; - -}; - -struct i40e_fcoe_ddp_pool { - struct dma_pool *pool; -}; - -struct i40e_fcoe { - unsigned long mode; - atomic_t refcnt; - struct i40e_fcoe_ddp_pool __percpu *ddp_pool; - struct i40e_fcoe_ddp ddp[I40E_FCOE_DDP_MAX]; -}; - -#endif /* _I40E_FCOE_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e31adbc75f9c..4a4401c61089 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -69,12 +69,6 @@ static int i40e_reset(struct i40e_pf *pf); static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); -static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, - struct i40e_cloud_filter *filter, - bool add); -static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, - struct i40e_cloud_filter *filter, - bool add); static int i40e_get_capabilities(struct i40e_pf *pf, enum i40e_admin_queue_opc list_type); @@ -215,8 +209,8 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) { dev_info(&pf->pdev->dev, - "param err: pile=%p needed=%d id=0x%04x\n", - pile, needed, id); + "param err: pile=%s needed=%d id=0x%04x\n", + pile ? "<valid>" : "<null>", needed, id); return -EINVAL; } @@ -1380,14 +1374,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, ether_addr_copy(f->macaddr, macaddr); f->vlan = vlan; - /* If we're in overflow promisc mode, set the state directly - * to failed, so we don't bother to try sending the filter - * to the hardware. - */ - if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state)) - f->state = I40E_FILTER_FAILED; - else - f->state = I40E_FILTER_NEW; + f->state = I40E_FILTER_NEW; INIT_HLIST_NODE(&f->hlist); key = i40e_addr_to_hkey(macaddr); @@ -2116,17 +2103,16 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name, * @list: the list of filters to send to firmware * @add_head: Position in the add hlist * @num_add: the number of filters to add - * @promisc_change: set to true on exit if promiscuous mode was forced on * * Send a request to firmware via AdminQ to add a chunk of filters. Will set - * promisc_changed to true if the firmware has run out of space for more - * filters. + * __I40E_VSI_OVERFLOW_PROMISC bit in vsi->state if the firmware has run out of + * space for more filters. */ static void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, struct i40e_aqc_add_macvlan_element_data *list, struct i40e_new_mac_filter *add_head, - int num_add, bool *promisc_changed) + int num_add) { struct i40e_hw *hw = &vsi->back->hw; int aq_err, fcnt; @@ -2136,7 +2122,6 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, fcnt = i40e_update_filter_state(num_add, list, add_head); if (fcnt != num_add) { - *promisc_changed = true; set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, promiscuous mode forced on\n", @@ -2177,11 +2162,13 @@ i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, NULL); } - if (aq_ret) + if (aq_ret) { + set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); dev_warn(&vsi->back->pdev->dev, - "Error %s setting broadcast promiscuous mode on %s\n", + "Error %s, forcing overflow promiscuous on %s\n", i40e_aq_str(hw, hw->aq.asq_last_status), vsi_name); + } return aq_ret; } @@ -2267,9 +2254,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) struct i40e_mac_filter *f; struct i40e_new_mac_filter *new, *add_head = NULL; struct i40e_hw *hw = &vsi->back->hw; + bool old_overflow, new_overflow; unsigned int failed_filters = 0; unsigned int vlan_filters = 0; - bool promisc_changed = false; char vsi_name[16] = "PF"; int filter_list_len = 0; i40e_status aq_ret = 0; @@ -2291,6 +2278,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) usleep_range(1000, 2000); pf = vsi->back; + old_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); + if (vsi->netdev) { changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags; vsi->current_netdev_flags = vsi->netdev->flags; @@ -2423,12 +2412,6 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) num_add = 0; hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { - if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, - vsi->state)) { - new->state = I40E_FILTER_FAILED; - continue; - } - /* handle broadcast filters by updating the broadcast * promiscuous flag instead of adding a MAC filter. */ @@ -2464,15 +2447,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) /* flush a full buffer */ if (num_add == filter_list_len) { i40e_aqc_add_filters(vsi, vsi_name, add_list, - add_head, num_add, - &promisc_changed); + add_head, num_add); memset(add_list, 0, list_size); num_add = 0; } } if (num_add) { i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head, - num_add, &promisc_changed); + num_add); } /* Now move all of the filters from the temp add list back to * the VSI's list. @@ -2501,24 +2483,16 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) } spin_unlock_bh(&vsi->mac_filter_hash_lock); - /* If promiscuous mode has changed, we need to calculate a new - * threshold for when we are safe to exit - */ - if (promisc_changed) - vsi->promisc_threshold = (vsi->active_filters * 3) / 4; - /* Check if we are able to exit overflow promiscuous mode. We can * safely exit if we didn't just enter, we no longer have any failed * filters, and we have reduced filters below the threshold value. */ - if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state) && - !promisc_changed && !failed_filters && - (vsi->active_filters < vsi->promisc_threshold)) { + if (old_overflow && !failed_filters && + vsi->active_filters < vsi->promisc_threshold) { dev_info(&pf->pdev->dev, "filter logjam cleared on %s, leaving overflow promiscuous mode\n", vsi_name); clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); - promisc_changed = true; vsi->promisc_threshold = 0; } @@ -2528,6 +2502,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) goto out; } + new_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); + + /* If we are entering overflow promiscuous, we need to calculate a new + * threshold for when we are safe to exit + */ + if (!old_overflow && new_overflow) + vsi->promisc_threshold = (vsi->active_filters * 3) / 4; + /* check for changes in promiscuous modes */ if (changed_flags & IFF_ALLMULTI) { bool cur_multipromisc; @@ -2548,12 +2530,11 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) } } - if ((changed_flags & IFF_PROMISC) || promisc_changed) { + if ((changed_flags & IFF_PROMISC) || old_overflow != new_overflow) { bool cur_promisc; cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) || - test_bit(__I40E_VSI_OVERFLOW_PROMISC, - vsi->state)); + new_overflow); aq_ret = i40e_set_promiscuous(pf, cur_promisc); if (aq_ret) { retval = i40e_aq_rc_to_posix(aq_ret, @@ -2738,22 +2719,6 @@ void i40e_vlan_stripping_disable(struct i40e_vsi *vsi) } /** - * i40e_vlan_rx_register - Setup or shutdown vlan offload - * @netdev: network interface to be adjusted - * @features: netdev features to test if VLAN offload is enabled or not - **/ -static void i40e_vlan_rx_register(struct net_device *netdev, u32 features) -{ - struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_vsi *vsi = np->vsi; - - if (features & NETIF_F_HW_VLAN_CTAG_RX) - i40e_vlan_stripping_enable(vsi); - else - i40e_vlan_stripping_disable(vsi); -} - -/** * i40e_add_vlan_all_mac - Add a MAC/VLAN filter for each existing MAC address * @vsi: the vsi being configured * @vid: vlan id to be added (0 = untagged only , -1 = any) @@ -2928,7 +2893,10 @@ static void i40e_restore_vlan(struct i40e_vsi *vsi) if (!vsi->netdev) return; - i40e_vlan_rx_register(vsi->netdev, vsi->netdev->features); + if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) + i40e_vlan_stripping_enable(vsi); + else + i40e_vlan_stripping_disable(vsi); for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID) i40e_vlan_rx_add_vid(vsi->netdev, htons(ETH_P_8021Q), @@ -3449,15 +3417,20 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_q_vectors; i++, vector++) { struct i40e_q_vector *q_vector = vsi->q_vectors[i]; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting); - q_vector->rx.latency_range = I40E_LOW_LATENCY; + q_vector->rx.next_update = jiffies + 1; + q_vector->rx.target_itr = + ITR_TO_REG(vsi->rx_rings[i]->itr_setting); wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), - q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting); - q_vector->tx.latency_range = I40E_LOW_LATENCY; + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + + q_vector->tx.next_update = jiffies + 1; + q_vector->tx.target_itr = + ITR_TO_REG(vsi->tx_rings[i]->itr_setting); wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), - q_vector->tx.itr); + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; + wr32(hw, I40E_PFINT_RATEN(vector - 1), i40e_intrl_usec_to_reg(vsi->int_rate_limit)); @@ -3558,13 +3531,14 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) u32 val; /* set the ITR configuration */ - q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting); - q_vector->rx.latency_range = I40E_LOW_LATENCY; - wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting); - q_vector->tx.latency_range = I40E_LOW_LATENCY; - wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr); + q_vector->rx.next_update = jiffies + 1; + q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting); + wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->tx.next_update = jiffies + 1; + q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting); + wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; i40e_enable_misc_int_causes(pf); @@ -5375,7 +5349,7 @@ out: * @vsi: VSI to be configured * **/ -int i40e_get_link_speed(struct i40e_vsi *vsi) +static int i40e_get_link_speed(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; @@ -6560,6 +6534,75 @@ int i40e_up(struct i40e_vsi *vsi) } /** + * i40e_force_link_state - Force the link status + * @pf: board private structure + * @is_up: whether the link state should be forced up or down + **/ +static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up) +{ + struct i40e_aq_get_phy_abilities_resp abilities; + struct i40e_aq_set_phy_config config = {0}; + struct i40e_hw *hw = &pf->hw; + i40e_status err; + u64 mask; + + /* Get the current phy config */ + err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, + NULL); + if (err) { + dev_err(&pf->pdev->dev, + "failed to get phy cap., ret = %s last_status = %s\n", + i40e_stat_str(hw, err), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return err; + } + + /* If link needs to go up, but was not forced to go down, + * no need for a flap + */ + if (is_up && abilities.phy_type != 0) + return I40E_SUCCESS; + + /* To force link we need to set bits for all supported PHY types, + * but there are now more than 32, so we need to split the bitmap + * across two fields. + */ + mask = I40E_PHY_TYPES_BITMASK; + config.phy_type = is_up ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0; + config.phy_type_ext = is_up ? (u8)((mask >> 32) & 0xff) : 0; + /* Copy the old settings, except of phy_type */ + config.abilities = abilities.abilities; + config.link_speed = abilities.link_speed; + config.eee_capability = abilities.eee_capability; + config.eeer = abilities.eeer_val; + config.low_power_ctrl = abilities.d3_lpan; + err = i40e_aq_set_phy_config(hw, &config, NULL); + + if (err) { + dev_err(&pf->pdev->dev, + "set phy config ret = %s last_status = %s\n", + i40e_stat_str(&pf->hw, err), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + return err; + } + + /* Update the link info */ + err = i40e_update_link_info(hw); + if (err) { + /* Wait a little bit (on 40G cards it sometimes takes a really + * long time for link to come back from the atomic reset) + * and try once more + */ + msleep(1000); + i40e_update_link_info(hw); + } + + i40e_aq_set_link_restart_an(hw, true, NULL); + + return I40E_SUCCESS; +} + +/** * i40e_down - Shutdown the connection processing * @vsi: the VSI being stopped **/ @@ -6576,6 +6619,9 @@ void i40e_down(struct i40e_vsi *vsi) } i40e_vsi_disable_irq(vsi); i40e_vsi_stop_rings(vsi); + if (vsi->type == I40E_VSI_MAIN && + vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) + i40e_force_link_state(vsi->back, false); i40e_napi_disable_all(vsi); for (i = 0; i < vsi->num_queue_pairs; i++) { @@ -6848,8 +6894,8 @@ i40e_set_cld_element(struct i40e_cloud_filter *filter, * Add or delete a cloud filter for a specific flow spec. * Returns 0 if the filter were successfully added. **/ -static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, - struct i40e_cloud_filter *filter, bool add) +int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, bool add) { struct i40e_aqc_cloud_filters_element_data cld_filter; struct i40e_pf *pf = vsi->back; @@ -6915,9 +6961,9 @@ static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, * Add or delete a cloud filter for a specific flow spec using big buffer. * Returns 0 if the filter were successfully added. **/ -static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, - struct i40e_cloud_filter *filter, - bool add) +int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add) { struct i40e_aqc_cloud_filters_element_bb cld_filter; struct i40e_pf *pf = vsi->back; @@ -7537,6 +7583,9 @@ int i40e_open(struct net_device *netdev) netif_carrier_off(netdev); + if (i40e_force_link_state(pf, true)) + return -EAGAIN; + err = i40e_vsi_open(vsi); if (err) return err; @@ -8087,6 +8136,88 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf) } /** + * i40e_reenable_fdir_sb - Restore FDir SB capability + * @pf: board private structure + **/ +static void i40e_reenable_fdir_sb(struct i40e_pf *pf) +{ + if (pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) { + pf->flags &= ~I40E_FLAG_FD_SB_AUTO_DISABLED; + if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && + (I40E_DEBUG_FD & pf->hw.debug_mask)) + dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n"); + } +} + +/** + * i40e_reenable_fdir_atr - Restore FDir ATR capability + * @pf: board private structure + **/ +static void i40e_reenable_fdir_atr(struct i40e_pf *pf) +{ + if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) { + /* ATR uses the same filtering logic as SB rules. It only + * functions properly if the input set mask is at the default + * settings. It is safe to restore the default input set + * because there are no active TCPv4 filter rules. + */ + i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP, + I40E_L3_SRC_MASK | I40E_L3_DST_MASK | + I40E_L4_SRC_MASK | I40E_L4_DST_MASK); + + pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED; + if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + (I40E_DEBUG_FD & pf->hw.debug_mask)) + dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n"); + } +} + +/** + * i40e_delete_invalid_filter - Delete an invalid FDIR filter + * @pf: board private structure + * @filter: FDir filter to remove + */ +static void i40e_delete_invalid_filter(struct i40e_pf *pf, + struct i40e_fdir_filter *filter) +{ + /* Update counters */ + pf->fdir_pf_active_filters--; + pf->fd_inv = 0; + + switch (filter->flow_type) { + case TCP_V4_FLOW: + pf->fd_tcp4_filter_cnt--; + break; + case UDP_V4_FLOW: + pf->fd_udp4_filter_cnt--; + break; + case SCTP_V4_FLOW: + pf->fd_sctp4_filter_cnt--; + break; + case IP_USER_FLOW: + switch (filter->ip4_proto) { + case IPPROTO_TCP: + pf->fd_tcp4_filter_cnt--; + break; + case IPPROTO_UDP: + pf->fd_udp4_filter_cnt--; + break; + case IPPROTO_SCTP: + pf->fd_sctp4_filter_cnt--; + break; + case IPPROTO_IP: + pf->fd_ip4_filter_cnt--; + break; + } + break; + } + + /* Remove the filter from the list and free memory */ + hlist_del(&filter->fdir_node); + kfree(filter); +} + +/** * i40e_fdir_check_and_reenable - Function to reenabe FD ATR or SB if disabled * @pf: board private structure **/ @@ -8104,40 +8235,23 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf) fcnt_avail = pf->fdir_pf_filter_count; if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM)) || (pf->fd_add_err == 0) || - (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt)) { - if (pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) { - pf->flags &= ~I40E_FLAG_FD_SB_AUTO_DISABLED; - if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && - (I40E_DEBUG_FD & pf->hw.debug_mask)) - dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n"); - } - } + (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt)) + i40e_reenable_fdir_sb(pf); /* We should wait for even more space before re-enabling ATR. * Additionally, we cannot enable ATR as long as we still have TCP SB * rules active. */ if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) && - (pf->fd_tcp4_filter_cnt == 0)) { - if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) { - pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED; - if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && - (I40E_DEBUG_FD & pf->hw.debug_mask)) - dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n"); - } - } + (pf->fd_tcp4_filter_cnt == 0)) + i40e_reenable_fdir_atr(pf); /* if hw had a problem adding a filter, delete it */ if (pf->fd_inv > 0) { hlist_for_each_entry_safe(filter, node, - &pf->fdir_filter_list, fdir_node) { - if (filter->fd_id == pf->fd_inv) { - hlist_del(&filter->fdir_node); - kfree(filter); - pf->fdir_pf_active_filters--; - pf->fd_inv = 0; - } - } + &pf->fdir_filter_list, fdir_node) + if (filter->fd_id == pf->fd_inv) + i40e_delete_invalid_filter(pf, filter); } } @@ -9215,6 +9329,17 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } i40e_get_oem_version(&pf->hw); + if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && + ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) || + hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) { + /* The following delay is necessary for 4.33 firmware and older + * to recover after EMP reset. 200 ms should suffice but we + * put here 300 ms to be sure that FW is ready to operate + * after reset. + */ + mdelay(300); + } + /* re-verify the eeprom if we just had an EMP reset */ if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) i40e_verify_eeprom(pf); @@ -9937,18 +10062,17 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi) mutex_lock(&pf->switch_mutex); if (!pf->vsi[vsi->idx]) { - dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](%p,type %d)\n", - vsi->idx, vsi->idx, vsi, vsi->type); + dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](type %d)\n", + vsi->idx, vsi->idx, vsi->type); goto unlock_vsi; } if (pf->vsi[vsi->idx] != vsi) { dev_err(&pf->pdev->dev, - "pf->vsi[%d](%p, type %d) != vsi[%d](%p,type %d): no free!\n", + "pf->vsi[%d](type %d) != vsi[%d](type %d): no free!\n", pf->vsi[vsi->idx]->idx, - pf->vsi[vsi->idx], pf->vsi[vsi->idx]->type, - vsi->idx, vsi, vsi->type); + vsi->idx, vsi->type); goto unlock_vsi; } @@ -10018,7 +10142,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->dcb_tc = 0; if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; - ring->tx_itr_setting = pf->tx_itr_default; + ring->itr_setting = pf->tx_itr_default; vsi->tx_rings[i] = ring++; if (!i40e_enabled_xdp_vsi(vsi)) @@ -10036,7 +10160,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; set_ring_xdp(ring); - ring->tx_itr_setting = pf->tx_itr_default; + ring->itr_setting = pf->tx_itr_default; vsi->xdp_rings[i] = ring++; setup_rx: @@ -10049,7 +10173,7 @@ setup_rx: ring->count = vsi->num_desc; ring->size = 0; ring->dcb_tc = 0; - ring->rx_itr_setting = pf->rx_itr_default; + ring->itr_setting = pf->rx_itr_default; vsi->rx_rings[i] = ring; } @@ -10328,9 +10452,6 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu) netif_napi_add(vsi->netdev, &q_vector->napi, i40e_napi_poll, NAPI_POLL_WEIGHT); - q_vector->rx.latency_range = I40E_LOW_LATENCY; - q_vector->tx.latency_range = I40E_LOW_LATENCY; - /* tie q_vector and vsi together */ vsi->q_vectors[v_idx] = q_vector; @@ -10473,6 +10594,9 @@ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf) if (err) goto err_unwind; + if (pf->flags & I40E_FLAG_IWARP_ENABLED) + i40e_client_update_msix_info(pf); + return 0; err_unwind: @@ -11089,6 +11213,16 @@ static int i40e_sw_init(struct i40e_pf *pf) /* IWARP needs one extra vector for CQP just like MISC.*/ pf->num_iwarp_msix = (int)num_online_cpus() + 1; } + /* Stopping the FW LLDP engine is only supported on the + * XL710 with a FW ver >= 1.7. Also, stopping FW LLDP + * engine is not supported if NPAR is functioning on this + * part + */ + if (pf->hw.mac.type == I40E_MAC_XL710 && + !pf->hw.func_caps.npar_enable && + (pf->hw.aq.api_maj_ver > 1 || + (pf->hw.aq.api_maj_ver == 1 && pf->hw.aq.api_min_ver > 6))) + pf->hw_features |= I40E_HW_STOPPABLE_FW_LLDP; #ifdef CONFIG_PCI_IOV if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) { @@ -14213,6 +14347,11 @@ static int __maybe_unused i40e_suspend(struct device *dev) del_timer_sync(&pf->service_timer); cancel_work_sync(&pf->service_task); + /* Client close must be called explicitly here because the timer + * has been stopped. + */ + i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false); + if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE)) i40e_enable_mc_magic_wake(pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 83798b7841b9..eabb636f6a19 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -287,7 +287,7 @@ i40e_status i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_resume_port_tx(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); -i40e_status +enum i40e_status_code i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid, struct i40e_aqc_cloud_filters_element_bb *filters, u8 filter_count); @@ -299,7 +299,7 @@ enum i40e_status_code i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 vsi, struct i40e_aqc_cloud_filters_element_data *filters, u8 filter_count); -i40e_status +enum i40e_status_code i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid, struct i40e_aqc_cloud_filters_element_bb *filters, u8 filter_count); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index e554aa6cf070..97cfe944b568 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -708,16 +708,22 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) /** * i40e_get_tx_pending - how many tx descriptors not processed * @tx_ring: the ring of descriptors + * @in_sw: use SW variables * * Since there is no access to the ring head register * in XL710, we need to use our local copies **/ -u32 i40e_get_tx_pending(struct i40e_ring *ring) +u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw) { u32 head, tail; - head = i40e_get_head(ring); - tail = readl(ring->tail); + if (!in_sw) { + head = i40e_get_head(ring); + tail = readl(ring->tail); + } else { + head = ring->next_to_clean; + tail = ring->next_to_use; + } if (head != tail) return (head < tail) ? @@ -774,7 +780,7 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi) */ smp_rmb(); tx_ring->tx_stats.prev_pkt_ctr = - i40e_get_tx_pending(tx_ring) ? packets : -1; + i40e_get_tx_pending(tx_ring, true) ? packets : -1; } } } @@ -898,7 +904,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, * them to be written back in case we stay in NAPI. * In this mode on X722 we do not enable Interrupt. */ - unsigned int j = i40e_get_tx_pending(tx_ring); + unsigned int j = i40e_get_tx_pending(tx_ring, false); if (budget && ((j / WB_STRIDE) == 0) && (j > 0) && @@ -995,99 +1001,241 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) } } +static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) +{ + return &q_vector->rx == rc; +} + +static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector) +{ + unsigned int divisor; + + switch (q_vector->vsi->back->hw.phy.link_info.link_speed) { + case I40E_LINK_SPEED_40GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024; + break; + case I40E_LINK_SPEED_25GB: + case I40E_LINK_SPEED_20GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512; + break; + default: + case I40E_LINK_SPEED_10GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256; + break; + case I40E_LINK_SPEED_1GB: + case I40E_LINK_SPEED_100MB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32; + break; + } + + return divisor; +} + /** - * i40e_set_new_dynamic_itr - Find new ITR level + * i40e_update_itr - update the dynamic ITR value based on statistics + * @q_vector: structure containing interrupt and ring information * @rc: structure containing ring performance data * - * Returns true if ITR changed, false if not - * - * Stores a new ITR value based on packets and byte counts during - * the last interrupt. The advantage of per interrupt computation - * is faster updates and more accurate ITR for the current traffic - * pattern. Constants in this function were computed based on - * theoretical maximum wire speed and thresholds were set based on - * testing data as well as attempting to minimize response time + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time * while increasing bulk throughput. **/ -static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) +static void i40e_update_itr(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) { - enum i40e_latency_range new_latency_range = rc->latency_range; - u32 new_itr = rc->itr; - int bytes_per_usec; - unsigned int usecs, estimated_usecs; + unsigned int avg_wire_size, packets, bytes, itr; + unsigned long next_update = jiffies; - if (rc->total_packets == 0 || !rc->itr) - return false; + /* If we don't have any rings just leave ourselves set for maximum + * possible latency so we take ourselves out of the equation. + */ + if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting)) + return; + + /* For Rx we want to push the delay up and default to low latency. + * for Tx we want to pull the delay down and default to high latency. + */ + itr = i40e_container_is_rx(q_vector, rc) ? + I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY : + I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY; + + /* If we didn't update within up to 1 - 2 jiffies we can assume + * that either packets are coming in so slow there hasn't been + * any work, or that there is so much work that NAPI is dealing + * with interrupt moderation and we don't need to do anything. + */ + if (time_after(next_update, rc->next_update)) + goto clear_counts; + + /* If itr_countdown is set it means we programmed an ITR within + * the last 4 interrupt cycles. This has a side effect of us + * potentially firing an early interrupt. In order to work around + * this we need to throw out any data received for a few + * interrupts following the update. + */ + if (q_vector->itr_countdown) { + itr = rc->target_itr; + goto clear_counts; + } + + packets = rc->total_packets; + bytes = rc->total_bytes; - usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; - bytes_per_usec = rc->total_bytes / usecs; + if (i40e_container_is_rx(q_vector, rc)) { + /* If Rx there are 1 to 4 packets and bytes are less than + * 9000 assume insufficient data to use bulk rate limiting + * approach unless Tx is already in bulk rate limiting. We + * are likely latency driven. + */ + if (packets && packets < 4 && bytes < 9000 && + (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) { + itr = I40E_ITR_ADAPTIVE_LATENCY; + goto adjust_by_size; + } + } else if (packets < 4) { + /* If we have Tx and Rx ITR maxed and Tx ITR is running in + * bulk mode and we are receiving 4 or fewer packets just + * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so + * that the Rx can relax. + */ + if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS && + (q_vector->rx.target_itr & I40E_ITR_MASK) == + I40E_ITR_ADAPTIVE_MAX_USECS) + goto clear_counts; + } else if (packets > 32) { + /* If we have processed over 32 packets in a single interrupt + * for Tx assume we need to switch over to "bulk" mode. + */ + rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY; + } - /* The calculations in this algorithm depend on interrupts actually - * firing at the ITR rate. This may not happen if the packet rate is - * really low, or if we've been napi polling. Check to make sure - * that's not the case before we continue. + /* We have no packets to actually measure against. This means + * either one of the other queues on this vector is active or + * we are a Tx queue doing TSO with too high of an interrupt rate. + * + * Between 4 and 56 we can assume that our current interrupt delay + * is only slightly too low. As such we should increase it by a small + * fixed amount. */ - estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update); - if (estimated_usecs > usecs) { - new_latency_range = I40E_LOW_LATENCY; - goto reset_latency; + if (packets < 56) { + itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC; + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; + } + goto clear_counts; + } + + if (packets <= 256) { + itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr); + itr &= I40E_ITR_MASK; + + /* Between 56 and 112 is our "goldilocks" zone where we are + * working out "just right". Just report that our current + * ITR is good for us. + */ + if (packets <= 112) + goto clear_counts; + + /* If packet count is 128 or greater we are likely looking + * at a slight overrun of the delay we want. Try halving + * our delay to see if that will cut the number of packets + * in half per interrupt. + */ + itr /= 2; + itr &= I40E_ITR_MASK; + if (itr < I40E_ITR_ADAPTIVE_MIN_USECS) + itr = I40E_ITR_ADAPTIVE_MIN_USECS; + + goto clear_counts; } - /* simple throttlerate management - * 0-10MB/s lowest (50000 ints/s) - * 10-20MB/s low (20000 ints/s) - * 20-1249MB/s bulk (18000 ints/s) + /* The paths below assume we are dealing with a bulk ITR since + * number of packets is greater than 256. We are just going to have + * to compute a value and try to bring the count under control, + * though for smaller packet sizes there isn't much we can do as + * NAPI polling will likely be kicking in sooner rather than later. + */ + itr = I40E_ITR_ADAPTIVE_BULK; + +adjust_by_size: + /* If packet counts are 256 or greater we can assume we have a gross + * overestimation of what the rate should be. Instead of trying to fine + * tune it just use the formula below to try and dial in an exact value + * give the current packet size of the frame. + */ + avg_wire_size = bytes / packets; + + /* The following is a crude approximation of: + * wmem_default / (size + overhead) = desired_pkts_per_int + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate + * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value + * + * Assuming wmem_default is 212992 and overhead is 640 bytes per + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the + * formula down to * - * The math works out because the divisor is in 10^(-6) which - * turns the bytes/us input value into MB/s values, but - * make sure to use usecs, as the register values written - * are in 2 usec increments in the ITR registers, and make sure - * to use the smoothed values that the countdown timer gives us. + * (170 * (size + 24)) / (size + 640) = ITR + * + * We first do some math on the packet size and then finally bitshift + * by 8 after rounding up. We also have to account for PCIe link speed + * difference as ITR scales based on this. */ - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - if (bytes_per_usec > 10) - new_latency_range = I40E_LOW_LATENCY; - break; - case I40E_LOW_LATENCY: - if (bytes_per_usec > 20) - new_latency_range = I40E_BULK_LATENCY; - else if (bytes_per_usec <= 10) - new_latency_range = I40E_LOWEST_LATENCY; - break; - case I40E_BULK_LATENCY: - default: - if (bytes_per_usec <= 20) - new_latency_range = I40E_LOW_LATENCY; - break; + if (avg_wire_size <= 60) { + /* Start at 250k ints/sec */ + avg_wire_size = 4096; + } else if (avg_wire_size <= 380) { + /* 250K ints/sec to 60K ints/sec */ + avg_wire_size *= 40; + avg_wire_size += 1696; + } else if (avg_wire_size <= 1084) { + /* 60K ints/sec to 36K ints/sec */ + avg_wire_size *= 15; + avg_wire_size += 11452; + } else if (avg_wire_size <= 1980) { + /* 36K ints/sec to 30K ints/sec */ + avg_wire_size *= 5; + avg_wire_size += 22420; + } else { + /* plateau at a limit of 30K ints/sec */ + avg_wire_size = 32256; } -reset_latency: - rc->latency_range = new_latency_range; + /* If we are in low latency mode halve our delay which doubles the + * rate to somewhere between 100K to 16K ints/sec + */ + if (itr & I40E_ITR_ADAPTIVE_LATENCY) + avg_wire_size /= 2; - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - new_itr = I40E_ITR_50K; - break; - case I40E_LOW_LATENCY: - new_itr = I40E_ITR_20K; - break; - case I40E_BULK_LATENCY: - new_itr = I40E_ITR_18K; - break; - default: - break; + /* Resultant value is 256 times larger than it needs to be. This + * gives us room to adjust the value as needed to either increase + * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc. + * + * Use addition as we have already recorded the new latency flag + * for the ITR value. + */ + itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) * + I40E_ITR_ADAPTIVE_MIN_INC; + + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; } +clear_counts: + /* write back value */ + rc->target_itr = itr; + + /* next update should occur within next jiffy */ + rc->next_update = next_update + 1; + rc->total_bytes = 0; rc->total_packets = 0; - rc->last_itr_update = jiffies; - - if (new_itr != rc->itr) { - rc->itr = new_itr; - return true; - } - return false; } /** @@ -1991,7 +2139,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, * @rx_buffer: rx buffer to pull data from * * This function will clean up the contents of the rx_buffer. It will - * either recycle the bufer or unmap it and free the associated resources. + * either recycle the buffer or unmap it and free the associated resources. */ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer) @@ -2274,29 +2422,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) return failure ? budget : (int)total_rx_packets; } -static u32 i40e_buildreg_itr(const int type, const u16 itr) +static inline u32 i40e_buildreg_itr(const int type, u16 itr) { u32 val; + /* We don't bother with setting the CLEARPBA bit as the data sheet + * points out doing so is "meaningless since it was already + * auto-cleared". The auto-clearing happens when the interrupt is + * asserted. + * + * Hardware errata 28 for also indicates that writing to a + * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear + * an event in the PBA anyway so we need to rely on the automask + * to hold pending events for us until the interrupt is re-enabled + * + * The itr value is reported in microseconds, and the register + * value is recorded in 2 microsecond units. For this reason we + * only need to shift by the interval shift - 1 instead of the + * full value. + */ + itr &= I40E_ITR_MASK; + val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | - (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT); + (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1)); return val; } /* a small macro to shorten up some long lines */ #define INTREG I40E_PFINT_DYN_CTLN -static inline int get_rx_itr(struct i40e_vsi *vsi, int idx) -{ - return vsi->rx_rings[idx]->rx_itr_setting; -} -static inline int get_tx_itr(struct i40e_vsi *vsi, int idx) -{ - return vsi->tx_rings[idx]->tx_itr_setting; -} +/* The act of updating the ITR will cause it to immediately trigger. In order + * to prevent this from throwing off adaptive update statistics we defer the + * update so that it can only happen so often. So after either Tx or Rx are + * updated we make the adaptive scheme wait until either the ITR completely + * expires via the next_update expiration or we have been through at least + * 3 interrupts. + */ +#define ITR_COUNTDOWN_START 3 /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -2308,10 +2472,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { struct i40e_hw *hw = &vsi->back->hw; - bool rx = false, tx = false; - u32 rxval, txval; - int idx = q_vector->v_idx; - int rx_itr_setting, tx_itr_setting; + u32 intval; /* If we don't have MSIX, then we only need to re-enable icr0 */ if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) { @@ -2319,65 +2480,49 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, return; } - /* avoid dynamic calculation if in countdown mode OR if - * all dynamic is disabled - */ - rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); - - rx_itr_setting = get_rx_itr(vsi, idx); - tx_itr_setting = get_tx_itr(vsi, idx); - - if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(rx_itr_setting) && - !ITR_IS_DYNAMIC(tx_itr_setting))) { - goto enable_int; - } - - if (ITR_IS_DYNAMIC(rx_itr_setting)) { - rx = i40e_set_new_dynamic_itr(&q_vector->rx); - rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); - } - - if (ITR_IS_DYNAMIC(tx_itr_setting)) { - tx = i40e_set_new_dynamic_itr(&q_vector->tx); - txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); - } + /* These will do nothing if dynamic updates are not enabled */ + i40e_update_itr(q_vector, &q_vector->tx); + i40e_update_itr(q_vector, &q_vector->rx); - if (rx || tx) { - /* get the higher of the two ITR adjustments and - * use the same value for both ITR registers - * when in adaptive mode (Rx and/or Tx) - */ - u16 itr = max(q_vector->tx.itr, q_vector->rx.itr); - - q_vector->tx.itr = q_vector->rx.itr = itr; - txval = i40e_buildreg_itr(I40E_TX_ITR, itr); - tx = true; - rxval = i40e_buildreg_itr(I40E_RX_ITR, itr); - rx = true; - } - - /* only need to enable the interrupt once, but need - * to possibly update both ITR values + /* This block of logic allows us to get away with only updating + * one ITR value with each interrupt. The idea is to perform a + * pseudo-lazy update with the following criteria. + * + * 1. Rx is given higher priority than Tx if both are in same state + * 2. If we must reduce an ITR that is given highest priority. + * 3. We then give priority to increasing ITR based on amount. */ - if (rx) { - /* set the INTENA_MSK_MASK so that this first write - * won't actually enable the interrupt, instead just - * updating the ITR (it's bit 31 PF and VF) + if (q_vector->rx.target_itr < q_vector->rx.current_itr) { + /* Rx ITR needs to be reduced, this is highest priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || + ((q_vector->rx.target_itr - q_vector->rx.current_itr) < + (q_vector->tx.target_itr - q_vector->tx.current_itr))) { + /* Tx ITR needs to be reduced, this is second priority + * Tx ITR needs to be increased more than Rx, fourth priority */ - rxval |= BIT(31); - /* don't check _DOWN because interrupt isn't being enabled */ - wr32(hw, INTREG(q_vector->reg_idx), rxval); + intval = i40e_buildreg_itr(I40E_TX_ITR, + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { + /* Rx ITR needs to be increased, third priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else { + /* No ITR update, lowest priority */ + intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + if (q_vector->itr_countdown) + q_vector->itr_countdown--; } -enable_int: if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - wr32(hw, INTREG(q_vector->reg_idx), txval); - - if (q_vector->itr_countdown) - q_vector->itr_countdown--; - else - q_vector->itr_countdown = ITR_COUNTDOWN_START; + wr32(hw, INTREG(q_vector->reg_idx), intval); } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 701b708628b0..3c80ea784389 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -30,32 +30,37 @@ #include <net/xdp.h> /* Interrupt Throttling and Rate Limiting Goodies */ - -#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */ -#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */ -#define I40E_ITR_100K 0x0005 -#define I40E_ITR_50K 0x000A -#define I40E_ITR_20K 0x0019 -#define I40E_ITR_18K 0x001B -#define I40E_ITR_8K 0x003E -#define I40E_ITR_4K 0x007A -#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ -#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ -#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */ -#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */ #define I40E_DEFAULT_IRQ_WORK 256 -#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1) -#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC)) -#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1) + +/* The datasheet for the X710 and XL710 indicate that the maximum value for + * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec + * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing + * the register value which is divided by 2 lets use the actual values and + * avoid an excessive amount of translation. + */ +#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ +#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */ +#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */ +#define I40E_ITR_100K 10 /* all values below must be even */ +#define I40E_ITR_50K 20 +#define I40E_ITR_20K 50 +#define I40E_ITR_18K 60 +#define I40E_ITR_8K 122 +#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */ +#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC) +#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK) +#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC)) + +#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) +#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) + /* 0x40 is the enable bit for interrupt rate limiting, and must be set if * the value of the rate limit is non-zero */ #define INTRL_ENA BIT(6) +#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2) + /** * i40e_intrl_usec_to_reg - convert interrupt rate limit to register * @intrl: interrupt rate limit to convert @@ -382,8 +387,7 @@ struct i40e_ring { * these values always store the USER setting, and must be converted * before programming to a register. */ - u16 rx_itr_setting; - u16 tx_itr_setting; + u16 itr_setting; u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ @@ -459,21 +463,21 @@ static inline void set_ring_xdp(struct i40e_ring *ring) ring->flags |= I40E_TXR_FLAGS_XDP; } -enum i40e_latency_range { - I40E_LOWEST_LATENCY = 0, - I40E_LOW_LATENCY = 1, - I40E_BULK_LATENCY = 2, -}; +#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002 +#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002 +#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e +#define I40E_ITR_ADAPTIVE_LATENCY 0x8000 +#define I40E_ITR_ADAPTIVE_BULK 0x0000 +#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY)) struct i40e_ring_container { - /* array of pointers to rings */ - struct i40e_ring *ring; + struct i40e_ring *ring; /* pointer to linked list of ring(s) */ + unsigned long next_update; /* jiffies value of next update */ unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_packets; /* total packets processed this int */ - unsigned long last_itr_update; /* jiffies of last ITR update */ u16 count; - enum i40e_latency_range latency_range; - u16 itr; + u16 target_itr; /* target ITR setting for ring(s) */ + u16 current_itr; /* current ITR setting for ring(s) */ }; /* iterator for handling rings in ring container */ @@ -501,7 +505,7 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring); void i40e_free_rx_resources(struct i40e_ring *rx_ring); int i40e_napi_poll(struct napi_struct *napi, int budget); void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector); -u32 i40e_get_tx_pending(struct i40e_ring *ring); +u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw); void i40e_detect_recover_hung(struct i40e_vsi *vsi); int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); bool __i40e_chk_linearize(struct sk_buff *skb); diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index cd294e6a8587..69ea15892a5b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -39,7 +39,7 @@ #define I40E_MASK(mask, shift) ((u32)(mask) << (shift)) #define I40E_MAX_VSI_QP 16 -#define I40E_MAX_VF_VSI 3 +#define I40E_MAX_VF_VSI 4 #define I40E_MAX_CHAINED_RX_BUFFERS 5 #define I40E_MAX_PF_UDP_OFFLOAD_PORTS 16 @@ -1336,6 +1336,9 @@ struct i40e_hw_port_stats { #define I40E_SR_PCIE_ALT_MODULE_MAX_SIZE 1024 #define I40E_SR_CONTROL_WORD_1_SHIFT 0x06 #define I40E_SR_CONTROL_WORD_1_MASK (0x03 << I40E_SR_CONTROL_WORD_1_SHIFT) +#define I40E_PTR_TYPE BIT(15) +#define I40E_SR_OCP_CFG_WORD0 0x2B +#define I40E_SR_OCP_ENABLED BIT(15) /* Shadow RAM related */ #define I40E_SR_SECTOR_SIZE_IN_WORDS 0x800 diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index e9309fb9084b..321ab4badb68 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -258,6 +258,38 @@ static u16 i40e_vc_get_pf_queue_id(struct i40e_vf *vf, u16 vsi_id, } /** + * i40e_get_real_pf_qid + * @vf: pointer to the VF info + * @vsi_id: vsi id + * @queue_id: queue number + * + * wrapper function to get pf_queue_id handling ADq code as well + **/ +static u16 i40e_get_real_pf_qid(struct i40e_vf *vf, u16 vsi_id, u16 queue_id) +{ + int i; + + if (vf->adq_enabled) { + /* Although VF considers all the queues(can be 1 to 16) as its + * own but they may actually belong to different VSIs(up to 4). + * We need to find which queues belongs to which VSI. + */ + for (i = 0; i < vf->num_tc; i++) { + if (queue_id < vf->ch[i].num_qps) { + vsi_id = vf->ch[i].vsi_id; + break; + } + /* find right queue id which is relative to a + * given VSI. + */ + queue_id -= vf->ch[i].num_qps; + } + } + + return i40e_vc_get_pf_queue_id(vf, vsi_id, queue_id); +} + +/** * i40e_config_irq_link_list * @vf: pointer to the VF info * @vsi_id: id of VSI as given by the FW @@ -310,7 +342,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES; - pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); + pf_queue_id = i40e_get_real_pf_qid(vf, vsi_id, vsi_queue_id); reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id); wr32(hw, reg_idx, reg); @@ -333,8 +365,9 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, if (next_q < size) { vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES; - pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, - vsi_queue_id); + pf_queue_id = i40e_get_real_pf_qid(vf, + vsi_id, + vsi_queue_id); } else { pf_queue_id = I40E_QUEUE_END_OF_LIST; qtype = 0; @@ -669,18 +702,20 @@ error_param: /** * i40e_alloc_vsi_res * @vf: pointer to the VF info - * @type: type of VSI to allocate + * @idx: VSI index, applies only for ADq mode, zero otherwise * * alloc VF vsi context & resources **/ -static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) +static int i40e_alloc_vsi_res(struct i40e_vf *vf, u8 idx) { struct i40e_mac_filter *f = NULL; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi; + u64 max_tx_rate = 0; int ret = 0; - vsi = i40e_vsi_setup(pf, type, pf->vsi[pf->lan_vsi]->seid, vf->vf_id); + vsi = i40e_vsi_setup(pf, I40E_VSI_SRIOV, pf->vsi[pf->lan_vsi]->seid, + vf->vf_id); if (!vsi) { dev_err(&pf->pdev->dev, @@ -689,7 +724,8 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) ret = -ENOENT; goto error_alloc_vsi_res; } - if (type == I40E_VSI_SRIOV) { + + if (!idx) { u64 hena = i40e_pf_get_default_rss_hena(pf); u8 broadcast[ETH_ALEN]; @@ -721,17 +757,29 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) spin_unlock_bh(&vsi->mac_filter_hash_lock); wr32(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id), (u32)hena); wr32(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id), (u32)(hena >> 32)); + /* program mac filter only for VF VSI */ + ret = i40e_sync_vsi_filters(vsi); + if (ret) + dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); } - /* program mac filter */ - ret = i40e_sync_vsi_filters(vsi); - if (ret) - dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); + /* storing VSI index and id for ADq and don't apply the mac filter */ + if (vf->adq_enabled) { + vf->ch[idx].vsi_idx = vsi->idx; + vf->ch[idx].vsi_id = vsi->id; + } /* Set VF bandwidth if specified */ if (vf->tx_rate) { + max_tx_rate = vf->tx_rate; + } else if (vf->ch[idx].max_tx_rate) { + max_tx_rate = vf->ch[idx].max_tx_rate; + } + + if (max_tx_rate) { + max_tx_rate = div_u64(max_tx_rate, I40E_BW_CREDIT_DIVISOR); ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, - vf->tx_rate / 50, 0, NULL); + max_tx_rate, 0, NULL); if (ret) dev_err(&pf->pdev->dev, "Unable to set tx rate, VF %d, error code %d.\n", vf->vf_id, ret); @@ -742,6 +790,92 @@ error_alloc_vsi_res: } /** + * i40e_map_pf_queues_to_vsi + * @vf: pointer to the VF info + * + * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This + * function takes care of first part VSILAN_QTABLE, mapping pf queues to VSI. + **/ +static void i40e_map_pf_queues_to_vsi(struct i40e_vf *vf) +{ + struct i40e_pf *pf = vf->pf; + struct i40e_hw *hw = &pf->hw; + u32 reg, num_tc = 1; /* VF has at least one traffic class */ + u16 vsi_id, qps; + int i, j; + + if (vf->adq_enabled) + num_tc = vf->num_tc; + + for (i = 0; i < num_tc; i++) { + if (vf->adq_enabled) { + qps = vf->ch[i].num_qps; + vsi_id = vf->ch[i].vsi_id; + } else { + qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; + vsi_id = vf->lan_vsi_id; + } + + for (j = 0; j < 7; j++) { + if (j * 2 >= qps) { + /* end of list */ + reg = 0x07FF07FF; + } else { + u16 qid = i40e_vc_get_pf_queue_id(vf, + vsi_id, + j * 2); + reg = qid; + qid = i40e_vc_get_pf_queue_id(vf, vsi_id, + (j * 2) + 1); + reg |= qid << 16; + } + i40e_write_rx_ctl(hw, + I40E_VSILAN_QTABLE(j, vsi_id), + reg); + } + } +} + +/** + * i40e_map_pf_to_vf_queues + * @vf: pointer to the VF info + * + * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This + * function takes care of the second part VPLAN_QTABLE & completes VF mappings. + **/ +static void i40e_map_pf_to_vf_queues(struct i40e_vf *vf) +{ + struct i40e_pf *pf = vf->pf; + struct i40e_hw *hw = &pf->hw; + u32 reg, total_qps = 0; + u32 qps, num_tc = 1; /* VF has at least one traffic class */ + u16 vsi_id, qid; + int i, j; + + if (vf->adq_enabled) + num_tc = vf->num_tc; + + for (i = 0; i < num_tc; i++) { + if (vf->adq_enabled) { + qps = vf->ch[i].num_qps; + vsi_id = vf->ch[i].vsi_id; + } else { + qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; + vsi_id = vf->lan_vsi_id; + } + + for (j = 0; j < qps; j++) { + qid = i40e_vc_get_pf_queue_id(vf, vsi_id, j); + + reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK); + wr32(hw, I40E_VPLAN_QTABLE(total_qps, vf->vf_id), + reg); + total_qps++; + } + } +} + +/** * i40e_enable_vf_mappings * @vf: pointer to the VF info * @@ -751,8 +885,7 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf) { struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; - u32 reg, total_queue_pairs = 0; - int j; + u32 reg; /* Tell the hardware we're using noncontiguous mapping. HW requires * that VF queues be mapped using this method, even when they are @@ -765,30 +898,8 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf) reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK; wr32(hw, I40E_VPLAN_MAPENA(vf->vf_id), reg); - /* map PF queues to VF queues */ - for (j = 0; j < pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; j++) { - u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id, j); - - reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK); - wr32(hw, I40E_VPLAN_QTABLE(total_queue_pairs, vf->vf_id), reg); - total_queue_pairs++; - } - - /* map PF queues to VSI */ - for (j = 0; j < 7; j++) { - if (j * 2 >= pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs) { - reg = 0x07FF07FF; /* unused */ - } else { - u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id, - j * 2); - reg = qid; - qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id, - (j * 2) + 1); - reg |= qid << 16; - } - i40e_write_rx_ctl(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id), - reg); - } + i40e_map_pf_to_vf_queues(vf); + i40e_map_pf_queues_to_vsi(vf); i40e_flush(hw); } @@ -824,7 +935,7 @@ static void i40e_free_vf_res(struct i40e_vf *vf) struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; u32 reg_idx, reg; - int i, msix_vf; + int i, j, msix_vf; /* Start by disabling VF's configuration API to prevent the OS from * accessing the VF's VSI after it's freed / invalidated. @@ -846,6 +957,20 @@ static void i40e_free_vf_res(struct i40e_vf *vf) vf->lan_vsi_id = 0; vf->num_mac = 0; } + + /* do the accounting and remove additional ADq VSI's */ + if (vf->adq_enabled && vf->ch[0].vsi_idx) { + for (j = 0; j < vf->num_tc; j++) { + /* At this point VSI0 is already released so don't + * release it again and only clear their values in + * structure variables + */ + if (j) + i40e_vsi_release(pf->vsi[vf->ch[j].vsi_idx]); + vf->ch[j].vsi_idx = 0; + vf->ch[j].vsi_id = 0; + } + } msix_vf = pf->hw.func_caps.num_msix_vectors_vf; /* disable interrupts so the VF starts in a known state */ @@ -891,7 +1016,7 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf) { struct i40e_pf *pf = vf->pf; int total_queue_pairs = 0; - int ret; + int ret, idx; if (vf->num_req_queues && vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF) @@ -900,11 +1025,30 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf) pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF; /* allocate hw vsi context & associated resources */ - ret = i40e_alloc_vsi_res(vf, I40E_VSI_SRIOV); + ret = i40e_alloc_vsi_res(vf, 0); if (ret) goto error_alloc; total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; + /* allocate additional VSIs based on tc information for ADq */ + if (vf->adq_enabled) { + if (pf->queues_left >= + (I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF)) { + /* TC 0 always belongs to VF VSI */ + for (idx = 1; idx < vf->num_tc; idx++) { + ret = i40e_alloc_vsi_res(vf, idx); + if (ret) + goto error_alloc; + } + /* send correct number of queues */ + total_queue_pairs = I40E_MAX_VF_QUEUES; + } else { + dev_info(&pf->pdev->dev, "VF %d: Not enough queues to allocate, disabling ADq\n", + vf->vf_id); + vf->adq_enabled = false; + } + } + /* We account for each VF to get a default number of queue pairs. If * the VF has now requested more, we need to account for that to make * certain we never request more queues than we actually have left in @@ -1537,6 +1681,27 @@ static int i40e_vc_get_version_msg(struct i40e_vf *vf, u8 *msg) } /** + * i40e_del_qch - delete all the additional VSIs created as a part of ADq + * @vf: pointer to VF structure + **/ +static void i40e_del_qch(struct i40e_vf *vf) +{ + struct i40e_pf *pf = vf->pf; + int i; + + /* first element in the array belongs to primary VF VSI and we shouldn't + * delete it. We should however delete the rest of the VSIs created + */ + for (i = 1; i < vf->num_tc; i++) { + if (vf->ch[i].vsi_idx) { + i40e_vsi_release(pf->vsi[vf->ch[i].vsi_idx]); + vf->ch[i].vsi_idx = 0; + vf->ch[i].vsi_id = 0; + } + } +} + +/** * i40e_vc_get_vf_resources_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -1631,6 +1796,9 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ) + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ADQ; + vfres->num_vsis = num_vsis; vfres->num_queue_pairs = vf->num_queue_pairs; vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf; @@ -1855,27 +2023,37 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) (struct virtchnl_vsi_queue_config_info *)msg; struct virtchnl_queue_pair_info *qpi; struct i40e_pf *pf = vf->pf; - u16 vsi_id, vsi_queue_id; + u16 vsi_id, vsi_queue_id = 0; i40e_status aq_ret = 0; - int i; + int i, j = 0, idx = 0; + + vsi_id = qci->vsi_id; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto error_param; } - vsi_id = qci->vsi_id; if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; } + for (i = 0; i < qci->num_queue_pairs; i++) { qpi = &qci->qpair[i]; - vsi_queue_id = qpi->txq.queue_id; - if ((qpi->txq.vsi_id != vsi_id) || - (qpi->rxq.vsi_id != vsi_id) || - (qpi->rxq.queue_id != vsi_queue_id) || - !i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) { + + if (!vf->adq_enabled) { + vsi_queue_id = qpi->txq.queue_id; + + if (qpi->txq.vsi_id != qci->vsi_id || + qpi->rxq.vsi_id != qci->vsi_id || + qpi->rxq.queue_id != vsi_queue_id) { + aq_ret = I40E_ERR_PARAM; + goto error_param; + } + } + + if (!i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -1887,9 +2065,33 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) aq_ret = I40E_ERR_PARAM; goto error_param; } + + /* For ADq there can be up to 4 VSIs with max 4 queues each. + * VF does not know about these additional VSIs and all + * it cares is about its own queues. PF configures these queues + * to its appropriate VSIs based on TC mapping + **/ + if (vf->adq_enabled) { + if (j == (vf->ch[idx].num_qps - 1)) { + idx++; + j = 0; /* resetting the queue count */ + vsi_queue_id = 0; + } else { + j++; + vsi_queue_id++; + } + vsi_id = vf->ch[idx].vsi_id; + } } /* set vsi num_queue_pairs in use to num configured by VF */ - pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = qci->num_queue_pairs; + if (!vf->adq_enabled) { + pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = + qci->num_queue_pairs; + } else { + for (i = 0; i < vf->num_tc; i++) + pf->vsi[vf->ch[i].vsi_idx]->num_queue_pairs = + vf->ch[i].num_qps; + } error_param: /* send the response to the VF */ @@ -1898,6 +2100,33 @@ error_param: } /** + * i40e_validate_queue_map + * @vsi_id: vsi id + * @queuemap: Tx or Rx queue map + * + * check if Tx or Rx queue map is valid + **/ +static int i40e_validate_queue_map(struct i40e_vf *vf, u16 vsi_id, + unsigned long queuemap) +{ + u16 vsi_queue_id, queue_id; + + for_each_set_bit(vsi_queue_id, &queuemap, I40E_MAX_VSI_QP) { + if (vf->adq_enabled) { + vsi_id = vf->ch[vsi_queue_id / I40E_MAX_VF_VSI].vsi_id; + queue_id = (vsi_queue_id % I40E_DEFAULT_QUEUES_PER_VF); + } else { + queue_id = vsi_queue_id; + } + + if (!i40e_vc_isvalid_queue_id(vf, vsi_id, queue_id)) + return -EINVAL; + } + + return 0; +} + +/** * i40e_vc_config_irq_map_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -1911,9 +2140,8 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) struct virtchnl_irq_map_info *irqmap_info = (struct virtchnl_irq_map_info *)msg; struct virtchnl_vector_map *map; - u16 vsi_id, vsi_queue_id, vector_id; + u16 vsi_id, vector_id; i40e_status aq_ret = 0; - unsigned long tempmap; int i; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { @@ -1923,7 +2151,6 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) for (i = 0; i < irqmap_info->num_vectors; i++) { map = &irqmap_info->vecmap[i]; - vector_id = map->vector_id; vsi_id = map->vsi_id; /* validate msg params */ @@ -1933,23 +2160,14 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) goto error_param; } - /* lookout for the invalid queue index */ - tempmap = map->rxq_map; - for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) { - if (!i40e_vc_isvalid_queue_id(vf, vsi_id, - vsi_queue_id)) { - aq_ret = I40E_ERR_PARAM; - goto error_param; - } + if (i40e_validate_queue_map(vf, vsi_id, map->rxq_map)) { + aq_ret = I40E_ERR_PARAM; + goto error_param; } - tempmap = map->txq_map; - for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) { - if (!i40e_vc_isvalid_queue_id(vf, vsi_id, - vsi_queue_id)) { - aq_ret = I40E_ERR_PARAM; - goto error_param; - } + if (i40e_validate_queue_map(vf, vsi_id, map->txq_map)) { + aq_ret = I40E_ERR_PARAM; + goto error_param; } i40e_config_irq_link_list(vf, vsi_id, map); @@ -1975,6 +2193,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) struct i40e_pf *pf = vf->pf; u16 vsi_id = vqs->vsi_id; i40e_status aq_ret = 0; + int i; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; @@ -1993,6 +2212,16 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx])) aq_ret = I40E_ERR_TIMEOUT; + + /* need to start the rings for additional ADq VSI's as well */ + if (vf->adq_enabled) { + /* zero belongs to LAN VSI */ + for (i = 1; i < vf->num_tc; i++) { + if (i40e_vsi_start_rings(pf->vsi[vf->ch[i].vsi_idx])) + aq_ret = I40E_ERR_TIMEOUT; + } + } + error_param: /* send the response to the VF */ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, @@ -2139,25 +2368,47 @@ error_param: /** * i40e_check_vf_permission * @vf: pointer to the VF info - * @macaddr: pointer to the MAC Address being checked + * @al: MAC address list from virtchnl * - * Check if the VF has permission to add or delete unicast MAC address - * filters and return error code -EPERM if not. Then check if the - * address filter requested is broadcast or zero and if so return - * an invalid MAC address error code. + * Check that the given list of MAC addresses is allowed. Will return -EPERM + * if any address in the list is not valid. Checks the following conditions: + * + * 1) broadcast and zero addresses are never valid + * 2) unicast addresses are not allowed if the VMM has administratively set + * the VF MAC address, unless the VF is marked as privileged. + * 3) There is enough space to add all the addresses. + * + * Note that to guarantee consistency, it is expected this function be called + * while holding the mac_filter_hash_lock, as otherwise the current number of + * addresses might not be accurate. **/ -static inline int i40e_check_vf_permission(struct i40e_vf *vf, u8 *macaddr) +static inline int i40e_check_vf_permission(struct i40e_vf *vf, + struct virtchnl_ether_addr_list *al) { struct i40e_pf *pf = vf->pf; - int ret = 0; + int i; + + /* If this VF is not privileged, then we can't add more than a limited + * number of addresses. Check to make sure that the additions do not + * push us over the limit. + */ + if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) && + (vf->num_mac + al->num_elements) > I40E_VC_MAX_MAC_ADDR_PER_VF) { + dev_err(&pf->pdev->dev, + "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n"); + return -EPERM; + } + + for (i = 0; i < al->num_elements; i++) { + u8 *addr = al->list[i].addr; + + if (is_broadcast_ether_addr(addr) || + is_zero_ether_addr(addr)) { + dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n", + addr); + return I40E_ERR_INVALID_MAC_ADDR; + } - if (is_broadcast_ether_addr(macaddr) || - is_zero_ether_addr(macaddr)) { - dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n", macaddr); - ret = I40E_ERR_INVALID_MAC_ADDR; - } else if (vf->pf_set_mac && !is_multicast_ether_addr(macaddr) && - !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) && - !ether_addr_equal(macaddr, vf->default_lan_addr.addr)) { /* If the host VMM administrator has set the VF MAC address * administratively via the ndo_set_vf_mac command then deny * permission to the VF to add or delete unicast MAC addresses. @@ -2165,16 +2416,16 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, u8 *macaddr) * The VF may request to set the MAC address filter already * assigned to it so do not return an error in that case. */ - dev_err(&pf->pdev->dev, - "VF attempting to override administratively set MAC address, reload the VF driver to resume normal operation\n"); - ret = -EPERM; - } else if ((vf->num_mac >= I40E_VC_MAX_MAC_ADDR_PER_VF) && - !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) { - dev_err(&pf->pdev->dev, - "VF is not trusted, switch the VF to trusted to add more functionality\n"); - ret = -EPERM; + if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) && + !is_multicast_ether_addr(addr) && vf->pf_set_mac && + !ether_addr_equal(addr, vf->default_lan_addr.addr)) { + dev_err(&pf->pdev->dev, + "VF attempting to override administratively set MAC address, reload the VF driver to resume normal operation\n"); + return -EPERM; + } } - return ret; + + return 0; } /** @@ -2201,11 +2452,6 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) goto error_param; } - for (i = 0; i < al->num_elements; i++) { - ret = i40e_check_vf_permission(vf, al->list[i].addr); - if (ret) - goto error_param; - } vsi = pf->vsi[vf->lan_vsi_idx]; /* Lock once, because all function inside for loop accesses VSI's @@ -2213,6 +2459,12 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) */ spin_lock_bh(&vsi->mac_filter_hash_lock); + ret = i40e_check_vf_permission(vf, al); + if (ret) { + spin_unlock_bh(&vsi->mac_filter_hash_lock); + goto error_param; + } + /* add new addresses to the list */ for (i = 0; i < al->num_elements; i++) { struct i40e_mac_filter *f; @@ -2688,6 +2940,618 @@ err: } /** + * i40e_validate_cloud_filter + * @mask: mask for TC filter + * @data: data for TC filter + * + * This function validates cloud filter programmed as TC filter for ADq + **/ +static int i40e_validate_cloud_filter(struct i40e_vf *vf, + struct virtchnl_filter *tc_filter) +{ + struct virtchnl_l4_spec mask = tc_filter->mask.tcp_spec; + struct virtchnl_l4_spec data = tc_filter->data.tcp_spec; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + struct i40e_mac_filter *f; + struct hlist_node *h; + bool found = false; + int bkt; + + if (!tc_filter->action) { + dev_info(&pf->pdev->dev, + "VF %d: Currently ADq doesn't support Drop Action\n", + vf->vf_id); + goto err; + } + + /* action_meta is TC number here to which the filter is applied */ + if (!tc_filter->action_meta || + tc_filter->action_meta > I40E_MAX_VF_VSI) { + dev_info(&pf->pdev->dev, "VF %d: Invalid TC number %u\n", + vf->vf_id, tc_filter->action_meta); + goto err; + } + + /* Check filter if it's programmed for advanced mode or basic mode. + * There are two ADq modes (for VF only), + * 1. Basic mode: intended to allow as many filter options as possible + * to be added to a VF in Non-trusted mode. Main goal is + * to add filters to its own MAC and VLAN id. + * 2. Advanced mode: is for allowing filters to be applied other than + * its own MAC or VLAN. This mode requires the VF to be + * Trusted. + */ + if (mask.dst_mac[0] && !mask.dst_ip[0]) { + vsi = pf->vsi[vf->lan_vsi_idx]; + f = i40e_find_mac(vsi, data.dst_mac); + + if (!f) { + dev_info(&pf->pdev->dev, + "Destination MAC %pM doesn't belong to VF %d\n", + data.dst_mac, vf->vf_id); + goto err; + } + + if (mask.vlan_id) { + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, + hlist) { + if (f->vlan == ntohs(data.vlan_id)) { + found = true; + break; + } + } + if (!found) { + dev_info(&pf->pdev->dev, + "VF %d doesn't have any VLAN id %u\n", + vf->vf_id, ntohs(data.vlan_id)); + goto err; + } + } + } else { + /* Check if VF is trusted */ + if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) { + dev_err(&pf->pdev->dev, + "VF %d not trusted, make VF trusted to add advanced mode ADq cloud filters\n", + vf->vf_id); + return I40E_ERR_CONFIG; + } + } + + if (mask.dst_mac[0] & data.dst_mac[0]) { + if (is_broadcast_ether_addr(data.dst_mac) || + is_zero_ether_addr(data.dst_mac)) { + dev_info(&pf->pdev->dev, "VF %d: Invalid Dest MAC addr %pM\n", + vf->vf_id, data.dst_mac); + goto err; + } + } + + if (mask.src_mac[0] & data.src_mac[0]) { + if (is_broadcast_ether_addr(data.src_mac) || + is_zero_ether_addr(data.src_mac)) { + dev_info(&pf->pdev->dev, "VF %d: Invalid Source MAC addr %pM\n", + vf->vf_id, data.src_mac); + goto err; + } + } + + if (mask.dst_port & data.dst_port) { + if (!data.dst_port || be16_to_cpu(data.dst_port) > 0xFFFF) { + dev_info(&pf->pdev->dev, "VF %d: Invalid Dest port\n", + vf->vf_id); + goto err; + } + } + + if (mask.src_port & data.src_port) { + if (!data.src_port || be16_to_cpu(data.src_port) > 0xFFFF) { + dev_info(&pf->pdev->dev, "VF %d: Invalid Source port\n", + vf->vf_id); + goto err; + } + } + + if (tc_filter->flow_type != VIRTCHNL_TCP_V6_FLOW && + tc_filter->flow_type != VIRTCHNL_TCP_V4_FLOW) { + dev_info(&pf->pdev->dev, "VF %d: Invalid Flow type\n", + vf->vf_id); + goto err; + } + + if (mask.vlan_id & data.vlan_id) { + if (ntohs(data.vlan_id) > I40E_MAX_VLANID) { + dev_info(&pf->pdev->dev, "VF %d: invalid VLAN ID\n", + vf->vf_id); + goto err; + } + } + + return I40E_SUCCESS; +err: + return I40E_ERR_CONFIG; +} + +/** + * i40e_find_vsi_from_seid - searches for the vsi with the given seid + * @vf: pointer to the VF info + * @seid - seid of the vsi it is searching for + **/ +static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid) +{ + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + int i; + + for (i = 0; i < vf->num_tc ; i++) { + vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id); + if (vsi && vsi->seid == seid) + return vsi; + } + return NULL; +} + +/** + * i40e_del_all_cloud_filters + * @vf: pointer to the VF info + * + * This function deletes all cloud filters + **/ +static void i40e_del_all_cloud_filters(struct i40e_vf *vf) +{ + struct i40e_cloud_filter *cfilter = NULL; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + struct hlist_node *node; + int ret; + + hlist_for_each_entry_safe(cfilter, node, + &vf->cloud_filter_list, cloud_node) { + vsi = i40e_find_vsi_from_seid(vf, cfilter->seid); + + if (!vsi) { + dev_err(&pf->pdev->dev, "VF %d: no VSI found for matching %u seid, can't delete cloud filter\n", + vf->vf_id, cfilter->seid); + continue; + } + + if (cfilter->dst_port) + ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, + false); + else + ret = i40e_add_del_cloud_filter(vsi, cfilter, false); + if (ret) + dev_err(&pf->pdev->dev, + "VF %d: Failed to delete cloud filter, err %s aq_err %s\n", + vf->vf_id, i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + + hlist_del(&cfilter->cloud_node); + kfree(cfilter); + vf->num_cloud_filters--; + } +} + +/** + * i40e_vc_del_cloud_filter + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * This function deletes a cloud filter programmed as TC filter for ADq + **/ +static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg) +{ + struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg; + struct virtchnl_l4_spec mask = vcf->mask.tcp_spec; + struct virtchnl_l4_spec tcf = vcf->data.tcp_spec; + struct i40e_cloud_filter cfilter, *cf = NULL; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + struct hlist_node *node; + i40e_status aq_ret = 0; + int i, ret; + + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (!vf->adq_enabled) { + dev_info(&pf->pdev->dev, + "VF %d: ADq not enabled, can't apply cloud filter\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (i40e_validate_cloud_filter(vf, vcf)) { + dev_info(&pf->pdev->dev, + "VF %d: Invalid input, can't apply cloud filter\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + memset(&cfilter, 0, sizeof(cfilter)); + /* parse destination mac address */ + for (i = 0; i < ETH_ALEN; i++) + cfilter.dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i]; + + /* parse source mac address */ + for (i = 0; i < ETH_ALEN; i++) + cfilter.src_mac[i] = mask.src_mac[i] & tcf.src_mac[i]; + + cfilter.vlan_id = mask.vlan_id & tcf.vlan_id; + cfilter.dst_port = mask.dst_port & tcf.dst_port; + cfilter.src_port = mask.src_port & tcf.src_port; + + switch (vcf->flow_type) { + case VIRTCHNL_TCP_V4_FLOW: + cfilter.n_proto = ETH_P_IP; + if (mask.dst_ip[0] & tcf.dst_ip[0]) + memcpy(&cfilter.ip.v4.dst_ip, tcf.dst_ip, + ARRAY_SIZE(tcf.dst_ip)); + else if (mask.src_ip[0] & tcf.dst_ip[0]) + memcpy(&cfilter.ip.v4.src_ip, tcf.src_ip, + ARRAY_SIZE(tcf.dst_ip)); + break; + case VIRTCHNL_TCP_V6_FLOW: + cfilter.n_proto = ETH_P_IPV6; + if (mask.dst_ip[3] & tcf.dst_ip[3]) + memcpy(&cfilter.ip.v6.dst_ip6, tcf.dst_ip, + sizeof(cfilter.ip.v6.dst_ip6)); + if (mask.src_ip[3] & tcf.src_ip[3]) + memcpy(&cfilter.ip.v6.src_ip6, tcf.src_ip, + sizeof(cfilter.ip.v6.src_ip6)); + break; + default: + /* TC filter can be configured based on different combinations + * and in this case IP is not a part of filter config + */ + dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n", + vf->vf_id); + } + + /* get the vsi to which the tc belongs to */ + vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx]; + cfilter.seid = vsi->seid; + cfilter.flags = vcf->field_flags; + + /* Deleting TC filter */ + if (tcf.dst_port) + ret = i40e_add_del_cloud_filter_big_buf(vsi, &cfilter, false); + else + ret = i40e_add_del_cloud_filter(vsi, &cfilter, false); + if (ret) { + dev_err(&pf->pdev->dev, + "VF %d: Failed to delete cloud filter, err %s aq_err %s\n", + vf->vf_id, i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + goto err; + } + + hlist_for_each_entry_safe(cf, node, + &vf->cloud_filter_list, cloud_node) { + if (cf->seid != cfilter.seid) + continue; + if (mask.dst_port) + if (cfilter.dst_port != cf->dst_port) + continue; + if (mask.dst_mac[0]) + if (!ether_addr_equal(cf->src_mac, cfilter.src_mac)) + continue; + /* for ipv4 data to be valid, only first byte of mask is set */ + if (cfilter.n_proto == ETH_P_IP && mask.dst_ip[0]) + if (memcmp(&cfilter.ip.v4.dst_ip, &cf->ip.v4.dst_ip, + ARRAY_SIZE(tcf.dst_ip))) + continue; + /* for ipv6, mask is set for all sixteen bytes (4 words) */ + if (cfilter.n_proto == ETH_P_IPV6 && mask.dst_ip[3]) + if (memcmp(&cfilter.ip.v6.dst_ip6, &cf->ip.v6.dst_ip6, + sizeof(cfilter.ip.v6.src_ip6))) + continue; + if (mask.vlan_id) + if (cfilter.vlan_id != cf->vlan_id) + continue; + + hlist_del(&cf->cloud_node); + kfree(cf); + vf->num_cloud_filters--; + } + +err: + return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_CLOUD_FILTER, + aq_ret); +} + +/** + * i40e_vc_add_cloud_filter + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * This function adds a cloud filter programmed as TC filter for ADq + **/ +static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) +{ + struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg; + struct virtchnl_l4_spec mask = vcf->mask.tcp_spec; + struct virtchnl_l4_spec tcf = vcf->data.tcp_spec; + struct i40e_cloud_filter *cfilter = NULL; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + i40e_status aq_ret = 0; + int i, ret; + + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (!vf->adq_enabled) { + dev_info(&pf->pdev->dev, + "VF %d: ADq is not enabled, can't apply cloud filter\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (i40e_validate_cloud_filter(vf, vcf)) { + dev_info(&pf->pdev->dev, + "VF %d: Invalid input/s, can't apply cloud filter\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL); + if (!cfilter) + return -ENOMEM; + + /* parse destination mac address */ + for (i = 0; i < ETH_ALEN; i++) + cfilter->dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i]; + + /* parse source mac address */ + for (i = 0; i < ETH_ALEN; i++) + cfilter->src_mac[i] = mask.src_mac[i] & tcf.src_mac[i]; + + cfilter->vlan_id = mask.vlan_id & tcf.vlan_id; + cfilter->dst_port = mask.dst_port & tcf.dst_port; + cfilter->src_port = mask.src_port & tcf.src_port; + + switch (vcf->flow_type) { + case VIRTCHNL_TCP_V4_FLOW: + cfilter->n_proto = ETH_P_IP; + if (mask.dst_ip[0] & tcf.dst_ip[0]) + memcpy(&cfilter->ip.v4.dst_ip, tcf.dst_ip, + ARRAY_SIZE(tcf.dst_ip)); + else if (mask.src_ip[0] & tcf.dst_ip[0]) + memcpy(&cfilter->ip.v4.src_ip, tcf.src_ip, + ARRAY_SIZE(tcf.dst_ip)); + break; + case VIRTCHNL_TCP_V6_FLOW: + cfilter->n_proto = ETH_P_IPV6; + if (mask.dst_ip[3] & tcf.dst_ip[3]) + memcpy(&cfilter->ip.v6.dst_ip6, tcf.dst_ip, + sizeof(cfilter->ip.v6.dst_ip6)); + if (mask.src_ip[3] & tcf.src_ip[3]) + memcpy(&cfilter->ip.v6.src_ip6, tcf.src_ip, + sizeof(cfilter->ip.v6.src_ip6)); + break; + default: + /* TC filter can be configured based on different combinations + * and in this case IP is not a part of filter config + */ + dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n", + vf->vf_id); + } + + /* get the VSI to which the TC belongs to */ + vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx]; + cfilter->seid = vsi->seid; + cfilter->flags = vcf->field_flags; + + /* Adding cloud filter programmed as TC filter */ + if (tcf.dst_port) + ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true); + else + ret = i40e_add_del_cloud_filter(vsi, cfilter, true); + if (ret) { + dev_err(&pf->pdev->dev, + "VF %d: Failed to add cloud filter, err %s aq_err %s\n", + vf->vf_id, i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + goto err; + } + + INIT_HLIST_NODE(&cfilter->cloud_node); + hlist_add_head(&cfilter->cloud_node, &vf->cloud_filter_list); + vf->num_cloud_filters++; +err: + return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_CLOUD_FILTER, + aq_ret); +} + +/** + * i40e_vc_add_qch_msg: Add queue channel and enable ADq + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + **/ +static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) +{ + struct virtchnl_tc_info *tci = + (struct virtchnl_tc_info *)msg; + struct i40e_pf *pf = vf->pf; + struct i40e_link_status *ls = &pf->hw.phy.link_info; + int i, adq_request_qps = 0, speed = 0; + i40e_status aq_ret = 0; + + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + + /* ADq cannot be applied if spoof check is ON */ + if (vf->spoofchk) { + dev_err(&pf->pdev->dev, + "Spoof check is ON, turn it OFF to enable ADq\n"); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)) { + dev_err(&pf->pdev->dev, + "VF %d attempting to enable ADq, but hasn't properly negotiated that capability\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + /* max number of traffic classes for VF currently capped at 4 */ + if (!tci->num_tc || tci->num_tc > I40E_MAX_VF_VSI) { + dev_err(&pf->pdev->dev, + "VF %d trying to set %u TCs, valid range 1-4 TCs per VF\n", + vf->vf_id, tci->num_tc); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + /* validate queues for each TC */ + for (i = 0; i < tci->num_tc; i++) + if (!tci->list[i].count || + tci->list[i].count > I40E_DEFAULT_QUEUES_PER_VF) { + dev_err(&pf->pdev->dev, + "VF %d: TC %d trying to set %u queues, valid range 1-4 queues per TC\n", + vf->vf_id, i, tci->list[i].count); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + /* need Max VF queues but already have default number of queues */ + adq_request_qps = I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF; + + if (pf->queues_left < adq_request_qps) { + dev_err(&pf->pdev->dev, + "No queues left to allocate to VF %d\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } else { + /* we need to allocate max VF queues to enable ADq so as to + * make sure ADq enabled VF always gets back queues when it + * goes through a reset. + */ + vf->num_queue_pairs = I40E_MAX_VF_QUEUES; + } + + /* get link speed in MB to validate rate limit */ + switch (ls->link_speed) { + case VIRTCHNL_LINK_SPEED_100MB: + speed = SPEED_100; + break; + case VIRTCHNL_LINK_SPEED_1GB: + speed = SPEED_1000; + break; + case VIRTCHNL_LINK_SPEED_10GB: + speed = SPEED_10000; + break; + case VIRTCHNL_LINK_SPEED_20GB: + speed = SPEED_20000; + break; + case VIRTCHNL_LINK_SPEED_25GB: + speed = SPEED_25000; + break; + case VIRTCHNL_LINK_SPEED_40GB: + speed = SPEED_40000; + break; + default: + dev_err(&pf->pdev->dev, + "Cannot detect link speed\n"); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + /* parse data from the queue channel info */ + vf->num_tc = tci->num_tc; + for (i = 0; i < vf->num_tc; i++) { + if (tci->list[i].max_tx_rate) { + if (tci->list[i].max_tx_rate > speed) { + dev_err(&pf->pdev->dev, + "Invalid max tx rate %llu specified for VF %d.", + tci->list[i].max_tx_rate, + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } else { + vf->ch[i].max_tx_rate = + tci->list[i].max_tx_rate; + } + } + vf->ch[i].num_qps = tci->list[i].count; + } + + /* set this flag only after making sure all inputs are sane */ + vf->adq_enabled = true; + /* num_req_queues is set when user changes number of queues via ethtool + * and this causes issue for default VSI(which depends on this variable) + * when ADq is enabled, hence reset it. + */ + vf->num_req_queues = 0; + + /* reset the VF in order to allocate resources */ + i40e_vc_notify_vf_reset(vf); + i40e_reset_vf(vf, false); + + return I40E_SUCCESS; + + /* send the response to the VF */ +err: + return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_CHANNELS, + aq_ret); +} + +/** + * i40e_vc_del_qch_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + **/ +static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg) +{ + struct i40e_pf *pf = vf->pf; + i40e_status aq_ret = 0; + + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (vf->adq_enabled) { + i40e_del_all_cloud_filters(vf); + i40e_del_qch(vf); + vf->adq_enabled = false; + vf->num_tc = 0; + dev_info(&pf->pdev->dev, + "Deleting Queue Channels and cloud filters for ADq on VF %d\n", + vf->vf_id); + } else { + dev_info(&pf->pdev->dev, "VF %d trying to delete queue channels but ADq isn't enabled\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + } + + /* reset the VF in order to allocate resources */ + i40e_vc_notify_vf_reset(vf); + i40e_reset_vf(vf, false); + + return I40E_SUCCESS; + +err: + return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_CHANNELS, + aq_ret); +} + +/** * i40e_vc_process_vf_msg * @pf: pointer to the PF structure * @vf_id: source VF id @@ -2816,7 +3680,18 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, case VIRTCHNL_OP_REQUEST_QUEUES: ret = i40e_vc_request_queues_msg(vf, msg, msglen); break; - + case VIRTCHNL_OP_ENABLE_CHANNELS: + ret = i40e_vc_add_qch_msg(vf, msg); + break; + case VIRTCHNL_OP_DISABLE_CHANNELS: + ret = i40e_vc_del_qch_msg(vf, msg); + break; + case VIRTCHNL_OP_ADD_CLOUD_FILTER: + ret = i40e_vc_add_cloud_filter(vf, msg); + break; + case VIRTCHNL_OP_DEL_CLOUD_FILTER: + ret = i40e_vc_del_cloud_filter(vf, msg); + break; case VIRTCHNL_OP_UNKNOWN: default: dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n", @@ -2889,6 +3764,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) int ret = 0; struct hlist_node *h; int bkt; + u8 i; /* validate the request */ if (vf_id >= pf->num_alloc_vfs) { @@ -2900,6 +3776,16 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) vf = &(pf->vf[vf_id]); vsi = pf->vsi[vf->lan_vsi_idx]; + + /* When the VF is resetting wait until it is done. + * It can take up to 200 milliseconds, + * but wait for up to 300 milliseconds to be safe. + */ + for (i = 0; i < 15; i++) { + if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) + break; + msleep(20); + } if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n", vf_id); @@ -3382,6 +4268,16 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) i40e_vc_disable_vf(vf); dev_info(&pf->pdev->dev, "VF %u is now %strusted\n", vf_id, setting ? "" : "un"); + + if (vf->adq_enabled) { + if (!vf->trusted) { + dev_info(&pf->pdev->dev, + "VF %u no longer Trusted, deleting all cloud filters\n", + vf_id); + i40e_del_all_cloud_filters(vf); + } + } + out: return ret; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 5efc4f92bb37..6852599b2379 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -69,6 +69,19 @@ enum i40e_vf_capabilities { I40E_VIRTCHNL_VF_CAP_IWARP, }; +/* In ADq, max 4 VSI's can be allocated per VF including primary VF VSI. + * These variables are used to store indices, id's and number of queues + * for each VSI including that of primary VF VSI. Each Traffic class is + * termed as channel and each channel can in-turn have 4 queues which + * means max 16 queues overall per VF. + */ +struct i40evf_channel { + u16 vsi_idx; /* index in PF struct for all channel VSIs */ + u16 vsi_id; /* VSI ID used by firmware */ + u16 num_qps; /* number of queue pairs requested by user */ + u64 max_tx_rate; /* bandwidth rate allocation for VSIs */ +}; + /* VF information structure */ struct i40e_vf { struct i40e_pf *pf; @@ -111,6 +124,13 @@ struct i40e_vf { u16 num_mac; u16 num_vlan; + /* ADq related variables */ + bool adq_enabled; /* flag to enable adq */ + u8 num_tc; + struct i40evf_channel ch[I40E_MAX_VF_VSI]; + struct hlist_head cloud_filter_list; + u16 num_cloud_filters; + /* RDMA Client */ struct virtchnl_iwarp_qvlist_info *qvlist_info; }; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 357d6051281f..e088d23eb083 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -196,7 +196,7 @@ void i40evf_detect_recover_hung(struct i40e_vsi *vsi) */ smp_rmb(); tx_ring->tx_stats.prev_pkt_ctr = - i40evf_get_tx_pending(tx_ring, false) ? packets : -1; + i40evf_get_tx_pending(tx_ring, true) ? packets : -1; } } } @@ -392,99 +392,241 @@ void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) val); } +static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) +{ + return &q_vector->rx == rc; +} + +static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector) +{ + unsigned int divisor; + + switch (q_vector->adapter->link_speed) { + case I40E_LINK_SPEED_40GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024; + break; + case I40E_LINK_SPEED_25GB: + case I40E_LINK_SPEED_20GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512; + break; + default: + case I40E_LINK_SPEED_10GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256; + break; + case I40E_LINK_SPEED_1GB: + case I40E_LINK_SPEED_100MB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32; + break; + } + + return divisor; +} + /** - * i40e_set_new_dynamic_itr - Find new ITR level + * i40e_update_itr - update the dynamic ITR value based on statistics + * @q_vector: structure containing interrupt and ring information * @rc: structure containing ring performance data * - * Returns true if ITR changed, false if not - * - * Stores a new ITR value based on packets and byte counts during - * the last interrupt. The advantage of per interrupt computation - * is faster updates and more accurate ITR for the current traffic - * pattern. Constants in this function were computed based on - * theoretical maximum wire speed and thresholds were set based on - * testing data as well as attempting to minimize response time + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time * while increasing bulk throughput. **/ -static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) +static void i40e_update_itr(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) { - enum i40e_latency_range new_latency_range = rc->latency_range; - u32 new_itr = rc->itr; - int bytes_per_usec; - unsigned int usecs, estimated_usecs; + unsigned int avg_wire_size, packets, bytes, itr; + unsigned long next_update = jiffies; - if (rc->total_packets == 0 || !rc->itr) - return false; + /* If we don't have any rings just leave ourselves set for maximum + * possible latency so we take ourselves out of the equation. + */ + if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting)) + return; + + /* For Rx we want to push the delay up and default to low latency. + * for Tx we want to pull the delay down and default to high latency. + */ + itr = i40e_container_is_rx(q_vector, rc) ? + I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY : + I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY; + + /* If we didn't update within up to 1 - 2 jiffies we can assume + * that either packets are coming in so slow there hasn't been + * any work, or that there is so much work that NAPI is dealing + * with interrupt moderation and we don't need to do anything. + */ + if (time_after(next_update, rc->next_update)) + goto clear_counts; + + /* If itr_countdown is set it means we programmed an ITR within + * the last 4 interrupt cycles. This has a side effect of us + * potentially firing an early interrupt. In order to work around + * this we need to throw out any data received for a few + * interrupts following the update. + */ + if (q_vector->itr_countdown) { + itr = rc->target_itr; + goto clear_counts; + } + + packets = rc->total_packets; + bytes = rc->total_bytes; - usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; - bytes_per_usec = rc->total_bytes / usecs; + if (i40e_container_is_rx(q_vector, rc)) { + /* If Rx there are 1 to 4 packets and bytes are less than + * 9000 assume insufficient data to use bulk rate limiting + * approach unless Tx is already in bulk rate limiting. We + * are likely latency driven. + */ + if (packets && packets < 4 && bytes < 9000 && + (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) { + itr = I40E_ITR_ADAPTIVE_LATENCY; + goto adjust_by_size; + } + } else if (packets < 4) { + /* If we have Tx and Rx ITR maxed and Tx ITR is running in + * bulk mode and we are receiving 4 or fewer packets just + * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so + * that the Rx can relax. + */ + if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS && + (q_vector->rx.target_itr & I40E_ITR_MASK) == + I40E_ITR_ADAPTIVE_MAX_USECS) + goto clear_counts; + } else if (packets > 32) { + /* If we have processed over 32 packets in a single interrupt + * for Tx assume we need to switch over to "bulk" mode. + */ + rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY; + } - /* The calculations in this algorithm depend on interrupts actually - * firing at the ITR rate. This may not happen if the packet rate is - * really low, or if we've been napi polling. Check to make sure - * that's not the case before we continue. + /* We have no packets to actually measure against. This means + * either one of the other queues on this vector is active or + * we are a Tx queue doing TSO with too high of an interrupt rate. + * + * Between 4 and 56 we can assume that our current interrupt delay + * is only slightly too low. As such we should increase it by a small + * fixed amount. */ - estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update); - if (estimated_usecs > usecs) { - new_latency_range = I40E_LOW_LATENCY; - goto reset_latency; + if (packets < 56) { + itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC; + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; + } + goto clear_counts; + } + + if (packets <= 256) { + itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr); + itr &= I40E_ITR_MASK; + + /* Between 56 and 112 is our "goldilocks" zone where we are + * working out "just right". Just report that our current + * ITR is good for us. + */ + if (packets <= 112) + goto clear_counts; + + /* If packet count is 128 or greater we are likely looking + * at a slight overrun of the delay we want. Try halving + * our delay to see if that will cut the number of packets + * in half per interrupt. + */ + itr /= 2; + itr &= I40E_ITR_MASK; + if (itr < I40E_ITR_ADAPTIVE_MIN_USECS) + itr = I40E_ITR_ADAPTIVE_MIN_USECS; + + goto clear_counts; } - /* simple throttlerate management - * 0-10MB/s lowest (50000 ints/s) - * 10-20MB/s low (20000 ints/s) - * 20-1249MB/s bulk (18000 ints/s) + /* The paths below assume we are dealing with a bulk ITR since + * number of packets is greater than 256. We are just going to have + * to compute a value and try to bring the count under control, + * though for smaller packet sizes there isn't much we can do as + * NAPI polling will likely be kicking in sooner rather than later. + */ + itr = I40E_ITR_ADAPTIVE_BULK; + +adjust_by_size: + /* If packet counts are 256 or greater we can assume we have a gross + * overestimation of what the rate should be. Instead of trying to fine + * tune it just use the formula below to try and dial in an exact value + * give the current packet size of the frame. + */ + avg_wire_size = bytes / packets; + + /* The following is a crude approximation of: + * wmem_default / (size + overhead) = desired_pkts_per_int + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate + * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value * - * The math works out because the divisor is in 10^(-6) which - * turns the bytes/us input value into MB/s values, but - * make sure to use usecs, as the register values written - * are in 2 usec increments in the ITR registers, and make sure - * to use the smoothed values that the countdown timer gives us. + * Assuming wmem_default is 212992 and overhead is 640 bytes per + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the + * formula down to + * + * (170 * (size + 24)) / (size + 640) = ITR + * + * We first do some math on the packet size and then finally bitshift + * by 8 after rounding up. We also have to account for PCIe link speed + * difference as ITR scales based on this. */ - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - if (bytes_per_usec > 10) - new_latency_range = I40E_LOW_LATENCY; - break; - case I40E_LOW_LATENCY: - if (bytes_per_usec > 20) - new_latency_range = I40E_BULK_LATENCY; - else if (bytes_per_usec <= 10) - new_latency_range = I40E_LOWEST_LATENCY; - break; - case I40E_BULK_LATENCY: - default: - if (bytes_per_usec <= 20) - new_latency_range = I40E_LOW_LATENCY; - break; + if (avg_wire_size <= 60) { + /* Start at 250k ints/sec */ + avg_wire_size = 4096; + } else if (avg_wire_size <= 380) { + /* 250K ints/sec to 60K ints/sec */ + avg_wire_size *= 40; + avg_wire_size += 1696; + } else if (avg_wire_size <= 1084) { + /* 60K ints/sec to 36K ints/sec */ + avg_wire_size *= 15; + avg_wire_size += 11452; + } else if (avg_wire_size <= 1980) { + /* 36K ints/sec to 30K ints/sec */ + avg_wire_size *= 5; + avg_wire_size += 22420; + } else { + /* plateau at a limit of 30K ints/sec */ + avg_wire_size = 32256; } -reset_latency: - rc->latency_range = new_latency_range; + /* If we are in low latency mode halve our delay which doubles the + * rate to somewhere between 100K to 16K ints/sec + */ + if (itr & I40E_ITR_ADAPTIVE_LATENCY) + avg_wire_size /= 2; - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - new_itr = I40E_ITR_50K; - break; - case I40E_LOW_LATENCY: - new_itr = I40E_ITR_20K; - break; - case I40E_BULK_LATENCY: - new_itr = I40E_ITR_18K; - break; - default: - break; + /* Resultant value is 256 times larger than it needs to be. This + * gives us room to adjust the value as needed to either increase + * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc. + * + * Use addition as we have already recorded the new latency flag + * for the ITR value. + */ + itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) * + I40E_ITR_ADAPTIVE_MIN_INC; + + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; } +clear_counts: + /* write back value */ + rc->target_itr = itr; + + /* next update should occur within next jiffy */ + rc->next_update = next_update + 1; + rc->total_bytes = 0; rc->total_packets = 0; - rc->last_itr_update = jiffies; - - if (new_itr != rc->itr) { - rc->itr = new_itr; - return true; - } - return false; } /** @@ -1273,7 +1415,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, * @rx_buffer: rx buffer to pull data from * * This function will clean up the contents of the rx_buffer. It will - * either recycle the bufer or unmap it and free the associated resources. + * either recycle the buffer or unmap it and free the associated resources. */ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer) @@ -1457,33 +1599,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) return failure ? budget : (int)total_rx_packets; } -static u32 i40e_buildreg_itr(const int type, const u16 itr) +static inline u32 i40e_buildreg_itr(const int type, u16 itr) { u32 val; + /* We don't bother with setting the CLEARPBA bit as the data sheet + * points out doing so is "meaningless since it was already + * auto-cleared". The auto-clearing happens when the interrupt is + * asserted. + * + * Hardware errata 28 for also indicates that writing to a + * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear + * an event in the PBA anyway so we need to rely on the automask + * to hold pending events for us until the interrupt is re-enabled + * + * The itr value is reported in microseconds, and the register + * value is recorded in 2 microsecond units. For this reason we + * only need to shift by the interval shift - 1 instead of the + * full value. + */ + itr &= I40E_ITR_MASK; + val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | - (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); + (itr << (I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1)); return val; } /* a small macro to shorten up some long lines */ #define INTREG I40E_VFINT_DYN_CTLN1 -static inline int get_rx_itr(struct i40e_vsi *vsi, int idx) -{ - struct i40evf_adapter *adapter = vsi->back; - return adapter->rx_rings[idx].rx_itr_setting; -} - -static inline int get_tx_itr(struct i40e_vsi *vsi, int idx) -{ - struct i40evf_adapter *adapter = vsi->back; - - return adapter->tx_rings[idx].tx_itr_setting; -} +/* The act of updating the ITR will cause it to immediately trigger. In order + * to prevent this from throwing off adaptive update statistics we defer the + * update so that it can only happen so often. So after either Tx or Rx are + * updated we make the adaptive scheme wait until either the ITR completely + * expires via the next_update expiration or we have been through at least + * 3 interrupts. + */ +#define ITR_COUNTDOWN_START 3 /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -1495,70 +1649,51 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { struct i40e_hw *hw = &vsi->back->hw; - bool rx = false, tx = false; - u32 rxval, txval; - int idx = q_vector->v_idx; - int rx_itr_setting, tx_itr_setting; - - /* avoid dynamic calculation if in countdown mode OR if - * all dynamic is disabled - */ - rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); - - rx_itr_setting = get_rx_itr(vsi, idx); - tx_itr_setting = get_tx_itr(vsi, idx); + u32 intval; - if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(rx_itr_setting) && - !ITR_IS_DYNAMIC(tx_itr_setting))) { - goto enable_int; - } - - if (ITR_IS_DYNAMIC(rx_itr_setting)) { - rx = i40e_set_new_dynamic_itr(&q_vector->rx); - rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); - } + /* These will do nothing if dynamic updates are not enabled */ + i40e_update_itr(q_vector, &q_vector->tx); + i40e_update_itr(q_vector, &q_vector->rx); - if (ITR_IS_DYNAMIC(tx_itr_setting)) { - tx = i40e_set_new_dynamic_itr(&q_vector->tx); - txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); - } - - if (rx || tx) { - /* get the higher of the two ITR adjustments and - * use the same value for both ITR registers - * when in adaptive mode (Rx and/or Tx) - */ - u16 itr = max(q_vector->tx.itr, q_vector->rx.itr); - - q_vector->tx.itr = q_vector->rx.itr = itr; - txval = i40e_buildreg_itr(I40E_TX_ITR, itr); - tx = true; - rxval = i40e_buildreg_itr(I40E_RX_ITR, itr); - rx = true; - } - - /* only need to enable the interrupt once, but need - * to possibly update both ITR values + /* This block of logic allows us to get away with only updating + * one ITR value with each interrupt. The idea is to perform a + * pseudo-lazy update with the following criteria. + * + * 1. Rx is given higher priority than Tx if both are in same state + * 2. If we must reduce an ITR that is given highest priority. + * 3. We then give priority to increasing ITR based on amount. */ - if (rx) { - /* set the INTENA_MSK_MASK so that this first write - * won't actually enable the interrupt, instead just - * updating the ITR (it's bit 31 PF and VF) + if (q_vector->rx.target_itr < q_vector->rx.current_itr) { + /* Rx ITR needs to be reduced, this is highest priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || + ((q_vector->rx.target_itr - q_vector->rx.current_itr) < + (q_vector->tx.target_itr - q_vector->tx.current_itr))) { + /* Tx ITR needs to be reduced, this is second priority + * Tx ITR needs to be increased more than Rx, fourth priority */ - rxval |= BIT(31); - /* don't check _DOWN because interrupt isn't being enabled */ - wr32(hw, INTREG(q_vector->reg_idx), rxval); + intval = i40e_buildreg_itr(I40E_TX_ITR, + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { + /* Rx ITR needs to be increased, third priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else { + /* No ITR update, lowest priority */ + intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + if (q_vector->itr_countdown) + q_vector->itr_countdown--; } -enable_int: if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - wr32(hw, INTREG(q_vector->reg_idx), txval); - - if (q_vector->itr_countdown) - q_vector->itr_countdown--; - else - q_vector->itr_countdown = ITR_COUNTDOWN_START; + wr32(hw, INTREG(q_vector->reg_idx), intval); } /** diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 7798a6645c3f..9129447d079b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -28,31 +28,35 @@ #define _I40E_TXRX_H_ /* Interrupt Throttling and Rate Limiting Goodies */ - -#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */ -#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */ -#define I40E_ITR_100K 0x0005 -#define I40E_ITR_50K 0x000A -#define I40E_ITR_20K 0x0019 -#define I40E_ITR_18K 0x001B -#define I40E_ITR_8K 0x003E -#define I40E_ITR_4K 0x007A -#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ -#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ -#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */ -#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */ #define I40E_DEFAULT_IRQ_WORK 256 -#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1) -#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC)) -#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1) + +/* The datasheet for the X710 and XL710 indicate that the maximum value for + * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec + * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing + * the register value which is divided by 2 lets use the actual values and + * avoid an excessive amount of translation. + */ +#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ +#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */ +#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */ +#define I40E_ITR_100K 10 /* all values below must be even */ +#define I40E_ITR_50K 20 +#define I40E_ITR_20K 50 +#define I40E_ITR_18K 60 +#define I40E_ITR_8K 122 +#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */ +#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC) +#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK) +#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC)) + +#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) +#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) + /* 0x40 is the enable bit for interrupt rate limiting, and must be set if * the value of the rate limit is non-zero */ #define INTRL_ENA BIT(6) +#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2) #define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0) #define I40E_INTRL_8K 125 /* 8000 ints/sec */ @@ -362,8 +366,7 @@ struct i40e_ring { * these values always store the USER setting, and must be converted * before programming to a register. */ - u16 rx_itr_setting; - u16 tx_itr_setting; + u16 itr_setting; u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ @@ -425,21 +428,21 @@ static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring) ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED; } -enum i40e_latency_range { - I40E_LOWEST_LATENCY = 0, - I40E_LOW_LATENCY = 1, - I40E_BULK_LATENCY = 2, -}; +#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002 +#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002 +#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e +#define I40E_ITR_ADAPTIVE_LATENCY 0x8000 +#define I40E_ITR_ADAPTIVE_BULK 0x0000 +#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY)) struct i40e_ring_container { - /* array of pointers to rings */ - struct i40e_ring *ring; + struct i40e_ring *ring; /* pointer to linked list of ring(s) */ + unsigned long next_update; /* jiffies value of next update */ unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_packets; /* total packets processed this int */ - unsigned long last_itr_update; /* jiffies of last ITR update */ u16 count; - enum i40e_latency_range latency_range; - u16 itr; + u16 target_itr; /* target ITR setting for ring(s) */ + u16 current_itr; /* current ITR setting for ring(s) */ }; /* iterator for handling rings in ring container */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 9690c1ea019e..279dced87e47 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -52,7 +52,10 @@ #include <linux/socket.h> #include <linux/jiffies.h> #include <net/ip6_checksum.h> +#include <net/pkt_cls.h> #include <net/udp.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> #include "i40e_type.h" #include <linux/avf/virtchnl.h> @@ -106,6 +109,7 @@ struct i40e_vsi { #define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4) #define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4) +#define I40EVF_MBPS_DIVISOR 125000 /* divisor to convert to Mbps */ /* MAX_MSIX_Q_VECTORS of these are allocated, * but we only use one per queue-specific vector. @@ -117,9 +121,8 @@ struct i40e_q_vector { struct i40e_ring_container rx; struct i40e_ring_container tx; u32 ring_mask; + u8 itr_countdown; /* when 0 should adjust adaptive ITR */ u8 num_ringpairs; /* total number of ring pairs in vector */ -#define ITR_COUNTDOWN_START 100 - u8 itr_countdown; /* when 0 or 1 update ITR */ u16 v_idx; /* index in the vsi->q_vector array. */ u16 reg_idx; /* register index of the interrupt */ char name[IFNAMSIZ + 15]; @@ -169,6 +172,28 @@ struct i40evf_vlan_filter { bool add; /* filter needs to be added */ }; +#define I40EVF_MAX_TRAFFIC_CLASS 4 +/* State of traffic class creation */ +enum i40evf_tc_state_t { + __I40EVF_TC_INVALID, /* no traffic class, default state */ + __I40EVF_TC_RUNNING, /* traffic classes have been created */ +}; + +/* channel info */ +struct i40evf_channel_config { + struct virtchnl_channel_info ch_info[I40EVF_MAX_TRAFFIC_CLASS]; + enum i40evf_tc_state_t state; + u8 total_qps; +}; + +/* State of cloud filter */ +enum i40evf_cloud_filter_state_t { + __I40EVF_CF_INVALID, /* cloud filter not added */ + __I40EVF_CF_ADD_PENDING, /* cloud filter pending add by the PF */ + __I40EVF_CF_DEL_PENDING, /* cloud filter pending del by the PF */ + __I40EVF_CF_ACTIVE, /* cloud filter is active */ +}; + /* Driver state. The order of these is important! */ enum i40evf_state_t { __I40EVF_STARTUP, /* driver loaded, probe complete */ @@ -190,6 +215,36 @@ enum i40evf_critical_section_t { __I40EVF_IN_REMOVE_TASK, /* device being removed */ }; +#define I40EVF_CLOUD_FIELD_OMAC 0x01 +#define I40EVF_CLOUD_FIELD_IMAC 0x02 +#define I40EVF_CLOUD_FIELD_IVLAN 0x04 +#define I40EVF_CLOUD_FIELD_TEN_ID 0x08 +#define I40EVF_CLOUD_FIELD_IIP 0x10 + +#define I40EVF_CF_FLAGS_OMAC I40EVF_CLOUD_FIELD_OMAC +#define I40EVF_CF_FLAGS_IMAC I40EVF_CLOUD_FIELD_IMAC +#define I40EVF_CF_FLAGS_IMAC_IVLAN (I40EVF_CLOUD_FIELD_IMAC |\ + I40EVF_CLOUD_FIELD_IVLAN) +#define I40EVF_CF_FLAGS_IMAC_TEN_ID (I40EVF_CLOUD_FIELD_IMAC |\ + I40EVF_CLOUD_FIELD_TEN_ID) +#define I40EVF_CF_FLAGS_OMAC_TEN_ID_IMAC (I40EVF_CLOUD_FIELD_OMAC |\ + I40EVF_CLOUD_FIELD_IMAC |\ + I40EVF_CLOUD_FIELD_TEN_ID) +#define I40EVF_CF_FLAGS_IMAC_IVLAN_TEN_ID (I40EVF_CLOUD_FIELD_IMAC |\ + I40EVF_CLOUD_FIELD_IVLAN |\ + I40EVF_CLOUD_FIELD_TEN_ID) +#define I40EVF_CF_FLAGS_IIP I40E_CLOUD_FIELD_IIP + +/* bookkeeping of cloud filters */ +struct i40evf_cloud_filter { + enum i40evf_cloud_filter_state_t state; + struct list_head list; + struct virtchnl_filter f; + unsigned long cookie; + bool del; /* filter needs to be deleted */ + bool add; /* filter needs to be added */ +}; + /* board specific private data structure */ struct i40evf_adapter { struct timer_list watchdog_timer; @@ -225,13 +280,10 @@ struct i40evf_adapter { u32 flags; #define I40EVF_FLAG_RX_CSUM_ENABLED BIT(0) -#define I40EVF_FLAG_IMIR_ENABLED BIT(1) -#define I40EVF_FLAG_MQ_CAPABLE BIT(2) #define I40EVF_FLAG_PF_COMMS_FAILED BIT(3) #define I40EVF_FLAG_RESET_PENDING BIT(4) #define I40EVF_FLAG_RESET_NEEDED BIT(5) #define I40EVF_FLAG_WB_ON_ITR_CAPABLE BIT(6) -#define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE BIT(7) #define I40EVF_FLAG_ADDR_SET_BY_PF BIT(8) #define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED BIT(9) #define I40EVF_FLAG_CLIENT_NEEDS_OPEN BIT(10) @@ -241,6 +293,7 @@ struct i40evf_adapter { #define I40EVF_FLAG_ALLMULTI_ON BIT(14) #define I40EVF_FLAG_LEGACY_RX BIT(15) #define I40EVF_FLAG_REINIT_ITR_NEEDED BIT(16) +#define I40EVF_FLAG_QUEUES_DISABLED BIT(17) /* duplicates for common code */ #define I40E_FLAG_DCB_ENABLED 0 #define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED @@ -269,6 +322,10 @@ struct i40evf_adapter { #define I40EVF_FLAG_AQ_RELEASE_ALLMULTI BIT(18) #define I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING BIT(19) #define I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING BIT(20) +#define I40EVF_FLAG_AQ_ENABLE_CHANNELS BIT(21) +#define I40EVF_FLAG_AQ_DISABLE_CHANNELS BIT(22) +#define I40EVF_FLAG_AQ_ADD_CLOUD_FILTER BIT(23) +#define I40EVF_FLAG_AQ_DEL_CLOUD_FILTER BIT(24) /* OS defined structs */ struct net_device *netdev; @@ -314,6 +371,13 @@ struct i40evf_adapter { u16 rss_lut_size; u8 *rss_key; u8 *rss_lut; + /* ADQ related members */ + struct i40evf_channel_config ch_config; + u8 num_tc; + struct list_head cloud_filter_list; + /* lock to protest access to the cloud filter list */ + spinlock_t cloud_filter_list_lock; + u16 num_cloud_filters; }; @@ -380,4 +444,8 @@ void i40evf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len); void i40evf_notify_client_l2_params(struct i40e_vsi *vsi); void i40evf_notify_client_open(struct i40e_vsi *vsi); void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset); +void i40evf_enable_channels(struct i40evf_adapter *adapter); +void i40evf_disable_channels(struct i40evf_adapter *adapter); +void i40evf_add_cloud_filter(struct i40evf_adapter *adapter); +void i40evf_del_cloud_filter(struct i40evf_adapter *adapter); #endif /* _I40EVF_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index e2d8aa19d205..e6793255de0b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -457,14 +457,14 @@ static int __i40evf_get_coalesce(struct net_device *netdev, rx_ring = &adapter->rx_rings[queue]; tx_ring = &adapter->tx_rings[queue]; - if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) + if (ITR_IS_DYNAMIC(rx_ring->itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) + if (ITR_IS_DYNAMIC(tx_ring->itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC; return 0; } @@ -502,7 +502,7 @@ static int i40evf_get_per_queue_coalesce(struct net_device *netdev, /** * i40evf_set_itr_per_queue - set ITR values for specific queue - * @vsi: the VSI to set values for + * @adapter: the VF adapter struct to set values for * @ec: coalesce settings from ethtool * @queue: the queue to modify * @@ -514,33 +514,29 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter, { struct i40e_ring *rx_ring = &adapter->rx_rings[queue]; struct i40e_ring *tx_ring = &adapter->tx_rings[queue]; - struct i40e_vsi *vsi = &adapter->vsi; - struct i40e_hw *hw = &adapter->hw; struct i40e_q_vector *q_vector; - u16 vector; - rx_ring->rx_itr_setting = ec->rx_coalesce_usecs; - tx_ring->tx_itr_setting = ec->tx_coalesce_usecs; + rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); + tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); - rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC; + rx_ring->itr_setting |= I40E_ITR_DYNAMIC; if (!ec->use_adaptive_rx_coalesce) - rx_ring->rx_itr_setting ^= I40E_ITR_DYNAMIC; + rx_ring->itr_setting ^= I40E_ITR_DYNAMIC; - tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC; + tx_ring->itr_setting |= I40E_ITR_DYNAMIC; if (!ec->use_adaptive_tx_coalesce) - tx_ring->tx_itr_setting ^= I40E_ITR_DYNAMIC; + tx_ring->itr_setting ^= I40E_ITR_DYNAMIC; q_vector = rx_ring->q_vector; - q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); q_vector = tx_ring->q_vector; - q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); - i40e_flush(hw); + /* The interrupt handler itself will take care of programming + * the Tx and Rx ITR values based on the values we have entered + * into the q_vector, no need to write the values now. + */ } /** @@ -565,8 +561,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev, if (ec->rx_coalesce_usecs == 0) { if (ec->use_adaptive_rx_coalesce) netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); - } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) || - (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + } else if ((ec->rx_coalesce_usecs < I40E_MIN_ITR) || + (ec->rx_coalesce_usecs > I40E_MAX_ITR)) { netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); return -EINVAL; } @@ -575,8 +571,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev, if (ec->tx_coalesce_usecs == 0) { if (ec->use_adaptive_tx_coalesce) netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); - } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) || - (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + } else if ((ec->tx_coalesce_usecs < I40E_MIN_ITR) || + (ec->tx_coalesce_usecs > I40E_MAX_ITR)) { netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); return -EINVAL; } @@ -699,6 +695,12 @@ static int i40evf_set_channels(struct net_device *netdev, return -EINVAL; } + if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && + adapter->num_tc) { + dev_info(&adapter->pdev->dev, "Cannot set channels since ADq is enabled.\n"); + return -EINVAL; + } + /* All of these should have already been checked by ethtool before this * even gets to us, but just to be sure. */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 16989ad2ca90..7e7cd80abaf4 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -353,11 +353,12 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) rx_ring->vsi = &adapter->vsi; q_vector->rx.ring = rx_ring; q_vector->rx.count++; - q_vector->rx.latency_range = I40E_LOW_LATENCY; - q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); + q_vector->rx.next_update = jiffies + 1; + q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); q_vector->ring_mask |= BIT(r_idx); - q_vector->itr_countdown = ITR_COUNTDOWN_START; - wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr); + wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx), + q_vector->rx.current_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; } /** @@ -378,11 +379,12 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) tx_ring->vsi = &adapter->vsi; q_vector->tx.ring = tx_ring; q_vector->tx.count++; - q_vector->tx.latency_range = I40E_LOW_LATENCY; - q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); - q_vector->itr_countdown = ITR_COUNTDOWN_START; + q_vector->tx.next_update = jiffies + 1; + q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); q_vector->num_ringpairs++; - wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr); + wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx), + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; } /** @@ -783,7 +785,7 @@ static int i40evf_vlan_rx_kill_vid(struct net_device *netdev, **/ static struct i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter, - u8 *macaddr) + const u8 *macaddr) { struct i40evf_mac_filter *f; @@ -806,20 +808,18 @@ i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter, **/ static struct i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter, - u8 *macaddr) + const u8 *macaddr) { struct i40evf_mac_filter *f; if (!macaddr) return NULL; - spin_lock_bh(&adapter->mac_vlan_list_lock); - f = i40evf_find_filter(adapter, macaddr); if (!f) { f = kzalloc(sizeof(*f), GFP_ATOMIC); if (!f) - goto clearout; + return f; ether_addr_copy(f->macaddr, macaddr); @@ -830,8 +830,6 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter, f->remove = false; } -clearout: - spin_unlock_bh(&adapter->mac_vlan_list_lock); return f; } @@ -866,9 +864,10 @@ static int i40evf_set_mac(struct net_device *netdev, void *p) adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER; } + f = i40evf_add_filter(adapter, addr->sa_data); + spin_unlock_bh(&adapter->mac_vlan_list_lock); - f = i40evf_add_filter(adapter, addr->sa_data); if (f) { ether_addr_copy(hw->mac.addr, addr->sa_data); ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); @@ -878,50 +877,64 @@ static int i40evf_set_mac(struct net_device *netdev, void *p) } /** - * i40evf_set_rx_mode - NDO callback to set the netdev filters - * @netdev: network interface device structure - **/ -static void i40evf_set_rx_mode(struct net_device *netdev) + * i40evf_addr_sync - Callback for dev_(mc|uc)_sync to add address + * @netdev: the netdevice + * @addr: address to add + * + * Called by __dev_(mc|uc)_sync when an address needs to be added. We call + * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock. + */ +static int i40evf_addr_sync(struct net_device *netdev, const u8 *addr) { struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40evf_mac_filter *f, *ftmp; - struct netdev_hw_addr *uca; - struct netdev_hw_addr *mca; - struct netdev_hw_addr *ha; - - /* add addr if not already in the filter list */ - netdev_for_each_uc_addr(uca, netdev) { - i40evf_add_filter(adapter, uca->addr); - } - netdev_for_each_mc_addr(mca, netdev) { - i40evf_add_filter(adapter, mca->addr); - } - spin_lock_bh(&adapter->mac_vlan_list_lock); - - list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { - netdev_for_each_mc_addr(mca, netdev) - if (ether_addr_equal(mca->addr, f->macaddr)) - goto bottom_of_search_loop; - - netdev_for_each_uc_addr(uca, netdev) - if (ether_addr_equal(uca->addr, f->macaddr)) - goto bottom_of_search_loop; + if (i40evf_add_filter(adapter, addr)) + return 0; + else + return -ENOMEM; +} - for_each_dev_addr(netdev, ha) - if (ether_addr_equal(ha->addr, f->macaddr)) - goto bottom_of_search_loop; +/** + * i40evf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address + * @netdev: the netdevice + * @addr: address to add + * + * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call + * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock. + */ +static int i40evf_addr_unsync(struct net_device *netdev, const u8 *addr) +{ + struct i40evf_adapter *adapter = netdev_priv(netdev); + struct i40evf_mac_filter *f; - if (ether_addr_equal(f->macaddr, adapter->hw.mac.addr)) - goto bottom_of_search_loop; + /* Under some circumstances, we might receive a request to delete + * our own device address from our uc list. Because we store the + * device address in the VSI's MAC/VLAN filter list, we need to ignore + * such requests and not delete our device address from this list. + */ + if (ether_addr_equal(addr, netdev->dev_addr)) + return 0; - /* f->macaddr wasn't found in uc, mc, or ha list so delete it */ + f = i40evf_find_filter(adapter, addr); + if (f) { f->remove = true; adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER; - -bottom_of_search_loop: - continue; } + return 0; +} + +/** + * i40evf_set_rx_mode - NDO callback to set the netdev filters + * @netdev: network interface device structure + **/ +static void i40evf_set_rx_mode(struct net_device *netdev) +{ + struct i40evf_adapter *adapter = netdev_priv(netdev); + + spin_lock_bh(&adapter->mac_vlan_list_lock); + __dev_uc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync); + __dev_mc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync); + spin_unlock_bh(&adapter->mac_vlan_list_lock); if (netdev->flags & IFF_PROMISC && !(adapter->flags & I40EVF_FLAG_PROMISC_ON)) @@ -936,8 +949,6 @@ bottom_of_search_loop: else if (!(netdev->flags & IFF_ALLMULTI) && adapter->flags & I40EVF_FLAG_ALLMULTI_ON) adapter->aq_required |= I40EVF_FLAG_AQ_RELEASE_ALLMULTI; - - spin_unlock_bh(&adapter->mac_vlan_list_lock); } /** @@ -1025,7 +1036,9 @@ static void i40evf_up_complete(struct i40evf_adapter *adapter) void i40evf_down(struct i40evf_adapter *adapter) { struct net_device *netdev = adapter->netdev; + struct i40evf_vlan_filter *vlf; struct i40evf_mac_filter *f; + struct i40evf_cloud_filter *cf; if (adapter->state <= __I40EVF_DOWN_PENDING) return; @@ -1038,17 +1051,29 @@ void i40evf_down(struct i40evf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); + /* clear the sync flag on all filters */ + __dev_uc_unsync(adapter->netdev, NULL); + __dev_mc_unsync(adapter->netdev, NULL); + /* remove all MAC filters */ list_for_each_entry(f, &adapter->mac_filter_list, list) { f->remove = true; } + /* remove all VLAN filters */ - list_for_each_entry(f, &adapter->vlan_filter_list, list) { - f->remove = true; + list_for_each_entry(vlf, &adapter->vlan_filter_list, list) { + vlf->remove = true; } spin_unlock_bh(&adapter->mac_vlan_list_lock); + /* remove all cloud filters */ + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + cf->del = true; + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) && adapter->state != __I40EVF_RESETTING) { /* cancel any current operation */ @@ -1059,6 +1084,7 @@ void i40evf_down(struct i40evf_adapter *adapter) */ adapter->aq_required = I40EVF_FLAG_AQ_DEL_MAC_FILTER; adapter->aq_required |= I40EVF_FLAG_AQ_DEL_VLAN_FILTER; + adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER; adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES; } @@ -1144,6 +1170,9 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) */ if (adapter->num_req_queues) num_active_queues = adapter->num_req_queues; + else if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && + adapter->num_tc) + num_active_queues = adapter->ch_config.total_qps; else num_active_queues = min_t(int, adapter->vsi_res->num_queue_pairs, @@ -1169,7 +1198,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) tx_ring->netdev = adapter->netdev; tx_ring->dev = &adapter->pdev->dev; tx_ring->count = adapter->tx_desc_count; - tx_ring->tx_itr_setting = I40E_ITR_TX_DEF; + tx_ring->itr_setting = I40E_ITR_TX_DEF; if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR; @@ -1178,7 +1207,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) rx_ring->netdev = adapter->netdev; rx_ring->dev = &adapter->pdev->dev; rx_ring->count = adapter->rx_desc_count; - rx_ring->rx_itr_setting = I40E_ITR_RX_DEF; + rx_ring->itr_setting = I40E_ITR_RX_DEF; } adapter->num_active_queues = num_active_queues; @@ -1471,6 +1500,16 @@ int i40evf_init_interrupt_scheme(struct i40evf_adapter *adapter) goto err_alloc_q_vectors; } + /* If we've made it so far while ADq flag being ON, then we haven't + * bailed out anywhere in middle. And ADq isn't just enabled but actual + * resources have been allocated in the reset path. + * Now we can truly claim that ADq is enabled. + */ + if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && + adapter->num_tc) + dev_info(&adapter->pdev->dev, "ADq Enabled, %u TCs created", + adapter->num_tc); + dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u", (adapter->num_active_queues > 1) ? "Enabled" : "Disabled", adapter->num_active_queues); @@ -1712,6 +1751,27 @@ static void i40evf_watchdog_task(struct work_struct *work) i40evf_set_promiscuous(adapter, 0); goto watchdog_done; } + + if (adapter->aq_required & I40EVF_FLAG_AQ_ENABLE_CHANNELS) { + i40evf_enable_channels(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & I40EVF_FLAG_AQ_DISABLE_CHANNELS) { + i40evf_disable_channels(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & I40EVF_FLAG_AQ_ADD_CLOUD_FILTER) { + i40evf_add_cloud_filter(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & I40EVF_FLAG_AQ_DEL_CLOUD_FILTER) { + i40evf_del_cloud_filter(adapter); + goto watchdog_done; + } + schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5)); if (adapter->state == __I40EVF_RUNNING) @@ -1735,6 +1795,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter) { struct i40evf_mac_filter *f, *ftmp; struct i40evf_vlan_filter *fv, *fvtmp; + struct i40evf_cloud_filter *cf, *cftmp; adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED; @@ -1756,7 +1817,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); - /* Delete all of the filters, both MAC and VLAN. */ + /* Delete all of the filters */ list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { list_del(&f->list); kfree(f); @@ -1769,6 +1830,14 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter) spin_unlock_bh(&adapter->mac_vlan_list_lock); + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) { + list_del(&cf->list); + kfree(cf); + adapter->num_cloud_filters--; + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + i40evf_free_misc_irq(adapter); i40evf_reset_interrupt_capability(adapter); i40evf_free_queues(adapter); @@ -1798,9 +1867,11 @@ static void i40evf_reset_task(struct work_struct *work) struct i40evf_adapter *adapter = container_of(work, struct i40evf_adapter, reset_task); + struct virtchnl_vf_resource *vfres = adapter->vf_res; struct net_device *netdev = adapter->netdev; struct i40e_hw *hw = &adapter->hw; struct i40evf_vlan_filter *vlf; + struct i40evf_cloud_filter *cf; struct i40evf_mac_filter *f; u32 reg_val; int i = 0, err; @@ -1893,6 +1964,7 @@ continue_reset: i40evf_free_all_rx_resources(adapter); i40evf_free_all_tx_resources(adapter); + adapter->flags |= I40EVF_FLAG_QUEUES_DISABLED; /* kill and reinit the admin queue */ i40evf_shutdown_adminq(hw); adapter->current_op = VIRTCHNL_OP_UNKNOWN; @@ -1924,8 +1996,19 @@ continue_reset: spin_unlock_bh(&adapter->mac_vlan_list_lock); + /* check if TCs are running and re-add all cloud filters */ + spin_lock_bh(&adapter->cloud_filter_list_lock); + if ((vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && + adapter->num_tc) { + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + cf->add = true; + } + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER; adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER; + adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER; i40evf_misc_irq_enable(adapter); mod_timer(&adapter->watchdog_timer, jiffies + 2); @@ -2191,6 +2274,712 @@ void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter) } /** + * i40evf_validate_tx_bandwidth - validate the max Tx bandwidth + * @adapter: board private structure + * @max_tx_rate: max Tx bw for a tc + **/ +static int i40evf_validate_tx_bandwidth(struct i40evf_adapter *adapter, + u64 max_tx_rate) +{ + int speed = 0, ret = 0; + + switch (adapter->link_speed) { + case I40E_LINK_SPEED_40GB: + speed = 40000; + break; + case I40E_LINK_SPEED_25GB: + speed = 25000; + break; + case I40E_LINK_SPEED_20GB: + speed = 20000; + break; + case I40E_LINK_SPEED_10GB: + speed = 10000; + break; + case I40E_LINK_SPEED_1GB: + speed = 1000; + break; + case I40E_LINK_SPEED_100MB: + speed = 100; + break; + default: + break; + } + + if (max_tx_rate > speed) { + dev_err(&adapter->pdev->dev, + "Invalid tx rate specified\n"); + ret = -EINVAL; + } + + return ret; +} + +/** + * i40evf_validate_channel_config - validate queue mapping info + * @adapter: board private structure + * @mqprio_qopt: queue parameters + * + * This function validates if the config provided by the user to + * configure queue channels is valid or not. Returns 0 on a valid + * config. + **/ +static int i40evf_validate_ch_config(struct i40evf_adapter *adapter, + struct tc_mqprio_qopt_offload *mqprio_qopt) +{ + u64 total_max_rate = 0; + int i, num_qps = 0; + u64 tx_rate = 0; + int ret = 0; + + if (mqprio_qopt->qopt.num_tc > I40EVF_MAX_TRAFFIC_CLASS || + mqprio_qopt->qopt.num_tc < 1) + return -EINVAL; + + for (i = 0; i <= mqprio_qopt->qopt.num_tc - 1; i++) { + if (!mqprio_qopt->qopt.count[i] || + mqprio_qopt->qopt.offset[i] != num_qps) + return -EINVAL; + if (mqprio_qopt->min_rate[i]) { + dev_err(&adapter->pdev->dev, + "Invalid min tx rate (greater than 0) specified\n"); + return -EINVAL; + } + /*convert to Mbps */ + tx_rate = div_u64(mqprio_qopt->max_rate[i], + I40EVF_MBPS_DIVISOR); + total_max_rate += tx_rate; + num_qps += mqprio_qopt->qopt.count[i]; + } + if (num_qps > MAX_QUEUES) + return -EINVAL; + + ret = i40evf_validate_tx_bandwidth(adapter, total_max_rate); + return ret; +} + +/** + * i40evf_del_all_cloud_filters - delete all cloud filters + * on the traffic classes + **/ +static void i40evf_del_all_cloud_filters(struct i40evf_adapter *adapter) +{ + struct i40evf_cloud_filter *cf, *cftmp; + + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, + list) { + list_del(&cf->list); + kfree(cf); + adapter->num_cloud_filters--; + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); +} + +/** + * __i40evf_setup_tc - configure multiple traffic classes + * @netdev: network interface device structure + * @type_date: tc offload data + * + * This function processes the config information provided by the + * user to configure traffic classes/queue channels and packages the + * information to request the PF to setup traffic classes. + * + * Returns 0 on success. + **/ +static int __i40evf_setup_tc(struct net_device *netdev, void *type_data) +{ + struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; + struct i40evf_adapter *adapter = netdev_priv(netdev); + struct virtchnl_vf_resource *vfres = adapter->vf_res; + u8 num_tc = 0, total_qps = 0; + int ret = 0, netdev_tc = 0; + u64 max_tx_rate; + u16 mode; + int i; + + num_tc = mqprio_qopt->qopt.num_tc; + mode = mqprio_qopt->mode; + + /* delete queue_channel */ + if (!mqprio_qopt->qopt.hw) { + if (adapter->ch_config.state == __I40EVF_TC_RUNNING) { + /* reset the tc configuration */ + netdev_reset_tc(netdev); + adapter->num_tc = 0; + netif_tx_stop_all_queues(netdev); + netif_tx_disable(netdev); + i40evf_del_all_cloud_filters(adapter); + adapter->aq_required = I40EVF_FLAG_AQ_DISABLE_CHANNELS; + goto exit; + } else { + return -EINVAL; + } + } + + /* add queue channel */ + if (mode == TC_MQPRIO_MODE_CHANNEL) { + if (!(vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)) { + dev_err(&adapter->pdev->dev, "ADq not supported\n"); + return -EOPNOTSUPP; + } + if (adapter->ch_config.state != __I40EVF_TC_INVALID) { + dev_err(&adapter->pdev->dev, "TC configuration already exists\n"); + return -EINVAL; + } + + ret = i40evf_validate_ch_config(adapter, mqprio_qopt); + if (ret) + return ret; + /* Return if same TC config is requested */ + if (adapter->num_tc == num_tc) + return 0; + adapter->num_tc = num_tc; + + for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) { + if (i < num_tc) { + adapter->ch_config.ch_info[i].count = + mqprio_qopt->qopt.count[i]; + adapter->ch_config.ch_info[i].offset = + mqprio_qopt->qopt.offset[i]; + total_qps += mqprio_qopt->qopt.count[i]; + max_tx_rate = mqprio_qopt->max_rate[i]; + /* convert to Mbps */ + max_tx_rate = div_u64(max_tx_rate, + I40EVF_MBPS_DIVISOR); + adapter->ch_config.ch_info[i].max_tx_rate = + max_tx_rate; + } else { + adapter->ch_config.ch_info[i].count = 1; + adapter->ch_config.ch_info[i].offset = 0; + } + } + adapter->ch_config.total_qps = total_qps; + netif_tx_stop_all_queues(netdev); + netif_tx_disable(netdev); + adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_CHANNELS; + netdev_reset_tc(netdev); + /* Report the tc mapping up the stack */ + netdev_set_num_tc(adapter->netdev, num_tc); + for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) { + u16 qcount = mqprio_qopt->qopt.count[i]; + u16 qoffset = mqprio_qopt->qopt.offset[i]; + + if (i < num_tc) + netdev_set_tc_queue(netdev, netdev_tc++, qcount, + qoffset); + } + } +exit: + return ret; +} + +/** + * i40evf_parse_cls_flower - Parse tc flower filters provided by kernel + * @adapter: board private structure + * @cls_flower: pointer to struct tc_cls_flower_offload + * @filter: pointer to cloud filter structure + */ +static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter, + struct tc_cls_flower_offload *f, + struct i40evf_cloud_filter *filter) +{ + u16 n_proto_mask = 0; + u16 n_proto_key = 0; + u8 field_flags = 0; + u16 addr_type = 0; + u16 n_proto = 0; + int i = 0; + struct virtchnl_filter *vf = &filter->f; + + if (f->dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) { + dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%x\n", + f->dissector->used_keys); + return -EOPNOTSUPP; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->mask); + + if (mask->keyid != 0) + field_flags |= I40EVF_CLOUD_FIELD_TEN_ID; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->key); + + struct flow_dissector_key_basic *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->mask); + n_proto_key = ntohs(key->n_proto); + n_proto_mask = ntohs(mask->n_proto); + + if (n_proto_key == ETH_P_ALL) { + n_proto_key = 0; + n_proto_mask = 0; + } + n_proto = n_proto_key & n_proto_mask; + if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) + return -EINVAL; + if (n_proto == ETH_P_IPV6) { + /* specify flow type as TCP IPv6 */ + vf->flow_type = VIRTCHNL_TCP_V6_FLOW; + } + + if (key->ip_proto != IPPROTO_TCP) { + dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n"); + return -EINVAL; + } + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_dissector_key_eth_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->key); + + struct flow_dissector_key_eth_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->mask); + /* use is_broadcast and is_zero to check for all 0xf or 0 */ + if (!is_zero_ether_addr(mask->dst)) { + if (is_broadcast_ether_addr(mask->dst)) { + field_flags |= I40EVF_CLOUD_FIELD_OMAC; + } else { + dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n", + mask->dst); + return I40E_ERR_CONFIG; + } + } + + if (!is_zero_ether_addr(mask->src)) { + if (is_broadcast_ether_addr(mask->src)) { + field_flags |= I40EVF_CLOUD_FIELD_IMAC; + } else { + dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n", + mask->src); + return I40E_ERR_CONFIG; + } + } + + if (!is_zero_ether_addr(key->dst)) + if (is_valid_ether_addr(key->dst) || + is_multicast_ether_addr(key->dst)) { + /* set the mask if a valid dst_mac address */ + for (i = 0; i < ETH_ALEN; i++) + vf->mask.tcp_spec.dst_mac[i] |= 0xff; + ether_addr_copy(vf->data.tcp_spec.dst_mac, + key->dst); + } + + if (!is_zero_ether_addr(key->src)) + if (is_valid_ether_addr(key->src) || + is_multicast_ether_addr(key->src)) { + /* set the mask if a valid dst_mac address */ + for (i = 0; i < ETH_ALEN; i++) + vf->mask.tcp_spec.src_mac[i] |= 0xff; + ether_addr_copy(vf->data.tcp_spec.src_mac, + key->src); + } + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->key); + struct flow_dissector_key_vlan *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->mask); + + if (mask->vlan_id) { + if (mask->vlan_id == VLAN_VID_MASK) { + field_flags |= I40EVF_CLOUD_FIELD_IVLAN; + } else { + dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n", + mask->vlan_id); + return I40E_ERR_CONFIG; + } + } + vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff); + vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id); + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_dissector_key_control *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_CONTROL, + f->key); + + addr_type = key->addr_type; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_dissector_key_ipv4_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->key); + struct flow_dissector_key_ipv4_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->mask); + + if (mask->dst) { + if (mask->dst == cpu_to_be32(0xffffffff)) { + field_flags |= I40EVF_CLOUD_FIELD_IIP; + } else { + dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n", + be32_to_cpu(mask->dst)); + return I40E_ERR_CONFIG; + } + } + + if (mask->src) { + if (mask->src == cpu_to_be32(0xffffffff)) { + field_flags |= I40EVF_CLOUD_FIELD_IIP; + } else { + dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n", + be32_to_cpu(mask->dst)); + return I40E_ERR_CONFIG; + } + } + + if (field_flags & I40EVF_CLOUD_FIELD_TEN_ID) { + dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n"); + return I40E_ERR_CONFIG; + } + if (key->dst) { + vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff); + vf->data.tcp_spec.dst_ip[0] = key->dst; + } + if (key->src) { + vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff); + vf->data.tcp_spec.src_ip[0] = key->src; + } + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_dissector_key_ipv6_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->key); + struct flow_dissector_key_ipv6_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->mask); + + /* validate mask, make sure it is not IPV6_ADDR_ANY */ + if (ipv6_addr_any(&mask->dst)) { + dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n", + IPV6_ADDR_ANY); + return I40E_ERR_CONFIG; + } + + /* src and dest IPv6 address should not be LOOPBACK + * (0:0:0:0:0:0:0:1) which can be represented as ::1 + */ + if (ipv6_addr_loopback(&key->dst) || + ipv6_addr_loopback(&key->src)) { + dev_err(&adapter->pdev->dev, + "ipv6 addr should not be loopback\n"); + return I40E_ERR_CONFIG; + } + if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src)) + field_flags |= I40EVF_CLOUD_FIELD_IIP; + + for (i = 0; i < 4; i++) + vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff); + memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32, + sizeof(vf->data.tcp_spec.dst_ip)); + for (i = 0; i < 4; i++) + vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff); + memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32, + sizeof(vf->data.tcp_spec.src_ip)); + } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_dissector_key_ports *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->key); + struct flow_dissector_key_ports *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->mask); + + if (mask->src) { + if (mask->src == cpu_to_be16(0xffff)) { + field_flags |= I40EVF_CLOUD_FIELD_IIP; + } else { + dev_err(&adapter->pdev->dev, "Bad src port mask %u\n", + be16_to_cpu(mask->src)); + return I40E_ERR_CONFIG; + } + } + + if (mask->dst) { + if (mask->dst == cpu_to_be16(0xffff)) { + field_flags |= I40EVF_CLOUD_FIELD_IIP; + } else { + dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n", + be16_to_cpu(mask->dst)); + return I40E_ERR_CONFIG; + } + } + if (key->dst) { + vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff); + vf->data.tcp_spec.dst_port = key->dst; + } + + if (key->src) { + vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff); + vf->data.tcp_spec.src_port = key->src; + } + } + vf->field_flags = field_flags; + + return 0; +} + +/** + * i40evf_handle_tclass - Forward to a traffic class on the device + * @adapter: board private structure + * @tc: traffic class index on the device + * @filter: pointer to cloud filter structure + */ +static int i40evf_handle_tclass(struct i40evf_adapter *adapter, u32 tc, + struct i40evf_cloud_filter *filter) +{ + if (tc == 0) + return 0; + if (tc < adapter->num_tc) { + if (!filter->f.data.tcp_spec.dst_port) { + dev_err(&adapter->pdev->dev, + "Specify destination port to redirect to traffic class other than TC0\n"); + return -EINVAL; + } + } + /* redirect to a traffic class on the same device */ + filter->f.action = VIRTCHNL_ACTION_TC_REDIRECT; + filter->f.action_meta = tc; + return 0; +} + +/** + * i40evf_configure_clsflower - Add tc flower filters + * @adapter: board private structure + * @cls_flower: Pointer to struct tc_cls_flower_offload + */ +static int i40evf_configure_clsflower(struct i40evf_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid); + struct i40evf_cloud_filter *filter = NULL; + int err = -EINVAL, count = 50; + + if (tc < 0) { + dev_err(&adapter->pdev->dev, "Invalid traffic class\n"); + return -EINVAL; + } + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK, + &adapter->crit_section)) { + if (--count == 0) + goto err; + udelay(1); + } + + filter->cookie = cls_flower->cookie; + + /* set the mask to all zeroes to begin with */ + memset(&filter->f.mask.tcp_spec, 0, sizeof(struct virtchnl_l4_spec)); + /* start out with flow type and eth type IPv4 to begin with */ + filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW; + err = i40evf_parse_cls_flower(adapter, cls_flower, filter); + if (err < 0) + goto err; + + err = i40evf_handle_tclass(adapter, tc, filter); + if (err < 0) + goto err; + + /* add filter to the list */ + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_add_tail(&filter->list, &adapter->cloud_filter_list); + adapter->num_cloud_filters++; + filter->add = true; + adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER; + spin_unlock_bh(&adapter->cloud_filter_list_lock); +err: + if (err) + kfree(filter); + + clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section); + return err; +} + +/* i40evf_find_cf - Find the cloud filter in the list + * @adapter: Board private structure + * @cookie: filter specific cookie + * + * Returns ptr to the filter object or NULL. Must be called while holding the + * cloud_filter_list_lock. + */ +static struct i40evf_cloud_filter *i40evf_find_cf(struct i40evf_adapter *adapter, + unsigned long *cookie) +{ + struct i40evf_cloud_filter *filter = NULL; + + if (!cookie) + return NULL; + + list_for_each_entry(filter, &adapter->cloud_filter_list, list) { + if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie))) + return filter; + } + return NULL; +} + +/** + * i40evf_delete_clsflower - Remove tc flower filters + * @adapter: board private structure + * @cls_flower: Pointer to struct tc_cls_flower_offload + */ +static int i40evf_delete_clsflower(struct i40evf_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + struct i40evf_cloud_filter *filter = NULL; + int err = 0; + + spin_lock_bh(&adapter->cloud_filter_list_lock); + filter = i40evf_find_cf(adapter, &cls_flower->cookie); + if (filter) { + filter->del = true; + adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER; + } else { + err = -EINVAL; + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + + return err; +} + +/** + * i40evf_setup_tc_cls_flower - flower classifier offloads + * @netdev: net device to configure + * @type_data: offload data + */ +static int i40evf_setup_tc_cls_flower(struct i40evf_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + if (cls_flower->common.chain_index) + return -EOPNOTSUPP; + + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return i40evf_configure_clsflower(adapter, cls_flower); + case TC_CLSFLOWER_DESTROY: + return i40evf_delete_clsflower(adapter, cls_flower); + case TC_CLSFLOWER_STATS: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +/** + * i40evf_setup_tc_block_cb - block callback for tc + * @type: type of offload + * @type_data: offload data + * @cb_priv: + * + * This function is the block callback for traffic classes + **/ +static int i40evf_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + switch (type) { + case TC_SETUP_CLSFLOWER: + return i40evf_setup_tc_cls_flower(cb_priv, type_data); + default: + return -EOPNOTSUPP; + } +} + +/** + * i40evf_setup_tc_block - register callbacks for tc + * @netdev: network interface device structure + * @f: tc offload data + * + * This function registers block callbacks for tc + * offloads + **/ +static int i40evf_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct i40evf_adapter *adapter = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, i40evf_setup_tc_block_cb, + adapter, adapter); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, i40evf_setup_tc_block_cb, + adapter); + return 0; + default: + return -EOPNOTSUPP; + } +} + +/** + * i40evf_setup_tc - configure multiple traffic classes + * @netdev: network interface device structure + * @type: type of offload + * @type_date: tc offload data + * + * This function is the callback to ndo_setup_tc in the + * netdev_ops. + * + * Returns 0 on success + **/ +static int i40evf_setup_tc(struct net_device *netdev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_QDISC_MQPRIO: + return __i40evf_setup_tc(netdev, type_data); + case TC_SETUP_BLOCK: + return i40evf_setup_tc_block(netdev, type_data); + default: + return -EOPNOTSUPP; + } +} + +/** * i40evf_open - Called when a network interface is made active * @netdev: network interface device structure * @@ -2236,7 +3025,12 @@ static int i40evf_open(struct net_device *netdev) if (err) goto err_req_irq; + spin_lock_bh(&adapter->mac_vlan_list_lock); + i40evf_add_filter(adapter, adapter->hw.mac.addr); + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + i40evf_configure(adapter); i40evf_up_complete(adapter); @@ -2457,6 +3251,7 @@ static const struct net_device_ops i40evf_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = i40evf_netpoll, #endif + .ndo_setup_tc = i40evf_setup_tc, }; /** @@ -2571,6 +3366,9 @@ int i40evf_process_config(struct i40evf_adapter *adapter) if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) hw_features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); + /* Enable cloud filter if ADQ is supported */ + if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) + hw_features |= NETIF_F_HW_TC; netdev->hw_features |= hw_features; @@ -2938,9 +3736,11 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) mutex_init(&hw->aq.arq_mutex); spin_lock_init(&adapter->mac_vlan_list_lock); + spin_lock_init(&adapter->cloud_filter_list_lock); INIT_LIST_HEAD(&adapter->mac_filter_list); INIT_LIST_HEAD(&adapter->vlan_filter_list); + INIT_LIST_HEAD(&adapter->cloud_filter_list); INIT_WORK(&adapter->reset_task, i40evf_reset_task); INIT_WORK(&adapter->adminq_task, i40evf_adminq_task); @@ -3065,7 +3865,9 @@ static void i40evf_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct i40evf_adapter *adapter = netdev_priv(netdev); + struct i40evf_vlan_filter *vlf, *vlftmp; struct i40evf_mac_filter *f, *ftmp; + struct i40evf_cloud_filter *cf, *cftmp; struct i40e_hw *hw = &adapter->hw; int err; /* Indicate we are in remove and not to run reset_task */ @@ -3087,6 +3889,7 @@ static void i40evf_remove(struct pci_dev *pdev) /* Shut down all the garbage mashers on the detention level */ adapter->state = __I40EVF_REMOVE; adapter->aq_required = 0; + adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED; i40evf_request_reset(adapter); msleep(50); /* If the FW isn't responding, kick it once, but only once. */ @@ -3127,13 +3930,21 @@ static void i40evf_remove(struct pci_dev *pdev) list_del(&f->list); kfree(f); } - list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - list_del(&f->list); - kfree(f); + list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list, + list) { + list_del(&vlf->list); + kfree(vlf); } spin_unlock_bh(&adapter->mac_vlan_list_lock); + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) { + list_del(&cf->list); + kfree(cf); + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + free_netdev(netdev); pci_disable_pcie_error_reporting(pdev); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 50ce0d6c09ef..3c76c817ca1a 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -161,7 +161,8 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter) VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 | VIRTCHNL_VF_OFFLOAD_ENCAP | VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM | - VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; + VIRTCHNL_VF_OFFLOAD_REQ_QUEUES | + VIRTCHNL_VF_OFFLOAD_ADQ; adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES; adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG; @@ -344,6 +345,7 @@ void i40evf_disable_queues(struct i40evf_adapter *adapter) void i40evf_map_queues(struct i40evf_adapter *adapter) { struct virtchnl_irq_map_info *vimi; + struct virtchnl_vector_map *vecmap; int v_idx, q_vectors, len; struct i40e_q_vector *q_vector; @@ -367,17 +369,22 @@ void i40evf_map_queues(struct i40evf_adapter *adapter) vimi->num_vectors = adapter->num_msix_vectors; /* Queue vectors first */ for (v_idx = 0; v_idx < q_vectors; v_idx++) { - q_vector = adapter->q_vectors + v_idx; - vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id; - vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS; - vimi->vecmap[v_idx].txq_map = q_vector->ring_mask; - vimi->vecmap[v_idx].rxq_map = q_vector->ring_mask; + q_vector = &adapter->q_vectors[v_idx]; + vecmap = &vimi->vecmap[v_idx]; + + vecmap->vsi_id = adapter->vsi_res->vsi_id; + vecmap->vector_id = v_idx + NONQ_VECS; + vecmap->txq_map = q_vector->ring_mask; + vecmap->rxq_map = q_vector->ring_mask; + vecmap->rxitr_idx = I40E_RX_ITR; + vecmap->txitr_idx = I40E_TX_ITR; } /* Misc vector last - this is only for AdminQ messages */ - vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id; - vimi->vecmap[v_idx].vector_id = 0; - vimi->vecmap[v_idx].txq_map = 0; - vimi->vecmap[v_idx].rxq_map = 0; + vecmap = &vimi->vecmap[v_idx]; + vecmap->vsi_id = adapter->vsi_res->vsi_id; + vecmap->vector_id = 0; + vecmap->txq_map = 0; + vecmap->rxq_map = 0; adapter->aq_required &= ~I40EVF_FLAG_AQ_MAP_VECTORS; i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP, @@ -459,7 +466,7 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter) more = true; } - veal = kzalloc(len, GFP_KERNEL); + veal = kzalloc(len, GFP_ATOMIC); if (!veal) { spin_unlock_bh(&adapter->mac_vlan_list_lock); return; @@ -532,7 +539,7 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter) (count * sizeof(struct virtchnl_ether_addr)); more = true; } - veal = kzalloc(len, GFP_KERNEL); + veal = kzalloc(len, GFP_ATOMIC); if (!veal) { spin_unlock_bh(&adapter->mac_vlan_list_lock); return; @@ -606,7 +613,7 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter) (count * sizeof(u16)); more = true; } - vvfl = kzalloc(len, GFP_KERNEL); + vvfl = kzalloc(len, GFP_ATOMIC); if (!vvfl) { spin_unlock_bh(&adapter->mac_vlan_list_lock); return; @@ -678,7 +685,7 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter) (count * sizeof(u16)); more = true; } - vvfl = kzalloc(len, GFP_KERNEL); + vvfl = kzalloc(len, GFP_ATOMIC); if (!vvfl) { spin_unlock_bh(&adapter->mac_vlan_list_lock); return; @@ -967,6 +974,205 @@ static void i40evf_print_link_message(struct i40evf_adapter *adapter) } /** + * i40evf_enable_channel + * @adapter: adapter structure + * + * Request that the PF enable channels as specified by + * the user via tc tool. + **/ +void i40evf_enable_channels(struct i40evf_adapter *adapter) +{ + struct virtchnl_tc_info *vti = NULL; + u16 len; + int i; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n", + adapter->current_op); + return; + } + + len = (adapter->num_tc * sizeof(struct virtchnl_channel_info)) + + sizeof(struct virtchnl_tc_info); + + vti = kzalloc(len, GFP_KERNEL); + if (!vti) + return; + vti->num_tc = adapter->num_tc; + for (i = 0; i < vti->num_tc; i++) { + vti->list[i].count = adapter->ch_config.ch_info[i].count; + vti->list[i].offset = adapter->ch_config.ch_info[i].offset; + vti->list[i].pad = 0; + vti->list[i].max_tx_rate = + adapter->ch_config.ch_info[i].max_tx_rate; + } + + adapter->ch_config.state = __I40EVF_TC_RUNNING; + adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED; + adapter->current_op = VIRTCHNL_OP_ENABLE_CHANNELS; + adapter->aq_required &= ~I40EVF_FLAG_AQ_ENABLE_CHANNELS; + i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_CHANNELS, + (u8 *)vti, len); + kfree(vti); +} + +/** + * i40evf_disable_channel + * @adapter: adapter structure + * + * Request that the PF disable channels that are configured + **/ +void i40evf_disable_channels(struct i40evf_adapter *adapter) +{ + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n", + adapter->current_op); + return; + } + + adapter->ch_config.state = __I40EVF_TC_INVALID; + adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED; + adapter->current_op = VIRTCHNL_OP_DISABLE_CHANNELS; + adapter->aq_required &= ~I40EVF_FLAG_AQ_DISABLE_CHANNELS; + i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_CHANNELS, + NULL, 0); +} + +/** + * i40evf_print_cloud_filter + * @adapter: adapter structure + * @f: cloud filter to print + * + * Print the cloud filter + **/ +static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter, + struct virtchnl_filter *f) +{ + switch (f->flow_type) { + case VIRTCHNL_TCP_V4_FLOW: + dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n", + &f->data.tcp_spec.dst_mac, + &f->data.tcp_spec.src_mac, + ntohs(f->data.tcp_spec.vlan_id), + &f->data.tcp_spec.dst_ip[0], + &f->data.tcp_spec.src_ip[0], + ntohs(f->data.tcp_spec.dst_port), + ntohs(f->data.tcp_spec.src_port)); + break; + case VIRTCHNL_TCP_V6_FLOW: + dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n", + &f->data.tcp_spec.dst_mac, + &f->data.tcp_spec.src_mac, + ntohs(f->data.tcp_spec.vlan_id), + &f->data.tcp_spec.dst_ip, + &f->data.tcp_spec.src_ip, + ntohs(f->data.tcp_spec.dst_port), + ntohs(f->data.tcp_spec.src_port)); + break; + } +} + +/** + * i40evf_add_cloud_filter + * @adapter: adapter structure + * + * Request that the PF add cloud filters as specified + * by the user via tc tool. + **/ +void i40evf_add_cloud_filter(struct i40evf_adapter *adapter) +{ + struct i40evf_cloud_filter *cf; + struct virtchnl_filter *f; + int len = 0, count = 0; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot add cloud filter, command %d pending\n", + adapter->current_op); + return; + } + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->add) { + count++; + break; + } + } + if (!count) { + adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_CLOUD_FILTER; + return; + } + adapter->current_op = VIRTCHNL_OP_ADD_CLOUD_FILTER; + + len = sizeof(struct virtchnl_filter); + f = kzalloc(len, GFP_KERNEL); + if (!f) + return; + + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->add) { + memcpy(f, &cf->f, sizeof(struct virtchnl_filter)); + cf->add = false; + cf->state = __I40EVF_CF_ADD_PENDING; + i40evf_send_pf_msg(adapter, + VIRTCHNL_OP_ADD_CLOUD_FILTER, + (u8 *)f, len); + } + } + kfree(f); +} + +/** + * i40evf_del_cloud_filter + * @adapter: adapter structure + * + * Request that the PF delete cloud filters as specified + * by the user via tc tool. + **/ +void i40evf_del_cloud_filter(struct i40evf_adapter *adapter) +{ + struct i40evf_cloud_filter *cf, *cftmp; + struct virtchnl_filter *f; + int len = 0, count = 0; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot remove cloud filter, command %d pending\n", + adapter->current_op); + return; + } + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->del) { + count++; + break; + } + } + if (!count) { + adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_CLOUD_FILTER; + return; + } + adapter->current_op = VIRTCHNL_OP_DEL_CLOUD_FILTER; + + len = sizeof(struct virtchnl_filter); + f = kzalloc(len, GFP_KERNEL); + if (!f) + return; + + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) { + if (cf->del) { + memcpy(f, &cf->f, sizeof(struct virtchnl_filter)); + cf->del = false; + cf->state = __I40EVF_CF_DEL_PENDING; + i40evf_send_pf_msg(adapter, + VIRTCHNL_OP_DEL_CLOUD_FILTER, + (u8 *)f, len); + } + } + kfree(f); +} + +/** * i40evf_request_reset * @adapter: adapter structure * @@ -1011,14 +1217,25 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, if (adapter->link_up == link_up) break; - /* If we get link up message and start queues before - * our queues are configured it will trigger a TX hang. - * In that case, just ignore the link status message, - * we'll get another one after we enable queues and - * actually prepared to send traffic. - */ - if (link_up && adapter->state != __I40EVF_RUNNING) - break; + if (link_up) { + /* If we get link up message and start queues + * before our queues are configured it will + * trigger a TX hang. In that case, just ignore + * the link status message,we'll get another one + * after we enable queues and actually prepared + * to send traffic. + */ + if (adapter->state != __I40EVF_RUNNING) + break; + + /* For ADq enabled VF, we reconfigure VSIs and + * re-allocate queues. Hence wait till all + * queues are enabled. + */ + if (adapter->flags & + I40EVF_FLAG_QUEUES_DISABLED) + break; + } adapter->link_up = link_up; if (link_up) { @@ -1031,7 +1248,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, i40evf_print_link_message(adapter); break; case VIRTCHNL_EVENT_RESET_IMPENDING: - dev_info(&adapter->pdev->dev, "PF reset warning received\n"); + dev_info(&adapter->pdev->dev, "Reset warning received from the PF\n"); if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) { adapter->flags |= I40EVF_FLAG_RESET_PENDING; dev_info(&adapter->pdev->dev, "Scheduling reset task\n"); @@ -1063,6 +1280,57 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n", i40evf_stat_str(&adapter->hw, v_retval)); break; + case VIRTCHNL_OP_ENABLE_CHANNELS: + dev_err(&adapter->pdev->dev, "Failed to configure queue channels, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED; + adapter->ch_config.state = __I40EVF_TC_INVALID; + netdev_reset_tc(netdev); + netif_tx_start_all_queues(netdev); + break; + case VIRTCHNL_OP_DISABLE_CHANNELS: + dev_err(&adapter->pdev->dev, "Failed to disable queue channels, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED; + adapter->ch_config.state = __I40EVF_TC_RUNNING; + netif_tx_start_all_queues(netdev); + break; + case VIRTCHNL_OP_ADD_CLOUD_FILTER: { + struct i40evf_cloud_filter *cf, *cftmp; + + list_for_each_entry_safe(cf, cftmp, + &adapter->cloud_filter_list, + list) { + if (cf->state == __I40EVF_CF_ADD_PENDING) { + cf->state = __I40EVF_CF_INVALID; + dev_info(&adapter->pdev->dev, "Failed to add cloud filter, error %s\n", + i40evf_stat_str(&adapter->hw, + v_retval)); + i40evf_print_cloud_filter(adapter, + &cf->f); + list_del(&cf->list); + kfree(cf); + adapter->num_cloud_filters--; + } + } + } + break; + case VIRTCHNL_OP_DEL_CLOUD_FILTER: { + struct i40evf_cloud_filter *cf; + + list_for_each_entry(cf, &adapter->cloud_filter_list, + list) { + if (cf->state == __I40EVF_CF_DEL_PENDING) { + cf->state = __I40EVF_CF_ACTIVE; + dev_info(&adapter->pdev->dev, "Failed to del cloud filter, error %s\n", + i40evf_stat_str(&adapter->hw, + v_retval)); + i40evf_print_cloud_filter(adapter, + &cf->f); + } + } + } + break; default: dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", v_retval, @@ -1102,6 +1370,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, case VIRTCHNL_OP_ENABLE_QUEUES: /* enable transmits */ i40evf_irq_enable(adapter, true); + adapter->flags &= ~I40EVF_FLAG_QUEUES_DISABLED; break; case VIRTCHNL_OP_DISABLE_QUEUES: i40evf_free_all_tx_resources(adapter); @@ -1156,6 +1425,29 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, } } break; + case VIRTCHNL_OP_ADD_CLOUD_FILTER: { + struct i40evf_cloud_filter *cf; + + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->state == __I40EVF_CF_ADD_PENDING) + cf->state = __I40EVF_CF_ACTIVE; + } + } + break; + case VIRTCHNL_OP_DEL_CLOUD_FILTER: { + struct i40evf_cloud_filter *cf, *cftmp; + + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, + list) { + if (cf->state == __I40EVF_CF_DEL_PENDING) { + cf->state = __I40EVF_CF_INVALID; + list_del(&cf->list); + kfree(cf); + adapter->num_cloud_filters--; + } + } + } + break; default: if (adapter->current_op && (v_opcode != adapter->current_op)) dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n", diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 1c6b8d9176a8..55d6f17d5799 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -109,6 +109,7 @@ struct vf_data_storage { u16 pf_qos; u16 tx_rate; bool spoofchk_enabled; + bool trusted; }; /* Number of unicast MAC filters reserved for the PF in the RAR registers */ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index b88fae785369..715bb32e6901 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -190,6 +190,8 @@ static int igb_ndo_set_vf_vlan(struct net_device *netdev, static int igb_ndo_set_vf_bw(struct net_device *, int, int, int); static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); +static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf, + bool setting); static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi); static void igb_check_vf_rate_limit(struct igb_adapter *); @@ -774,8 +776,7 @@ u32 igb_rd32(struct e1000_hw *hw, u32 reg) if (!(~value) && (!reg || !(~readl(hw_addr)))) { struct net_device *netdev = igb->netdev; hw->hw_addr = NULL; - netif_device_detach(netdev); - netdev_err(netdev, "PCIe link lost, device now detached\n"); + netdev_err(netdev, "PCIe link lost\n"); } return value; @@ -2527,6 +2528,7 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_set_vf_vlan = igb_ndo_set_vf_vlan, .ndo_set_vf_rate = igb_ndo_set_vf_bw, .ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk, + .ndo_set_vf_trust = igb_ndo_set_vf_trust, .ndo_get_vf_config = igb_ndo_get_vf_config, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = igb_netpoll, @@ -5747,7 +5749,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { struct igb_adapter *adapter = netdev_priv(tx_ring->netdev); - if (adapter->tstamp_config.tx_type & HWTSTAMP_TX_ON && + if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && !test_and_set_bit_lock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; @@ -6383,6 +6385,9 @@ static int igb_vf_configure(struct igb_adapter *adapter, int vf) /* By default spoof check is enabled for all VFs */ adapter->vf_data[vf].spoofchk_enabled = true; + /* By default VFs are not trusted */ + adapter->vf_data[vf].trusted = false; + return 0; } @@ -6940,13 +6945,13 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, } break; case E1000_VF_MAC_FILTER_ADD: - if (vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) { + if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && + !vf_data->trusted) { dev_warn(&pdev->dev, "VF %d requested MAC filter but is administratively denied\n", vf); return -EINVAL; } - if (!is_valid_ether_addr(addr)) { dev_warn(&pdev->dev, "VF %d attempted to set invalid MAC filter\n", @@ -6998,7 +7003,8 @@ static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf) int ret = 0; if (!info) { - if (vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) { + if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && + !vf_data->trusted) { dev_warn(&pdev->dev, "VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n", vf); @@ -8934,6 +8940,22 @@ static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, return 0; } +static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + if (vf >= adapter->vfs_allocated_count) + return -EINVAL; + if (adapter->vf_data[vf].trusted == setting) + return 0; + + adapter->vf_data[vf].trusted = setting; + + dev_info(&adapter->pdev->dev, "VF %u is %strusted\n", + vf, setting ? "" : "not "); + return 0; +} + static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi) { @@ -8947,6 +8969,7 @@ static int igb_ndo_get_vf_config(struct net_device *netdev, ivi->vlan = adapter->vf_data[vf].pf_vlan; ivi->qos = adapter->vf_data[vf].pf_qos; ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled; + ivi->trusted = adapter->vf_data[vf].trusted; return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 221f15803480..c0e6ab42e0e1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -97,6 +97,7 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = { {"tx_heartbeat_errors", IXGBE_NETDEV_STAT(tx_heartbeat_errors)}, {"tx_timeout_count", IXGBE_STAT(tx_timeout_count)}, {"tx_restart_queue", IXGBE_STAT(restart_queue)}, + {"rx_length_errors", IXGBE_STAT(stats.rlec)}, {"rx_long_length_errors", IXGBE_STAT(stats.roc)}, {"rx_short_length_errors", IXGBE_STAT(stats.ruc)}, {"tx_flow_control_xon", IXGBE_STAT(stats.lxontxc)}, @@ -3059,6 +3060,8 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir, for (i = 0; i < reta_entries; i++) adapter->rss_indir_tbl[i] = indir[i]; + + ixgbe_store_reta(adapter); } /* Fill out the rss hash key */ @@ -3067,8 +3070,6 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir, ixgbe_store_key(adapter); } - ixgbe_store_reta(adapter); - return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 93eacddb6704..f2254528dcfc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -423,15 +423,21 @@ static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs, const char aes_gcm_name[] = "rfc4106(gcm(aes))"; int key_len; - if (xs->aead) { - key_data = &xs->aead->alg_key[0]; - key_len = xs->aead->alg_key_len; - alg_name = xs->aead->alg_name; - } else { + if (!xs->aead) { netdev_err(dev, "Unsupported IPsec algorithm\n"); return -EINVAL; } + if (xs->aead->alg_icv_len != IXGBE_IPSEC_AUTH_BITS) { + netdev_err(dev, "IPsec offload requires %d bit authentication\n", + IXGBE_IPSEC_AUTH_BITS); + return -EINVAL; + } + + key_data = &xs->aead->alg_key[0]; + key_len = xs->aead->alg_key_len; + alg_name = xs->aead->alg_name; + if (strcmp(alg_name, aes_gcm_name)) { netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n", aes_gcm_name); @@ -718,23 +724,10 @@ static bool ixgbe_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) return true; } -/** - * ixgbe_ipsec_free - called by xfrm garbage collections - * @xs: pointer to transformer state struct - * - * We don't have any garbage to collect, so we shouldn't bother - * implementing this function, but the XFRM code doesn't check for - * existence before calling the API callback. - **/ -static void ixgbe_ipsec_free(struct xfrm_state *xs) -{ -} - static const struct xfrmdev_ops ixgbe_xfrmdev_ops = { .xdo_dev_state_add = ixgbe_ipsec_add_sa, .xdo_dev_state_delete = ixgbe_ipsec_del_sa, .xdo_dev_offload_ok = ixgbe_ipsec_offload_ok, - .xdo_dev_state_free = ixgbe_ipsec_free, }; /** @@ -783,11 +776,33 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, itd->flags = 0; if (xs->id.proto == IPPROTO_ESP) { + struct sk_buff *skb = first->skb; + int ret, authlen, trailerlen; + u8 padlen; + itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP | IXGBE_ADVTXD_TUCMD_L4T_TCP; if (first->protocol == htons(ETH_P_IP)) itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4; - itd->trailer_len = xs->props.trailer_len; + + /* The actual trailer length is authlen (16 bytes) plus + * 2 bytes for the proto and the padlen values, plus + * padlen bytes of padding. This ends up not the same + * as the static value found in xs->props.trailer_len (21). + * + * The "correct" way to get the auth length would be to use + * authlen = crypto_aead_authsize(xs->data); + * but since we know we only have one size to worry about + * we can let the compiler use the constant and save us a + * few CPU cycles. + */ + authlen = IXGBE_IPSEC_AUTH_BITS / 8; + + ret = skb_copy_bits(skb, skb->len - (authlen + 2), &padlen, 1); + if (unlikely(ret)) + return 0; + trailerlen = authlen + 2 + padlen; + itd->trailer_len = trailerlen; } if (tsa->encrypt) itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h index da3ce7849e85..87d2800b94ab 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h @@ -32,6 +32,7 @@ #define IXGBE_IPSEC_MAX_RX_IP_COUNT 128 #define IXGBE_IPSEC_BASE_RX_INDEX 0 #define IXGBE_IPSEC_BASE_TX_INDEX IXGBE_IPSEC_MAX_SA_COUNT +#define IXGBE_IPSEC_AUTH_BITS 128 #define IXGBE_RXTXIDX_IPS_EN 0x00000001 #define IXGBE_RXIDX_TBL_SHIFT 1 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 4242f0213e46..ed4cbe94c355 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -58,7 +58,6 @@ static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter) return false; /* start at VMDq register offset for SR-IOV enabled setups */ - pool = 0; reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask); for (i = 0, pool = 0; i < adapter->num_rx_queues; i++, reg_idx++) { /* If we are greater than indices move to next pool */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 9fc063af233c..85369423452d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7711,7 +7711,8 @@ static void ixgbe_service_task(struct work_struct *work) if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) { ixgbe_ptp_overflow_check(adapter); - ixgbe_ptp_rx_hang(adapter); + if (adapter->flags & IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER) + ixgbe_ptp_rx_hang(adapter); ixgbe_ptp_tx_hang(adapter); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 27a70a52f3c9..008aa073a679 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -831,7 +831,11 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), reg); /* force drop enable for all VF Rx queues */ - ixgbe_write_qde(adapter, vf, IXGBE_QDE_ENABLE); + reg = IXGBE_QDE_ENABLE; + if (adapter->vfinfo[vf].pf_vlan) + reg |= IXGBE_QDE_HIDE_VLAN; + + ixgbe_write_qde(adapter, vf, reg); /* enable receive for vf */ reg = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset)); diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index 4400e49090b4..e7623fed42da 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -94,6 +94,13 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = { #define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN) +static const char ixgbevf_priv_flags_strings[][ETH_GSTRING_LEN] = { +#define IXGBEVF_PRIV_FLAGS_LEGACY_RX BIT(0) + "legacy-rx", +}; + +#define IXGBEVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbevf_priv_flags_strings) + static int ixgbevf_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { @@ -241,6 +248,8 @@ static void ixgbevf_get_drvinfo(struct net_device *netdev, sizeof(drvinfo->version)); strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info)); + + drvinfo->n_priv_flags = IXGBEVF_PRIV_FLAGS_STR_LEN; } static void ixgbevf_get_ringparam(struct net_device *netdev, @@ -392,6 +401,8 @@ static int ixgbevf_get_sset_count(struct net_device *netdev, int stringset) return IXGBEVF_TEST_LEN; case ETH_SS_STATS: return IXGBEVF_STATS_LEN; + case ETH_SS_PRIV_FLAGS: + return IXGBEVF_PRIV_FLAGS_STR_LEN; default: return -EINVAL; } @@ -496,6 +507,10 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset, p += ETH_GSTRING_LEN; } break; + case ETH_SS_PRIV_FLAGS: + memcpy(data, ixgbevf_priv_flags_strings, + IXGBEVF_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN); + break; } } @@ -888,6 +903,37 @@ static int ixgbevf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, return err; } +static u32 ixgbevf_get_priv_flags(struct net_device *netdev) +{ + struct ixgbevf_adapter *adapter = netdev_priv(netdev); + u32 priv_flags = 0; + + if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX) + priv_flags |= IXGBEVF_PRIV_FLAGS_LEGACY_RX; + + return priv_flags; +} + +static int ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags) +{ + struct ixgbevf_adapter *adapter = netdev_priv(netdev); + unsigned int flags = adapter->flags; + + flags &= ~IXGBEVF_FLAGS_LEGACY_RX; + if (priv_flags & IXGBEVF_PRIV_FLAGS_LEGACY_RX) + flags |= IXGBEVF_FLAGS_LEGACY_RX; + + if (flags != adapter->flags) { + adapter->flags = flags; + + /* reset interface to repopulate queues */ + if (netif_running(netdev)) + ixgbevf_reinit_locked(adapter); + } + + return 0; +} + static const struct ethtool_ops ixgbevf_ethtool_ops = { .get_drvinfo = ixgbevf_get_drvinfo, .get_regs_len = ixgbevf_get_regs_len, @@ -909,6 +955,8 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = { .get_rxfh_key_size = ixgbevf_get_rxfh_key_size, .get_rxfh = ixgbevf_get_rxfh, .get_link_ksettings = ixgbevf_get_link_ksettings, + .get_priv_flags = ixgbevf_get_priv_flags, + .set_priv_flags = ixgbevf_set_priv_flags, }; void ixgbevf_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index f6952425c87d..f65ca156af2d 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -89,19 +89,15 @@ struct ixgbevf_rx_queue_stats { }; enum ixgbevf_ring_state_t { + __IXGBEVF_RX_3K_BUFFER, + __IXGBEVF_RX_BUILD_SKB_ENABLED, __IXGBEVF_TX_DETECT_HANG, __IXGBEVF_HANG_CHECK_ARMED, }; -#define check_for_tx_hang(ring) \ - test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) -#define set_check_for_tx_hang(ring) \ - set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) -#define clear_check_for_tx_hang(ring) \ - clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) - struct ixgbevf_ring { struct ixgbevf_ring *next; + struct ixgbevf_q_vector *q_vector; /* backpointer to q_vector */ struct net_device *netdev; struct device *dev; void *desc; /* descriptor ring memory */ @@ -133,7 +129,7 @@ struct ixgbevf_ring { */ u16 reg_idx; int queue_index; /* needed for multiqueue queue management */ -}; +} ____cacheline_internodealigned_in_smp; /* How many Rx Buffers do we bundle into one write to the hardware ? */ #define IXGBEVF_RX_BUFFER_WRITE 16 /* Must be power of 2 */ @@ -156,12 +152,20 @@ struct ixgbevf_ring { /* Supported Rx Buffer Sizes */ #define IXGBEVF_RXBUFFER_256 256 /* Used for packet split */ #define IXGBEVF_RXBUFFER_2048 2048 +#define IXGBEVF_RXBUFFER_3072 3072 #define IXGBEVF_RX_HDR_SIZE IXGBEVF_RXBUFFER_256 -#define IXGBEVF_RX_BUFSZ IXGBEVF_RXBUFFER_2048 #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) +#define IXGBEVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) +#if (PAGE_SIZE < 8192) +#define IXGBEVF_MAX_FRAME_BUILD_SKB \ + (SKB_WITH_OVERHEAD(IXGBEVF_RXBUFFER_2048) - IXGBEVF_SKB_PAD) +#else +#define IXGBEVF_MAX_FRAME_BUILD_SKB IXGBEVF_RXBUFFER_2048 +#endif + #define IXGBE_TX_FLAGS_CSUM BIT(0) #define IXGBE_TX_FLAGS_VLAN BIT(1) #define IXGBE_TX_FLAGS_TSO BIT(2) @@ -170,6 +174,50 @@ struct ixgbevf_ring { #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000 #define IXGBE_TX_FLAGS_VLAN_SHIFT 16 +#define ring_uses_large_buffer(ring) \ + test_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) +#define set_ring_uses_large_buffer(ring) \ + set_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) +#define clear_ring_uses_large_buffer(ring) \ + clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) + +#define ring_uses_build_skb(ring) \ + test_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) +#define set_ring_build_skb_enabled(ring) \ + set_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) +#define clear_ring_build_skb_enabled(ring) \ + clear_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) + +static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring) +{ +#if (PAGE_SIZE < 8192) + if (ring_uses_large_buffer(ring)) + return IXGBEVF_RXBUFFER_3072; + + if (ring_uses_build_skb(ring)) + return IXGBEVF_MAX_FRAME_BUILD_SKB; +#endif + return IXGBEVF_RXBUFFER_2048; +} + +static inline unsigned int ixgbevf_rx_pg_order(struct ixgbevf_ring *ring) +{ +#if (PAGE_SIZE < 8192) + if (ring_uses_large_buffer(ring)) + return 1; +#endif + return 0; +} + +#define ixgbevf_rx_pg_size(_ring) (PAGE_SIZE << ixgbevf_rx_pg_order(_ring)) + +#define check_for_tx_hang(ring) \ + test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) +#define set_check_for_tx_hang(ring) \ + set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) +#define clear_check_for_tx_hang(ring) \ + clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) + struct ixgbevf_ring_container { struct ixgbevf_ring *ring; /* pointer to linked list of rings */ unsigned int total_bytes; /* total bytes processed this int */ @@ -194,7 +242,11 @@ struct ixgbevf_q_vector { u16 itr; /* Interrupt throttle rate written to EITR */ struct napi_struct napi; struct ixgbevf_ring_container rx, tx; + struct rcu_head rcu; /* to avoid race with update stats on free */ char name[IFNAMSIZ + 9]; + + /* for dynamic allocation of rings associated with this q_vector */ + struct ixgbevf_ring ring[0] ____cacheline_internodealigned_in_smp; #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int state; #define IXGBEVF_QV_STATE_IDLE 0 @@ -331,6 +383,8 @@ struct ixgbevf_adapter { u32 *rss_key; u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE]; + u32 flags; +#define IXGBEVF_FLAGS_LEGACY_RX BIT(1) }; enum ixbgevf_state_t { diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 9b3d43d28106..4da449e0a4ba 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -130,6 +130,9 @@ static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter) static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter); static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector); static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter); +static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer); +static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring, + struct ixgbevf_rx_buffer *old_buff); static void ixgbevf_remove_adapter(struct ixgbe_hw *hw) { @@ -527,6 +530,49 @@ static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring, skb->protocol = eth_type_trans(skb, rx_ring->netdev); } +static +struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring, + const unsigned int size) +{ + struct ixgbevf_rx_buffer *rx_buffer; + + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + prefetchw(rx_buffer->page); + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + size, + DMA_FROM_DEVICE); + + rx_buffer->pagecnt_bias--; + + return rx_buffer; +} + +static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring, + struct ixgbevf_rx_buffer *rx_buffer) +{ + if (ixgbevf_can_reuse_rx_page(rx_buffer)) { + /* hand second half of page back to the ring */ + ixgbevf_reuse_rx_page(rx_ring, rx_buffer); + } else { + /* We are not reusing the buffer so unmap it and free + * any references we are holding to it + */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + ixgbevf_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBEVF_RX_DMA_ATTR); + __page_frag_cache_drain(rx_buffer->page, + rx_buffer->pagecnt_bias); + } + + /* clear contents of rx_buffer */ + rx_buffer->page = NULL; +} + /** * ixgbevf_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed @@ -554,32 +600,38 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, return true; } +static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? IXGBEVF_SKB_PAD : 0; +} + static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring, struct ixgbevf_rx_buffer *bi) { struct page *page = bi->page; - dma_addr_t dma = bi->dma; + dma_addr_t dma; /* since we are recycling buffers we should seldom need to alloc */ if (likely(page)) return true; /* alloc new page for storage */ - page = dev_alloc_page(); + page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_rx_page_failed++; return false; } /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE, + dma = dma_map_page_attrs(rx_ring->dev, page, 0, + ixgbevf_rx_pg_size(rx_ring), DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR); /* if mapping failed free memory back to system since * there isn't much point in holding memory we can't use */ if (dma_mapping_error(rx_ring->dev, dma)) { - __free_page(page); + __free_pages(page, ixgbevf_rx_pg_order(rx_ring)); rx_ring->rx_stats.alloc_rx_page_failed++; return false; @@ -587,7 +639,7 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring, bi->dma = dma; bi->page = page; - bi->page_offset = 0; + bi->page_offset = ixgbevf_rx_offset(rx_ring); bi->pagecnt_bias = 1; rx_ring->rx_stats.alloc_rx_page++; @@ -621,7 +673,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, bi->dma, bi->page_offset, - IXGBEVF_RX_BUFSZ, + ixgbevf_rx_bufsz(rx_ring), DMA_FROM_DEVICE); /* Refresh the desc even if pkt_addr didn't change @@ -734,11 +786,10 @@ static inline bool ixgbevf_page_is_reserved(struct page *page) return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } -static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer, - struct page *page, - const unsigned int truesize) +static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer) { - unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--; + unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; + struct page *page = rx_buffer->page; /* avoid re-using remote pages */ if (unlikely(ixgbevf_page_is_reserved(page))) @@ -746,17 +797,13 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer, #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely(page_ref_count(page) != pagecnt_bias)) + if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) return false; - - /* flip page offset to other buffer */ - rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ; - #else - /* move offset up to the next cache line */ - rx_buffer->page_offset += truesize; +#define IXGBEVF_LAST_OFFSET \ + (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048) - if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ)) + if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET) return false; #endif @@ -765,7 +812,7 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer, * the pagecnt_bias and page count so that we fully restock the * number of references the driver holds. */ - if (unlikely(pagecnt_bias == 1)) { + if (unlikely(!pagecnt_bias)) { page_ref_add(page, USHRT_MAX); rx_buffer->pagecnt_bias = USHRT_MAX; } @@ -777,127 +824,81 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer, * ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: buffer containing page to add - * @rx_desc: descriptor containing length of buffer written by hardware * @skb: sk_buff to place the data into + * @size: size of buffer to be added * * This function will add the data contained in rx_buffer->page to the skb. - * This is done either through a direct copy if the data in the buffer is - * less than the skb header size, otherwise it will just attach the page as - * a frag to the skb. - * - * The function will then update the page offset if necessary and return - * true if the buffer can be reused by the adapter. **/ -static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, +static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, struct ixgbevf_rx_buffer *rx_buffer, - u16 size, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) + struct sk_buff *skb, + unsigned int size) { - struct page *page = rx_buffer->page; - unsigned char *va = page_address(page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) - unsigned int truesize = IXGBEVF_RX_BUFSZ; + unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); + unsigned int truesize = ring_uses_build_skb(rx_ring) ? + SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) : + SKB_DATA_ALIGN(size); +#endif + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, + rx_buffer->page_offset, size, truesize); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; #endif - unsigned int pull_len; - - if (unlikely(skb_is_nonlinear(skb))) - goto add_tail_frag; - - if (likely(size <= IXGBEVF_RX_HDR_SIZE)) { - memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - - /* page is not reserved, we can reuse buffer as is */ - if (likely(!ixgbevf_page_is_reserved(page))) - return true; - - /* this page cannot be reused so discard it */ - return false; - } - - /* we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. - */ - pull_len = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); - - /* update all of the pointers */ - va += pull_len; - size -= pull_len; - -add_tail_frag: - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - (unsigned long)va & ~PAGE_MASK, size, truesize); - - return ixgbevf_can_reuse_rx_page(rx_buffer, page, truesize); } -static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) +static +struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring, + struct ixgbevf_rx_buffer *rx_buffer, + union ixgbe_adv_rx_desc *rx_desc, + unsigned int size) { - struct ixgbevf_rx_buffer *rx_buffer; - struct page *page; - u16 size = le16_to_cpu(rx_desc->wb.upper.length); - - rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - page = rx_buffer->page; - prefetchw(page); - - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - size, - DMA_FROM_DEVICE); - - if (likely(!skb)) { - void *page_addr = page_address(page) + - rx_buffer->page_offset; + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(size); +#endif + unsigned int headlen; + struct sk_buff *skb; - /* prefetch first cache line of first page */ - prefetch(page_addr); + /* prefetch first cache line of first page */ + prefetch(va); #if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES); + prefetch(va + L1_CACHE_BYTES); #endif - /* allocate a skb to store the frags */ - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - IXGBEVF_RX_HDR_SIZE); - if (unlikely(!skb)) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - return NULL; - } + /* allocate a skb to store the frags */ + skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE); + if (unlikely(!skb)) + return NULL; - /* we will be copying header into skb->data in - * pskb_may_pull so it is in our interest to prefetch - * it now to avoid a possible cache miss - */ - prefetchw(skb->data); - } + /* Determine available headroom for copy */ + headlen = size; + if (headlen > IXGBEVF_RX_HDR_SIZE) + headlen = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE); - /* pull page into skb */ - if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) { - /* hand second half of page back to the ring */ - ixgbevf_reuse_rx_page(rx_ring, rx_buffer); + /* align pull length to size of long to optimize memcpy performance */ + memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + + /* update all of the pointers */ + size -= headlen; + if (size) { + skb_add_rx_frag(skb, 0, rx_buffer->page, + (va + headlen) - page_address(rx_buffer->page), + size, truesize); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif } else { - /* We are not reusing the buffer so unmap it and free - * any references we are holding to it - */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - PAGE_SIZE, DMA_FROM_DEVICE, - IXGBEVF_RX_DMA_ATTR); - __page_frag_cache_drain(page, rx_buffer->pagecnt_bias); + rx_buffer->pagecnt_bias++; } - /* clear contents of buffer_info */ - rx_buffer->dma = 0; - rx_buffer->page = NULL; - return skb; } @@ -909,6 +910,44 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask); } +static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, + struct ixgbevf_rx_buffer *rx_buffer, + union ixgbe_adv_rx_desc *rx_desc, + unsigned int size) +{ + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + + SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size); +#endif + struct sk_buff *skb; + + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif + + /* build an skb to around the page buffer */ + skb = build_skb(va - IXGBEVF_SKB_PAD, truesize); + if (unlikely(!skb)) + return NULL; + + /* update pointers within the skb to store the data */ + skb_reserve(skb, IXGBEVF_SKB_PAD); + __skb_put(skb, size); + + /* update buffer offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif + + return skb; +} static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *rx_ring, int budget) @@ -919,6 +958,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; + struct ixgbevf_rx_buffer *rx_buffer; + unsigned int size; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) { @@ -927,8 +968,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, } rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean); - - if (!rx_desc->wb.upper.length) + size = le16_to_cpu(rx_desc->wb.upper.length); + if (!size) break; /* This memory barrier is needed to keep us from reading @@ -937,15 +978,26 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, */ rmb(); + rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size); + /* retrieve a buffer from the ring */ - skb = ixgbevf_fetch_rx_buffer(rx_ring, rx_desc, skb); + if (skb) + ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size); + else if (ring_uses_build_skb(rx_ring)) + skb = ixgbevf_build_skb(rx_ring, rx_buffer, + rx_desc, size); + else + skb = ixgbevf_construct_skb(rx_ring, rx_buffer, + rx_desc, size); /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->rx_stats.alloc_rx_buff_failed++; + rx_buffer->pagecnt_bias++; break; } + ixgbevf_put_rx_buffer(rx_ring, rx_buffer); cleaned_count++; /* fetch next buffer in frame if non-eop */ @@ -1260,85 +1312,6 @@ static irqreturn_t ixgbevf_msix_clean_rings(int irq, void *data) return IRQ_HANDLED; } -static inline void map_vector_to_rxq(struct ixgbevf_adapter *a, int v_idx, - int r_idx) -{ - struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx]; - - a->rx_ring[r_idx]->next = q_vector->rx.ring; - q_vector->rx.ring = a->rx_ring[r_idx]; - q_vector->rx.count++; -} - -static inline void map_vector_to_txq(struct ixgbevf_adapter *a, int v_idx, - int t_idx) -{ - struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx]; - - a->tx_ring[t_idx]->next = q_vector->tx.ring; - q_vector->tx.ring = a->tx_ring[t_idx]; - q_vector->tx.count++; -} - -/** - * ixgbevf_map_rings_to_vectors - Maps descriptor rings to vectors - * @adapter: board private structure to initialize - * - * This function maps descriptor rings to the queue-specific vectors - * we were allotted through the MSI-X enabling code. Ideally, we'd have - * one vector per ring/queue, but on a constrained vector budget, we - * group the rings as "efficiently" as possible. You would add new - * mapping configurations in here. - **/ -static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter) -{ - int q_vectors; - int v_start = 0; - int rxr_idx = 0, txr_idx = 0; - int rxr_remaining = adapter->num_rx_queues; - int txr_remaining = adapter->num_tx_queues; - int i, j; - int rqpv, tqpv; - - q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - - /* The ideal configuration... - * We have enough vectors to map one per queue. - */ - if (q_vectors == adapter->num_rx_queues + adapter->num_tx_queues) { - for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++) - map_vector_to_rxq(adapter, v_start, rxr_idx); - - for (; txr_idx < txr_remaining; v_start++, txr_idx++) - map_vector_to_txq(adapter, v_start, txr_idx); - return 0; - } - - /* If we don't have enough vectors for a 1-to-1 - * mapping, we'll have to group them so there are - * multiple queues per vector. - */ - /* Re-adjusting *qpv takes care of the remainder. */ - for (i = v_start; i < q_vectors; i++) { - rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i); - for (j = 0; j < rqpv; j++) { - map_vector_to_rxq(adapter, i, rxr_idx); - rxr_idx++; - rxr_remaining--; - } - } - for (i = v_start; i < q_vectors; i++) { - tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i); - for (j = 0; j < tqpv; j++) { - map_vector_to_txq(adapter, i, txr_idx); - txr_idx++; - txr_remaining--; - } - } - - return 0; -} - /** * ixgbevf_request_msix_irqs - Initialize MSI-X interrupts * @adapter: board private structure @@ -1411,20 +1384,6 @@ free_queue_irqs: return err; } -static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter) -{ - int i, q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - - for (i = 0; i < q_vectors; i++) { - struct ixgbevf_q_vector *q_vector = adapter->q_vector[i]; - - q_vector->rx.ring = NULL; - q_vector->tx.ring = NULL; - q_vector->rx.count = 0; - q_vector->tx.count = 0; - } -} - /** * ixgbevf_request_irq - initialize interrupts * @adapter: board private structure @@ -1464,8 +1423,6 @@ static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter) free_irq(adapter->msix_entries[i].vector, adapter->q_vector[i]); } - - ixgbevf_reset_q_vectors(adapter); } /** @@ -1587,7 +1544,8 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter) #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 -static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index) +static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring, int index) { struct ixgbe_hw *hw = &adapter->hw; u32 srrctl; @@ -1595,7 +1553,10 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index) srrctl = IXGBE_SRRCTL_DROP_EN; srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; - srrctl |= IXGBEVF_RX_BUFSZ >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + if (ring_uses_large_buffer(ring)) + srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + else + srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl); @@ -1767,10 +1728,21 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, ring->next_to_use = 0; ring->next_to_alloc = 0; - ixgbevf_configure_srrctl(adapter, reg_idx); + ixgbevf_configure_srrctl(adapter, ring, reg_idx); - /* allow any size packet since we can handle overflow */ - rxdctl &= ~IXGBE_RXDCTL_RLPML_EN; + /* RXDCTL.RLPML does not work on 82599 */ + if (adapter->hw.mac.type != ixgbe_mac_82599_vf) { + rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | + IXGBE_RXDCTL_RLPML_EN); + +#if (PAGE_SIZE < 8192) + /* Limit the maximum frame size so we don't overrun the skb */ + if (ring_uses_build_skb(ring) && + !ring_uses_large_buffer(ring)) + rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB | + IXGBE_RXDCTL_RLPML_EN; +#endif + } rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl); @@ -1779,6 +1751,29 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring)); } +static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring) +{ + struct net_device *netdev = adapter->netdev; + unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + + /* set build_skb and buffer size flags */ + clear_ring_build_skb_enabled(rx_ring); + clear_ring_uses_large_buffer(rx_ring); + + if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX) + return; + + set_ring_build_skb_enabled(rx_ring); + + if (PAGE_SIZE < 8192) { + if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB) + return; + + set_ring_uses_large_buffer(rx_ring); + } +} + /** * ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset * @adapter: board private structure @@ -1806,8 +1801,12 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) /* Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring */ - for (i = 0; i < adapter->num_rx_queues; i++) - ixgbevf_configure_rx_ring(adapter, adapter->rx_ring[i]); + for (i = 0; i < adapter->num_rx_queues; i++) { + struct ixgbevf_ring *rx_ring = adapter->rx_ring[i]; + + ixgbevf_set_rx_buffer_len(adapter, rx_ring); + ixgbevf_configure_rx_ring(adapter, rx_ring); + } } static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, @@ -2136,13 +2135,13 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, - IXGBEVF_RX_BUFSZ, + ixgbevf_rx_bufsz(rx_ring), DMA_FROM_DEVICE); /* free resources associated with mapping */ dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - PAGE_SIZE, + ixgbevf_rx_pg_size(rx_ring), DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR); @@ -2405,105 +2404,171 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) } /** - * ixgbevf_alloc_queues - Allocate memory for all rings + * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported + * @adapter: board private structure to initialize + * + * Attempt to configure the interrupts using the best available + * capabilities of the hardware and the kernel. + **/ +static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) +{ + int vector, v_budget; + + /* It's easy to be greedy for MSI-X vectors, but it really + * doesn't do us much good if we have a lot more vectors + * than CPU's. So let's be conservative and only ask for + * (roughly) the same number of vectors as there are CPU's. + * The default is to use pairs of vectors. + */ + v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues); + v_budget = min_t(int, v_budget, num_online_cpus()); + v_budget += NON_Q_VECTORS; + + adapter->msix_entries = kcalloc(v_budget, + sizeof(struct msix_entry), GFP_KERNEL); + if (!adapter->msix_entries) + return -ENOMEM; + + for (vector = 0; vector < v_budget; vector++) + adapter->msix_entries[vector].entry = vector; + + /* A failure in MSI-X entry allocation isn't fatal, but the VF driver + * does not support any other modes, so we will simply fail here. Note + * that we clean up the msix_entries pointer else-where. + */ + return ixgbevf_acquire_msix_vectors(adapter, v_budget); +} + +static void ixgbevf_add_ring(struct ixgbevf_ring *ring, + struct ixgbevf_ring_container *head) +{ + ring->next = head->ring; + head->ring = ring; + head->count++; +} + +/** + * ixgbevf_alloc_q_vector - Allocate memory for a single interrupt vector * @adapter: board private structure to initialize + * @v_idx: index of vector in adapter struct + * @txr_count: number of Tx rings for q vector + * @txr_idx: index of first Tx ring to assign + * @rxr_count: number of Rx rings for q vector + * @rxr_idx: index of first Rx ring to assign * - * We allocate one ring per queue at run-time since we don't know the - * number of queues at compile-time. The polling_netdev array is - * intended for Multiqueue, but should work fine with a single queue. + * We allocate one q_vector. If allocation fails we return -ENOMEM. **/ -static int ixgbevf_alloc_queues(struct ixgbevf_adapter *adapter) +static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx, + int txr_count, int txr_idx, + int rxr_count, int rxr_idx) { + struct ixgbevf_q_vector *q_vector; struct ixgbevf_ring *ring; - int rx = 0, tx = 0; + int ring_count, size; - for (; tx < adapter->num_tx_queues; tx++) { - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) - goto err_allocation; + ring_count = txr_count + rxr_count; + size = sizeof(*q_vector) + (sizeof(*ring) * ring_count); + /* allocate q_vector and rings */ + q_vector = kzalloc(size, GFP_KERNEL); + if (!q_vector) + return -ENOMEM; + + /* initialize NAPI */ + netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll, 64); + + /* tie q_vector and adapter together */ + adapter->q_vector[v_idx] = q_vector; + q_vector->adapter = adapter; + q_vector->v_idx = v_idx; + + /* initialize pointer to rings */ + ring = q_vector->ring; + + while (txr_count) { + /* assign generic ring traits */ ring->dev = &adapter->pdev->dev; ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Tx values */ + ixgbevf_add_ring(ring, &q_vector->tx); + + /* apply Tx specific ring traits */ ring->count = adapter->tx_ring_count; - ring->queue_index = tx; - ring->reg_idx = tx; + ring->queue_index = txr_idx; + ring->reg_idx = txr_idx; - adapter->tx_ring[tx] = ring; - } + /* assign ring to adapter */ + adapter->tx_ring[txr_idx] = ring; - for (; rx < adapter->num_rx_queues; rx++) { - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) - goto err_allocation; + /* update count and index */ + txr_count--; + txr_idx++; + /* push pointer to next ring */ + ring++; + } + + while (rxr_count) { + /* assign generic ring traits */ ring->dev = &adapter->pdev->dev; ring->netdev = adapter->netdev; + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Rx values */ + ixgbevf_add_ring(ring, &q_vector->rx); + + /* apply Rx specific ring traits */ ring->count = adapter->rx_ring_count; - ring->queue_index = rx; - ring->reg_idx = rx; + ring->queue_index = rxr_idx; + ring->reg_idx = rxr_idx; - adapter->rx_ring[rx] = ring; - } + /* assign ring to adapter */ + adapter->rx_ring[rxr_idx] = ring; - return 0; + /* update count and index */ + rxr_count--; + rxr_idx++; -err_allocation: - while (tx) { - kfree(adapter->tx_ring[--tx]); - adapter->tx_ring[tx] = NULL; + /* push pointer to next ring */ + ring++; } - while (rx) { - kfree(adapter->rx_ring[--rx]); - adapter->rx_ring[rx] = NULL; - } - return -ENOMEM; + return 0; } /** - * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported + * ixgbevf_free_q_vector - Free memory allocated for specific interrupt vector * @adapter: board private structure to initialize + * @v_idx: index of vector in adapter struct * - * Attempt to configure the interrupts using the best available - * capabilities of the hardware and the kernel. + * This function frees the memory allocated to the q_vector. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. **/ -static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) +static void ixgbevf_free_q_vector(struct ixgbevf_adapter *adapter, int v_idx) { - struct net_device *netdev = adapter->netdev; - int err; - int vector, v_budget; - - /* It's easy to be greedy for MSI-X vectors, but it really - * doesn't do us much good if we have a lot more vectors - * than CPU's. So let's be conservative and only ask for - * (roughly) the same number of vectors as there are CPU's. - * The default is to use pairs of vectors. - */ - v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues); - v_budget = min_t(int, v_budget, num_online_cpus()); - v_budget += NON_Q_VECTORS; + struct ixgbevf_q_vector *q_vector = adapter->q_vector[v_idx]; + struct ixgbevf_ring *ring; - /* A failure in MSI-X entry allocation isn't fatal, but it does - * mean we disable MSI-X capabilities of the adapter. - */ - adapter->msix_entries = kcalloc(v_budget, - sizeof(struct msix_entry), GFP_KERNEL); - if (!adapter->msix_entries) - return -ENOMEM; + ixgbevf_for_each_ring(ring, q_vector->tx) + adapter->tx_ring[ring->queue_index] = NULL; - for (vector = 0; vector < v_budget; vector++) - adapter->msix_entries[vector].entry = vector; + ixgbevf_for_each_ring(ring, q_vector->rx) + adapter->rx_ring[ring->queue_index] = NULL; - err = ixgbevf_acquire_msix_vectors(adapter, v_budget); - if (err) - return err; + adapter->q_vector[v_idx] = NULL; + netif_napi_del(&q_vector->napi); - err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); - if (err) - return err; - - return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); + /* ixgbevf_get_stats() might access the rings on this vector, + * we must wait a grace period before freeing it. + */ + kfree_rcu(q_vector, rcu); } /** @@ -2515,35 +2580,53 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) **/ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter) { - int q_idx, num_q_vectors; - struct ixgbevf_q_vector *q_vector; + int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + int rxr_remaining = adapter->num_rx_queues; + int txr_remaining = adapter->num_tx_queues; + int rxr_idx = 0, txr_idx = 0, v_idx = 0; + int err; + + if (q_vectors >= (rxr_remaining + txr_remaining)) { + for (; rxr_remaining; v_idx++, q_vectors--) { + int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors); + + err = ixgbevf_alloc_q_vector(adapter, v_idx, + 0, 0, rqpv, rxr_idx); + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining -= rqpv; + rxr_idx += rqpv; + } + } + + for (; q_vectors; v_idx++, q_vectors--) { + int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors); + int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors); - num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + err = ixgbevf_alloc_q_vector(adapter, v_idx, + tqpv, txr_idx, + rqpv, rxr_idx); - for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { - q_vector = kzalloc(sizeof(struct ixgbevf_q_vector), GFP_KERNEL); - if (!q_vector) + if (err) goto err_out; - q_vector->adapter = adapter; - q_vector->v_idx = q_idx; - netif_napi_add(adapter->netdev, &q_vector->napi, - ixgbevf_poll, 64); - adapter->q_vector[q_idx] = q_vector; + + /* update counts and index */ + rxr_remaining -= rqpv; + rxr_idx += rqpv; + txr_remaining -= tqpv; + txr_idx += tqpv; } return 0; err_out: - while (q_idx) { - q_idx--; - q_vector = adapter->q_vector[q_idx]; -#ifdef CONFIG_NET_RX_BUSY_POLL - napi_hash_del(&q_vector->napi); -#endif - netif_napi_del(&q_vector->napi); - kfree(q_vector); - adapter->q_vector[q_idx] = NULL; + while (v_idx) { + v_idx--; + ixgbevf_free_q_vector(adapter, v_idx); } + return -ENOMEM; } @@ -2557,17 +2640,11 @@ err_out: **/ static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter) { - int q_idx, num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - - for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { - struct ixgbevf_q_vector *q_vector = adapter->q_vector[q_idx]; + int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - adapter->q_vector[q_idx] = NULL; -#ifdef CONFIG_NET_RX_BUSY_POLL - napi_hash_del(&q_vector->napi); -#endif - netif_napi_del(&q_vector->napi); - kfree(q_vector); + while (q_vectors) { + q_vectors--; + ixgbevf_free_q_vector(adapter, q_vectors); } } @@ -2611,12 +2688,6 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter) goto err_alloc_q_vectors; } - err = ixgbevf_alloc_queues(adapter); - if (err) { - pr_err("Unable to allocate memory for queues\n"); - goto err_alloc_queues; - } - hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n", (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled", adapter->num_rx_queues, adapter->num_tx_queues); @@ -2624,8 +2695,6 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter) set_bit(__IXGBEVF_DOWN, &adapter->state); return 0; -err_alloc_queues: - ixgbevf_free_q_vectors(adapter); err_alloc_q_vectors: ixgbevf_reset_interrupt_capability(adapter); err_set_interrupt: @@ -2641,17 +2710,6 @@ err_set_interrupt: **/ static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter) { - int i; - - for (i = 0; i < adapter->num_tx_queues; i++) { - kfree(adapter->tx_ring[i]); - adapter->tx_ring[i] = NULL; - } - for (i = 0; i < adapter->num_rx_queues; i++) { - kfree(adapter->rx_ring[i]); - adapter->rx_ring[i] = NULL; - } - adapter->num_tx_queues = 0; adapter->num_rx_queues = 0; @@ -3088,9 +3146,14 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) if (!err) continue; hw_dbg(&adapter->hw, "Allocation for Tx Queue %u failed\n", i); - break; + goto err_setup_tx; } + return 0; +err_setup_tx: + /* rewind the index freeing the rings as we go */ + while (i--) + ixgbevf_free_tx_resources(adapter->tx_ring[i]); return err; } @@ -3148,8 +3211,14 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter) if (!err) continue; hw_dbg(&adapter->hw, "Allocation for Rx Queue %u failed\n", i); - break; + goto err_setup_rx; } + + return 0; +err_setup_rx: + /* rewind the index freeing the rings as we go */ + while (i--) + ixgbevf_free_rx_resources(adapter->rx_ring[i]); return err; } @@ -3244,28 +3313,31 @@ int ixgbevf_open(struct net_device *netdev) ixgbevf_configure(adapter); - /* Map the Tx/Rx rings to the vectors we were allotted. - * if request_irq will be called in this function map_rings - * must be called *before* up_complete - */ - ixgbevf_map_rings_to_vectors(adapter); - err = ixgbevf_request_irq(adapter); if (err) goto err_req_irq; + /* Notify the stack of the actual queue counts. */ + err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); + if (err) + goto err_set_queues; + + err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); + if (err) + goto err_set_queues; + ixgbevf_up_complete(adapter); return 0; +err_set_queues: + ixgbevf_free_irq(adapter); err_req_irq: - ixgbevf_down(adapter); -err_setup_rx: ixgbevf_free_all_rx_resources(adapter); -err_setup_tx: +err_setup_rx: ixgbevf_free_all_tx_resources(adapter); +err_setup_tx: ixgbevf_reset(adapter); - err_setup_reset: return err; @@ -3707,11 +3779,10 @@ static int ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size) return __ixgbevf_maybe_stop_tx(tx_ring, size); } -static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static int ixgbevf_xmit_frame_ring(struct sk_buff *skb, + struct ixgbevf_ring *tx_ring) { - struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbevf_tx_buffer *first; - struct ixgbevf_ring *tx_ring; int tso; u32 tx_flags = 0; u16 count = TXD_USE_COUNT(skb_headlen(skb)); @@ -3726,8 +3797,6 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } - tx_ring = adapter->tx_ring[skb->queue_mapping]; - /* need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD, * + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD, * + 2 desc gap to keep tail from touching head, @@ -3780,6 +3849,29 @@ out_drop: return NETDEV_TX_OK; } +static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +{ + struct ixgbevf_adapter *adapter = netdev_priv(netdev); + struct ixgbevf_ring *tx_ring; + + if (skb->len <= 0) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + /* The minimum packet size for olinfo paylen is 17 so pad the skb + * in order to meet this minimum size requirement. + */ + if (skb->len < 17) { + if (skb_padto(skb, 17)) + return NETDEV_TX_OK; + skb->len = 17; + } + + tx_ring = adapter->tx_ring[skb->queue_mapping]; + return ixgbevf_xmit_frame_ring(skb, tx_ring); +} + /** * ixgbevf_set_mac - Change the Ethernet Address of the NIC * @netdev: network interface device structure @@ -3839,6 +3931,9 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) /* must set new MTU before calling down or up */ netdev->mtu = new_mtu; + if (netif_running(netdev)) + ixgbevf_reinit_locked(adapter); + return 0; } @@ -3917,17 +4012,11 @@ static int ixgbevf_resume(struct pci_dev *pdev) rtnl_lock(); err = ixgbevf_init_interrupt_scheme(adapter); + if (!err && netif_running(netdev)) + err = ixgbevf_open(netdev); rtnl_unlock(); - if (err) { - dev_err(&pdev->dev, "Cannot initialize interrupts\n"); + if (err) return err; - } - - if (netif_running(netdev)) { - err = ixgbevf_open(netdev); - if (err) - return err; - } netif_device_attach(netdev); @@ -3953,6 +4042,7 @@ static void ixgbevf_get_stats(struct net_device *netdev, stats->multicast = adapter->stats.vfmprc - adapter->stats.base_vfmprc; + rcu_read_lock(); for (i = 0; i < adapter->num_rx_queues; i++) { ring = adapter->rx_ring[i]; do { @@ -3974,6 +4064,7 @@ static void ixgbevf_get_stats(struct net_device *netdev, stats->tx_bytes += bytes; stats->tx_packets += packets; } + rcu_read_unlock(); } #define IXGBEVF_MAX_MAC_HDR_LEN 127 |