diff options
Diffstat (limited to 'drivers/net/ethernet/intel')
36 files changed, 1238 insertions, 1178 deletions
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 04e939226e08..2b7323d392dc 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2253,7 +2253,7 @@ static int e100_poll(struct napi_struct *napi, int budget) /* If budget not fully consumed, exit the polling mode */ if (work_done < budget) { - napi_complete(napi); + napi_complete_done(napi, work_done); e100_enable_irq(nic); } diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 79651eb608ff..2175cced402f 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -240,9 +240,9 @@ static void e1000e_dump(struct e1000_adapter *adapter) /* Print netdevice Info */ if (netdev) { dev_info(&adapter->pdev->dev, "Net device Info\n"); - pr_info("Device Name state trans_start last_rx\n"); - pr_info("%-15s %016lX %016lX %016lX\n", netdev->name, - netdev->state, dev_trans_start(netdev), netdev->last_rx); + pr_info("Device Name state trans_start\n"); + pr_info("%-15s %016lX %016lX\n", netdev->name, + netdev->state, dev_trans_start(netdev)); } /* Print Registers */ diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 342007df4663..82d8040fa418 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -134,19 +134,6 @@ /* default to trying for four seconds */ #define I40E_TRY_LINK_TIMEOUT (4 * HZ) -/** - * i40e_is_mac_710 - Return true if MAC is X710/XL710 - * @hw: ptr to the hardware info - **/ -static inline bool i40e_is_mac_710(struct i40e_hw *hw) -{ - if ((hw->mac.type == I40E_MAC_X710) || - (hw->mac.type == I40E_MAC_XL710)) - return true; - - return false; -} - /* driver state flags */ enum i40e_state_t { __I40E_TESTING, @@ -361,6 +348,8 @@ struct i40e_pf { #define I40E_FLAG_TRUE_PROMISC_SUPPORT BIT_ULL(51) #define I40E_FLAG_HAVE_CRT_RETIMER BIT_ULL(52) #define I40E_FLAG_PTP_L4_CAPABLE BIT_ULL(53) +#define I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE BIT_ULL(54) +#define I40E_FLAG_TEMP_LINK_POLLING BIT_ULL(55) /* tracks features that get auto disabled by errors */ u64 auto_disable_flags; @@ -480,6 +469,22 @@ struct i40e_mac_filter { enum i40e_filter_state state; }; +/* Wrapper structure to keep track of filters while we are preparing to send + * firmware commands. We cannot send firmware commands while holding a + * spinlock, since it might sleep. To avoid this, we wrap the added filters in + * a separate structure, which will track the state change and update the real + * filter while under lock. We can't simply hold the filters in a separate + * list, as this opens a window for a race condition when adding new MAC + * addresses to all VLANs, or when adding new VLANs to all MAC addresses. + */ +struct i40e_new_mac_filter { + struct hlist_node hlist; + struct i40e_mac_filter *f; + + /* Track future changes to state separately */ + enum i40e_filter_state state; +}; + struct i40e_veb { struct i40e_pf *pf; u16 idx; @@ -762,6 +767,7 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features); void i40e_set_ethtool_ops(struct net_device *netdev); struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan); +void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f); void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan); int i40e_sync_vsi_filters(struct i40e_vsi *vsi); struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, @@ -804,7 +810,6 @@ int i40e_lan_add_device(struct i40e_pf *pf); int i40e_lan_del_device(struct i40e_pf *pf); void i40e_client_subtask(struct i40e_pf *pf); void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi); -void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi); void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset); void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs); void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id); @@ -852,12 +857,12 @@ int i40e_close(struct net_device *netdev); int i40e_vsi_open(struct i40e_vsi *vsi); void i40e_vlan_stripping_disable(struct i40e_vsi *vsi); int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid); -int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid); +int i40e_vsi_add_vlan(struct i40e_vsi *vsi, u16 vid); void i40e_rm_vlan_all_mac(struct i40e_vsi *vsi, s16 vid); -void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid); -struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, - const u8 *macaddr); -int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, const u8 *macaddr); +void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid); +struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi, + const u8 *macaddr); +int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr); bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi); struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr); #ifdef I40E_FCOE diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index b2101a51534c..451f48b7540a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -538,6 +538,8 @@ I40E_CHECK_STRUCT_LEN(24, i40e_aqc_mac_address_read_data); /* Manage MAC Address Write Command (0x0108) */ struct i40e_aqc_mac_address_write { __le16 command_flags; +#define I40E_AQC_MC_MAG_EN 0x0100 +#define I40E_AQC_WOL_PRESERVE_ON_PFR 0x0200 #define I40E_AQC_WRITE_TYPE_LAA_ONLY 0x0000 #define I40E_AQC_WRITE_TYPE_LAA_WOL 0x4000 #define I40E_AQC_WRITE_TYPE_PORT 0x8000 diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 7fe72abc0b4a..d570219efd9f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -174,8 +174,6 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi) if (!vsi) return; - memset(¶ms, 0, sizeof(params)); - i40e_client_get_params(vsi, ¶ms); mutex_lock(&i40e_client_instance_mutex); list_for_each_entry(cdev, &i40e_client_instances, list) { if (cdev->lan_info.pf == vsi->back) { @@ -186,6 +184,8 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi) "Cannot locate client instance l2_param_change routine\n"); continue; } + memset(¶ms, 0, sizeof(params)); + i40e_client_get_params(vsi, ¶ms); if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) { dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort l2 param change\n"); @@ -201,41 +201,6 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi) } /** - * i40e_notify_client_of_netdev_open - call the client open callback - * @vsi: the VSI with netdev opened - * - * If there is a client to this netdev, call the client with open - **/ -void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi) -{ - struct i40e_client_instance *cdev; - int ret = 0; - - if (!vsi) - return; - mutex_lock(&i40e_client_instance_mutex); - list_for_each_entry(cdev, &i40e_client_instances, list) { - if (cdev->lan_info.netdev == vsi->netdev) { - if (!cdev->client || - !cdev->client->ops || !cdev->client->ops->open) { - dev_dbg(&vsi->back->pdev->dev, - "Cannot locate client instance open routine\n"); - continue; - } - if (!(test_bit(__I40E_CLIENT_INSTANCE_OPENED, - &cdev->state))) { - ret = cdev->client->ops->open(&cdev->lan_info, - cdev->client); - if (!ret) - set_bit(__I40E_CLIENT_INSTANCE_OPENED, - &cdev->state); - } - } - } - mutex_unlock(&i40e_client_instance_mutex); -} - -/** * i40e_client_release_qvlist * @ldev: pointer to L2 context. * @@ -545,9 +510,10 @@ void i40e_client_subtask(struct i40e_pf *pf) continue; if (!existing) { - dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n", + dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x dev=0x%02x func=0x%02x\n", client->name, pf->hw.pf_id, - pf->hw.bus.device, pf->hw.bus.func); + pf->hw.bus.bus_id, pf->hw.bus.device, + pf->hw.bus.func); } mutex_lock(&i40e_client_instance_mutex); @@ -596,8 +562,9 @@ int i40e_lan_add_device(struct i40e_pf *pf) ldev->pf = pf; INIT_LIST_HEAD(&ldev->list); list_add(&ldev->list, &i40e_devices); - dev_info(&pf->pdev->dev, "Added LAN device PF%d bus=0x%02x func=0x%02x\n", - pf->hw.pf_id, pf->hw.bus.device, pf->hw.bus.func); + dev_info(&pf->pdev->dev, "Added LAN device PF%d bus=0x%02x dev=0x%02x func=0x%02x\n", + pf->hw.pf_id, pf->hw.bus.bus_id, + pf->hw.bus.device, pf->hw.bus.func); /* Since in some cases register may have happened before a device gets * added, we can schedule a subtask to go initiate the clients if @@ -625,9 +592,9 @@ int i40e_lan_del_device(struct i40e_pf *pf) mutex_lock(&i40e_device_mutex); list_for_each_entry_safe(ldev, tmp, &i40e_devices, list) { if (ldev->pf == pf) { - dev_info(&pf->pdev->dev, "Deleted LAN device PF%d bus=0x%02x func=0x%02x\n", - pf->hw.pf_id, pf->hw.bus.device, - pf->hw.bus.func); + dev_info(&pf->pdev->dev, "Deleted LAN device PF%d bus=0x%02x dev=0x%02x func=0x%02x\n", + pf->hw.pf_id, pf->hw.bus.bus_id, + pf->hw.bus.device, pf->hw.bus.func); list_del(&ldev->list); kfree(ldev); ret = 0; @@ -688,13 +655,11 @@ static int i40e_client_release(struct i40e_client *client) * i40e_client_prepare - prepare client specific resources * @client: pointer to the registered client * - * Return 0 on success or < 0 on error **/ -static int i40e_client_prepare(struct i40e_client *client) +static void i40e_client_prepare(struct i40e_client *client) { struct i40e_device *ldev; struct i40e_pf *pf; - int ret = 0; mutex_lock(&i40e_device_mutex); list_for_each_entry(ldev, &i40e_devices, list) { @@ -704,7 +669,6 @@ static int i40e_client_prepare(struct i40e_client *client) i40e_service_event_schedule(pf); } mutex_unlock(&i40e_device_mutex); - return ret; } /** @@ -961,13 +925,9 @@ int i40e_register_client(struct i40e_client *client) set_bit(__I40E_CLIENT_REGISTERED, &client->state); mutex_unlock(&i40e_client_mutex); - if (i40e_client_prepare(client)) { - ret = -EIO; - goto out; - } + i40e_client_prepare(client); - pr_info("i40e: Registered client %s with return code %d\n", - client->name, ret); + pr_info("i40e: Registered client %s\n", client->name); out: return ret; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 128735975caa..ece57d6a6e23 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -300,7 +300,6 @@ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc; u16 len; u8 *buf = (u8 *)buffer; - u16 i = 0; if ((!(mask & hw->debug_mask)) || (desc == NULL)) return; @@ -328,12 +327,18 @@ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, if (buf_len < len) len = buf_len; /* write the full 16-byte chunks */ - for (i = 0; i < (len - 16); i += 16) - i40e_debug(hw, mask, "\t0x%04X %16ph\n", i, buf + i); - /* write whatever's left over without overrunning the buffer */ - if (i < len) - i40e_debug(hw, mask, "\t0x%04X %*ph\n", - i, len - i, buf + i); + if (hw->debug_mask & mask) { + char prefix[20]; + + snprintf(prefix, 20, + "i40e %02x:%02x.%x: \t0x", + hw->bus.bus_id, + hw->bus.device, + hw->bus.func); + + print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET, + 16, 1, buf, len, false); + } } } @@ -1838,6 +1843,8 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw, hw_link_info->link_speed = (enum i40e_aq_link_speed)resp->link_speed; hw_link_info->link_info = resp->link_info; hw_link_info->an_info = resp->an_info; + hw_link_info->fec_info = resp->config & (I40E_AQ_CONFIG_FEC_KR_ENA | + I40E_AQ_CONFIG_FEC_RS_ENA); hw_link_info->ext_info = resp->ext_info; hw_link_info->loopback = resp->loopback; hw_link_info->max_frame_size = le16_to_cpu(resp->max_frame_size); diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index f1f41f12902f..267ad2588255 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -974,7 +974,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, struct i40e_dcbx_config *r_cfg = &pf->hw.remote_dcbx_config; int i, ret; - u32 switch_id; + u16 switch_id; bw_data = kzalloc(sizeof( struct i40e_aqc_query_port_ets_config_resp), @@ -986,7 +986,8 @@ static ssize_t i40e_dbg_command_write(struct file *filp, vsi = pf->vsi[pf->lan_vsi]; switch_id = - vsi->info.switch_id & I40E_AQ_VSI_SW_ID_MASK; + le16_to_cpu(vsi->info.switch_id) & + I40E_AQ_VSI_SW_ID_MASK; ret = i40e_aq_query_port_ets_config(&pf->hw, switch_id, diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index cc1465aac2ef..a22e26200bcc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -803,9 +803,12 @@ static int i40e_set_settings(struct net_device *netdev, if (change || (abilities.link_speed != config.link_speed)) { /* copy over the rest of the abilities */ config.phy_type = abilities.phy_type; + config.phy_type_ext = abilities.phy_type_ext; config.eee_capability = abilities.eee_capability; config.eeer = abilities.eeer_val; config.low_power_ctrl = abilities.d3_lpan; + config.fec_config = abilities.fec_cfg_curr_mod_ext_info & + I40E_AQ_PHY_FEC_CONFIG_MASK; /* save the requested speeds */ hw->phy.link_info.requested_speeds = config.link_speed; @@ -2072,7 +2075,7 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector; u16 vector, intrl; - intrl = INTRL_USEC_TO_REG(vsi->int_rate_limit); + intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit); vsi->rx_rings[queue]->rx_itr_setting = ec->rx_coalesce_usecs; vsi->tx_rings[queue]->tx_itr_setting = ec->tx_coalesce_usecs; @@ -2116,6 +2119,7 @@ static int __i40e_set_coalesce(struct net_device *netdev, struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; + u16 intrl_reg; int i; if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) @@ -2127,8 +2131,9 @@ static int __i40e_set_coalesce(struct net_device *netdev, return -EINVAL; } - if (ec->rx_coalesce_usecs_high >= INTRL_REG_TO_USEC(I40E_MAX_INTRL)) { - netif_info(pf, drv, netdev, "Invalid value, rx-usecs-high range is 0-235\n"); + if (ec->rx_coalesce_usecs_high > INTRL_REG_TO_USEC(I40E_MAX_INTRL)) { + netif_info(pf, drv, netdev, "Invalid value, rx-usecs-high range is 0-%lu\n", + INTRL_REG_TO_USEC(I40E_MAX_INTRL)); return -EINVAL; } @@ -2141,7 +2146,12 @@ static int __i40e_set_coalesce(struct net_device *netdev, return -EINVAL; } - vsi->int_rate_limit = ec->rx_coalesce_usecs_high; + intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high); + vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg); + if (vsi->int_rate_limit != ec->rx_coalesce_usecs_high) { + netif_info(pf, drv, netdev, "Interrupt rate limit rounded down to %d\n", + vsi->int_rate_limit); + } if (ec->tx_coalesce_usecs == 0) { if (ec->use_adaptive_tx_coalesce) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b2f76d24000d..e8a8351c8ea9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -41,7 +41,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 25 +#define DRV_VERSION_BUILD 27 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -77,7 +77,6 @@ static const struct pci_device_id i40e_pci_tbl[] = { {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_C), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0}, - {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0}, @@ -1254,7 +1253,9 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, struct hlist_head *tmp_del_list, int vlan_filters) { + s16 pvid = le16_to_cpu(vsi->info.pvid); struct i40e_mac_filter *f, *add_head; + struct i40e_new_mac_filter *new; struct hlist_node *h; int bkt, new_vlan; @@ -1273,13 +1274,13 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, */ /* Update the filters about to be added in place */ - hlist_for_each_entry(f, tmp_add_list, hlist) { - if (vsi->info.pvid && f->vlan != vsi->info.pvid) - f->vlan = vsi->info.pvid; - else if (vlan_filters && f->vlan == I40E_VLAN_ANY) - f->vlan = 0; - else if (!vlan_filters && f->vlan == 0) - f->vlan = I40E_VLAN_ANY; + hlist_for_each_entry(new, tmp_add_list, hlist) { + if (pvid && new->f->vlan != pvid) + new->f->vlan = pvid; + else if (vlan_filters && new->f->vlan == I40E_VLAN_ANY) + new->f->vlan = 0; + else if (!vlan_filters && new->f->vlan == 0) + new->f->vlan = I40E_VLAN_ANY; } /* Update the remaining active filters */ @@ -1289,12 +1290,12 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, * order to avoid duplicating code for adding the new filter * then deleting the old filter. */ - if ((vsi->info.pvid && f->vlan != vsi->info.pvid) || + if ((pvid && f->vlan != pvid) || (vlan_filters && f->vlan == I40E_VLAN_ANY) || (!vlan_filters && f->vlan == 0)) { /* Determine the new vlan we will be adding */ - if (vsi->info.pvid) - new_vlan = vsi->info.pvid; + if (pvid) + new_vlan = pvid; else if (vlan_filters) new_vlan = 0; else @@ -1305,9 +1306,16 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, if (!add_head) return -ENOMEM; - /* Put the replacement filter into the add list */ - hash_del(&add_head->hlist); - hlist_add_head(&add_head->hlist, tmp_add_list); + /* Create a temporary i40e_new_mac_filter */ + new = kzalloc(sizeof(*new), GFP_ATOMIC); + if (!new) + return -ENOMEM; + + new->f = add_head; + new->state = add_head->state; + + /* Add the new filter to the tmp list */ + hlist_add_head(&new->hlist, tmp_add_list); /* Put the original filter into the delete list */ f->state = I40E_FILTER_REMOVE; @@ -1434,23 +1442,25 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe() * instead of list_for_each_entry(). **/ -static void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f) +void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f) { if (!f) return; + /* If the filter was never added to firmware then we can just delete it + * directly and we don't want to set the status to remove or else an + * admin queue command will unnecessarily fire. + */ if ((f->state == I40E_FILTER_FAILED) || (f->state == I40E_FILTER_NEW)) { - /* this one never got added by the FW. Just remove it, - * no need to sync anything. - */ hash_del(&f->hlist); kfree(f); } else { f->state = I40E_FILTER_REMOVE; - vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; - vsi->back->flags |= I40E_FLAG_FILTER_SYNC; } + + vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; + vsi->back->flags |= I40E_FLAG_FILTER_SYNC; } /** @@ -1477,18 +1487,19 @@ void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan) } /** - * i40e_put_mac_in_vlan - Make macvlan filters from macaddrs and vlans + * i40e_add_mac_filter - Add a MAC filter for all active VLANs * @vsi: the VSI to be searched * @macaddr: the mac address to be filtered * - * Goes through all the macvlan filters and adds a macvlan filter for each + * If we're not in VLAN mode, just add the filter to I40E_VLAN_ANY. Otherwise, + * go through all the macvlan filters and add a macvlan filter for each * unique vlan that already exists. If a PVID has been assigned, instead only * add the macaddr to that VLAN. * * Returns last filter added on success, else NULL **/ -struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, - const u8 *macaddr) +struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi, + const u8 *macaddr) { struct i40e_mac_filter *f, *add = NULL; struct hlist_node *h; @@ -1498,6 +1509,9 @@ struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, return i40e_add_filter(vsi, macaddr, le16_to_cpu(vsi->info.pvid)); + if (!i40e_is_vsi_in_vlan(vsi)) + return i40e_add_filter(vsi, macaddr, I40E_VLAN_ANY); + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { if (f->state == I40E_FILTER_REMOVE) continue; @@ -1510,15 +1524,16 @@ struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, } /** - * i40e_del_mac_all_vlan - Remove a MAC filter from all VLANS + * i40e_del_mac_filter - Remove a MAC filter from all VLANs * @vsi: the VSI to be searched * @macaddr: the mac address to be removed * - * Removes a given MAC address from a VSI, regardless of VLAN + * Removes a given MAC address from a VSI regardless of what VLAN it has been + * associated with. * * Returns 0 for success, or error **/ -int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, const u8 *macaddr) +int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr) { struct i40e_mac_filter *f; struct hlist_node *h; @@ -1579,8 +1594,8 @@ static int i40e_set_mac(struct net_device *netdev, void *p) netdev_info(netdev, "set new mac address %pM\n", addr->sa_data); spin_lock_bh(&vsi->mac_filter_hash_lock); - i40e_del_mac_all_vlan(vsi, netdev->dev_addr); - i40e_put_mac_in_vlan(vsi, addr->sa_data); + i40e_del_mac_filter(vsi, netdev->dev_addr); + i40e_add_mac_filter(vsi, addr->sa_data); spin_unlock_bh(&vsi->mac_filter_hash_lock); ether_addr_copy(netdev->dev_addr, addr->sa_data); if (vsi->type == I40E_VSI_MAIN) { @@ -1756,14 +1771,8 @@ static int i40e_addr_sync(struct net_device *netdev, const u8 *addr) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; - struct i40e_mac_filter *f; - if (i40e_is_vsi_in_vlan(vsi)) - f = i40e_put_mac_in_vlan(vsi, addr); - else - f = i40e_add_filter(vsi, addr, I40E_VLAN_ANY); - - if (f) + if (i40e_add_mac_filter(vsi, addr)) return 0; else return -ENOMEM; @@ -1782,10 +1791,7 @@ static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr) struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; - if (i40e_is_vsi_in_vlan(vsi)) - i40e_del_mac_all_vlan(vsi, addr); - else - i40e_del_filter(vsi, addr, I40E_VLAN_ANY); + i40e_del_mac_filter(vsi, addr); return 0; } @@ -1823,16 +1829,15 @@ static void i40e_set_rx_mode(struct net_device *netdev) } /** - * i40e_undo_filter_entries - Undo the changes made to MAC filter entries + * i40e_undo_del_filter_entries - Undo the changes made to MAC filter entries * @vsi: Pointer to VSI struct * @from: Pointer to list which contains MAC filter entries - changes to * those entries needs to be undone. * - * MAC filter entries from list were slated to be sent to firmware, either for - * addition or deletion. + * MAC filter entries from this list were slated for deletion. **/ -static void i40e_undo_filter_entries(struct i40e_vsi *vsi, - struct hlist_head *from) +static void i40e_undo_del_filter_entries(struct i40e_vsi *vsi, + struct hlist_head *from) { struct i40e_mac_filter *f; struct hlist_node *h; @@ -1847,6 +1852,53 @@ static void i40e_undo_filter_entries(struct i40e_vsi *vsi, } /** + * i40e_undo_add_filter_entries - Undo the changes made to MAC filter entries + * @vsi: Pointer to vsi struct + * @from: Pointer to list which contains MAC filter entries - changes to + * those entries needs to be undone. + * + * MAC filter entries from this list were slated for addition. + **/ +static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi, + struct hlist_head *from) +{ + struct i40e_new_mac_filter *new; + struct hlist_node *h; + + hlist_for_each_entry_safe(new, h, from, hlist) { + /* We can simply free the wrapper structure */ + hlist_del(&new->hlist); + kfree(new); + } +} + +/** + * i40e_next_entry - Get the next non-broadcast filter from a list + * @next: pointer to filter in list + * + * Returns the next non-broadcast filter in the list. Required so that we + * ignore broadcast filters within the list, since these are not handled via + * the normal firmware update path. + */ +static +struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next) +{ + while (next) { + next = hlist_entry(next->hlist.next, + typeof(struct i40e_new_mac_filter), + hlist); + + /* keep going if we found a broadcast filter */ + if (next && is_broadcast_ether_addr(next->f->macaddr)) + continue; + + break; + } + + return next; +} + +/** * i40e_update_filter_state - Update filter state based on return data * from firmware * @count: Number of filters added @@ -1859,7 +1911,7 @@ static void i40e_undo_filter_entries(struct i40e_vsi *vsi, static int i40e_update_filter_state(int count, struct i40e_aqc_add_macvlan_element_data *add_list, - struct i40e_mac_filter *add_head) + struct i40e_new_mac_filter *add_head) { int retval = 0; int i; @@ -1878,9 +1930,9 @@ i40e_update_filter_state(int count, retval++; } - add_head = hlist_entry(add_head->hlist.next, - typeof(struct i40e_mac_filter), - hlist); + add_head = i40e_next_filter(add_head); + if (!add_head) + break; } return retval; @@ -1937,7 +1989,7 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name, static void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, struct i40e_aqc_add_macvlan_element_data *list, - struct i40e_mac_filter *add_head, + struct i40e_new_mac_filter *add_head, int num_add, bool *promisc_changed) { struct i40e_hw *hw = &vsi->back->hw; @@ -1965,10 +2017,12 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, * This function sets or clears the promiscuous broadcast flags for VLAN * filters in order to properly receive broadcast frames. Assumes that only * broadcast filters are passed. + * + * Returns status indicating success or failure; **/ -static -void i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, - struct i40e_mac_filter *f) +static i40e_status +i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, + struct i40e_mac_filter *f) { bool enable = f->state == I40E_FILTER_NEW; struct i40e_hw *hw = &vsi->back->hw; @@ -1987,15 +2041,13 @@ void i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, NULL); } - if (aq_ret) { + if (aq_ret) dev_warn(&vsi->back->pdev->dev, "Error %s setting broadcast promiscuous mode on %s\n", i40e_aq_str(hw, hw->aq.asq_last_status), vsi_name); - f->state = I40E_FILTER_FAILED; - } else if (enable) { - f->state = I40E_FILTER_ACTIVE; - } + + return aq_ret; } /** @@ -2009,7 +2061,8 @@ void i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, int i40e_sync_vsi_filters(struct i40e_vsi *vsi) { struct hlist_head tmp_add_list, tmp_del_list; - struct i40e_mac_filter *f, *add_head = NULL; + struct i40e_mac_filter *f; + struct i40e_new_mac_filter *new, *add_head = NULL; struct i40e_hw *hw = &vsi->back->hw; unsigned int failed_filters = 0; unsigned int vlan_filters = 0; @@ -2063,8 +2116,17 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) continue; } if (f->state == I40E_FILTER_NEW) { - hash_del(&f->hlist); - hlist_add_head(&f->hlist, &tmp_add_list); + /* Create a temporary i40e_new_mac_filter */ + new = kzalloc(sizeof(*new), GFP_ATOMIC); + if (!new) + goto err_no_memory_locked; + + /* Store pointer to the real filter */ + new->f = f; + new->state = f->state; + + /* Add it to the hash list */ + hlist_add_head(&new->hlist, &tmp_add_list); } /* Count the number of active (current and new) VLAN @@ -2099,7 +2161,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) cmd_flags = 0; /* handle broadcast filters by updating the broadcast - * promiscuous flag instead of deleting a MAC filter. + * promiscuous flag and release filter list. */ if (is_broadcast_ether_addr(f->macaddr)) { i40e_aqc_broadcast_filter(vsi, vsi_name, f); @@ -2157,36 +2219,37 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) goto err_no_memory; num_add = 0; - hlist_for_each_entry_safe(f, h, &tmp_add_list, hlist) { + hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { if (test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state)) { - f->state = I40E_FILTER_FAILED; + new->state = I40E_FILTER_FAILED; continue; } /* handle broadcast filters by updating the broadcast * promiscuous flag instead of adding a MAC filter. */ - if (is_broadcast_ether_addr(f->macaddr)) { - u64 key = i40e_addr_to_hkey(f->macaddr); - i40e_aqc_broadcast_filter(vsi, vsi_name, f); - - hlist_del(&f->hlist); - hash_add(vsi->mac_filter_hash, &f->hlist, key); + if (is_broadcast_ether_addr(new->f->macaddr)) { + if (i40e_aqc_broadcast_filter(vsi, vsi_name, + new->f)) + new->state = I40E_FILTER_FAILED; + else + new->state = I40E_FILTER_ACTIVE; continue; } /* add to add array */ if (num_add == 0) - add_head = f; + add_head = new; cmd_flags = 0; - ether_addr_copy(add_list[num_add].mac_addr, f->macaddr); - if (f->vlan == I40E_VLAN_ANY) { + ether_addr_copy(add_list[num_add].mac_addr, + new->f->macaddr); + if (new->f->vlan == I40E_VLAN_ANY) { add_list[num_add].vlan_tag = 0; cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; } else { add_list[num_add].vlan_tag = - cpu_to_le16((u16)(f->vlan)); + cpu_to_le16((u16)(new->f->vlan)); } add_list[num_add].queue_number = 0; /* set invalid match method for later detection */ @@ -2212,11 +2275,12 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) * the VSI's list. */ spin_lock_bh(&vsi->mac_filter_hash_lock); - hlist_for_each_entry_safe(f, h, &tmp_add_list, hlist) { - u64 key = i40e_addr_to_hkey(f->macaddr); - - hlist_del(&f->hlist); - hash_add(vsi->mac_filter_hash, &f->hlist, key); + hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { + /* Only update the state if we're still NEW */ + if (new->f->state == I40E_FILTER_NEW) + new->f->state = new->state; + hlist_del(&new->hlist); + kfree(new); } spin_unlock_bh(&vsi->mac_filter_hash_lock); kfree(add_list); @@ -2377,8 +2441,8 @@ err_no_memory: /* Restore elements on the temporary add and delete lists */ spin_lock_bh(&vsi->mac_filter_hash_lock); err_no_memory_locked: - i40e_undo_filter_entries(vsi, &tmp_del_list); - i40e_undo_filter_entries(vsi, &tmp_add_list); + i40e_undo_del_filter_entries(vsi, &tmp_del_list); + i40e_undo_add_filter_entries(vsi, &tmp_add_list); spin_unlock_bh(&vsi->mac_filter_hash_lock); vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; @@ -2568,12 +2632,15 @@ int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid) /** * i40e_vsi_add_vlan - Add VSI membership for given VLAN * @vsi: the VSI being configured - * @vid: VLAN id to be added (0 = untagged only , -1 = any) + * @vid: VLAN id to be added **/ -int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid) +int i40e_vsi_add_vlan(struct i40e_vsi *vsi, u16 vid) { int err; + if (!vid || vsi->info.pvid) + return -EINVAL; + /* Locked once because all functions invoked below iterates list*/ spin_lock_bh(&vsi->mac_filter_hash_lock); err = i40e_add_vlan_all_mac(vsi, vid); @@ -2616,10 +2683,13 @@ void i40e_rm_vlan_all_mac(struct i40e_vsi *vsi, s16 vid) /** * i40e_vsi_kill_vlan - Remove VSI membership for given VLAN * @vsi: the VSI being configured - * @vid: VLAN id to be removed (0 = untagged only , -1 = any) + * @vid: VLAN id to be removed **/ -void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid) +void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid) { + if (!vid || vsi->info.pvid) + return; + spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_rm_vlan_all_mac(vsi, vid); spin_unlock_bh(&vsi->mac_filter_hash_lock); @@ -3266,7 +3336,7 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr); wr32(hw, I40E_PFINT_RATEN(vector - 1), - INTRL_USEC_TO_REG(vsi->int_rate_limit)); + i40e_intrl_usec_to_reg(vsi->int_rate_limit)); /* Linked list for the queuepairs assigned to this vector */ wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), qp); @@ -4615,8 +4685,10 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) */ if ((!tx_pending_hw) && i40e_get_tx_pending(tx_ring, true) && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) { + local_bh_disable(); if (napi_reschedule(&tx_ring->q_vector->napi)) tx_ring->tx_stats.tx_lost_interrupt++; + local_bh_enable(); } } @@ -5270,6 +5342,8 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) enum i40e_aq_link_speed new_speed; char *speed = "Unknown"; char *fc = "Unknown"; + char *fec = ""; + char *an = ""; new_speed = vsi->back->hw.phy.link_info.link_speed; @@ -5329,8 +5403,23 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) break; } - netdev_info(vsi->netdev, "NIC Link is Up %sbps Full Duplex, Flow Control: %s\n", - speed, fc); + if (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) { + fec = ", FEC: None"; + an = ", Autoneg: False"; + + if (vsi->back->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) + an = ", Autoneg: True"; + + if (vsi->back->hw.phy.link_info.fec_info & + I40E_AQ_CONFIG_FEC_KR_ENA) + fec = ", FEC: CL74 FC-FEC/BASE-R"; + else if (vsi->back->hw.phy.link_info.fec_info & + I40E_AQ_CONFIG_FEC_RS_ENA) + fec = ", FEC: CL108 RS-FEC"; + } + + netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex%s%s, Flow Control: %s\n", + speed, fec, an, fc); } /** @@ -6265,7 +6354,16 @@ static void i40e_link_event(struct i40e_pf *pf) old_link = (pf->hw.phy.link_info_old.link_info & I40E_AQ_LINK_UP); status = i40e_get_link_status(&pf->hw, &new_link); - if (status) { + + /* On success, disable temp link polling */ + if (status == I40E_SUCCESS) { + if (pf->flags & I40E_FLAG_TEMP_LINK_POLLING) + pf->flags &= ~I40E_FLAG_TEMP_LINK_POLLING; + } else { + /* Enable link polling temporarily until i40e_get_link_status + * returns I40E_SUCCESS + */ + pf->flags |= I40E_FLAG_TEMP_LINK_POLLING; dev_dbg(&pf->pdev->dev, "couldn't get link state, status: %d\n", status); return; @@ -6317,7 +6415,8 @@ static void i40e_watchdog_subtask(struct i40e_pf *pf) return; pf->service_timer_previous = jiffies; - if (pf->flags & I40E_FLAG_LINK_POLLING_ENABLED) + if ((pf->flags & I40E_FLAG_LINK_POLLING_ENABLED) || + (pf->flags & I40E_FLAG_TEMP_LINK_POLLING)) i40e_link_event(pf); /* Update the stats for active netdevs so the network stack @@ -8682,7 +8781,7 @@ static int i40e_sw_init(struct i40e_pf *pf) pf->hw.func_caps.fd_filters_best_effort; } - if (i40e_is_mac_710(&pf->hw) && + if ((pf->hw.mac.type == I40E_MAC_XL710) && (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || (pf->hw.aq.fw_maj_ver < 4))) { pf->flags |= I40E_FLAG_RESTART_AUTONEG; @@ -8691,13 +8790,13 @@ static int i40e_sw_init(struct i40e_pf *pf) } /* Disable FW LLDP if FW < v4.3 */ - if (i40e_is_mac_710(&pf->hw) && + if ((pf->hw.mac.type == I40E_MAC_XL710) && (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || (pf->hw.aq.fw_maj_ver < 4))) pf->flags |= I40E_FLAG_STOP_FW_LLDP; /* Use the FW Set LLDP MIB API if FW > v4.40 */ - if (i40e_is_mac_710(&pf->hw) && + if ((pf->hw.mac.type == I40E_MAC_XL710) && (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver >= 40)) || (pf->hw.aq.fw_maj_ver >= 5))) pf->flags |= I40E_FLAG_USE_SET_LLDP_MIB; @@ -8728,16 +8827,17 @@ static int i40e_sw_init(struct i40e_pf *pf) } #endif /* CONFIG_PCI_IOV */ if (pf->hw.mac.type == I40E_MAC_X722) { - pf->flags |= I40E_FLAG_RSS_AQ_CAPABLE | - I40E_FLAG_128_QP_RSS_CAPABLE | - I40E_FLAG_HW_ATR_EVICT_CAPABLE | - I40E_FLAG_OUTER_UDP_CSUM_CAPABLE | - I40E_FLAG_WB_ON_ITR_CAPABLE | - I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE | - I40E_FLAG_NO_PCI_LINK_CHECK | - I40E_FLAG_USE_SET_LLDP_MIB | - I40E_FLAG_GENEVE_OFFLOAD_CAPABLE | - I40E_FLAG_PTP_L4_CAPABLE; + pf->flags |= I40E_FLAG_RSS_AQ_CAPABLE + | I40E_FLAG_128_QP_RSS_CAPABLE + | I40E_FLAG_HW_ATR_EVICT_CAPABLE + | I40E_FLAG_OUTER_UDP_CSUM_CAPABLE + | I40E_FLAG_WB_ON_ITR_CAPABLE + | I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE + | I40E_FLAG_NO_PCI_LINK_CHECK + | I40E_FLAG_USE_SET_LLDP_MIB + | I40E_FLAG_GENEVE_OFFLOAD_CAPABLE + | I40E_FLAG_PTP_L4_CAPABLE + | I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE; } else if ((pf->hw.aq.api_maj_ver > 1) || ((pf->hw.aq.api_maj_ver == 1) && (pf->hw.aq.api_min_ver > 4))) { @@ -9339,7 +9439,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) */ i40e_rm_default_mac_filter(vsi, mac_addr); spin_lock_bh(&vsi->mac_filter_hash_lock); - i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY); + i40e_add_mac_filter(vsi, mac_addr); spin_unlock_bh(&vsi->mac_filter_hash_lock); } else { /* relate the VSI_VMDQ name to the VSI_MAIN name */ @@ -9348,7 +9448,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) random_ether_addr(mac_addr); spin_lock_bh(&vsi->mac_filter_hash_lock); - i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY); + i40e_add_mac_filter(vsi, mac_addr); spin_unlock_bh(&vsi->mac_filter_hash_lock); } @@ -9367,7 +9467,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) */ eth_broadcast_addr(broadcast); spin_lock_bh(&vsi->mac_filter_hash_lock); - i40e_add_filter(vsi, broadcast, I40E_VLAN_ANY); + i40e_add_mac_filter(vsi, broadcast); spin_unlock_bh(&vsi->mac_filter_hash_lock); ether_addr_copy(netdev->dev_addr, mac_addr); @@ -10673,7 +10773,6 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) i40e_pf_config_rss(pf); /* fill in link information and enable LSE reporting */ - i40e_update_link_info(&pf->hw); i40e_link_event(pf); /* Initialize user-specific link properties */ @@ -10988,6 +11087,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->subsystem_device_id = pdev->subsystem_device; hw->bus.device = PCI_SLOT(pdev->devfn); hw->bus.func = PCI_FUNC(pdev->devfn); + hw->bus.bus_id = pdev->bus->number; pf->instance = pfs_found; /* set up the locks for the AQ, do this only once in probe @@ -11653,6 +11753,53 @@ static void i40e_pci_error_resume(struct pci_dev *pdev) } /** + * i40e_enable_mc_magic_wake - enable multicast magic packet wake up + * using the mac_address_write admin q function + * @pf: pointer to i40e_pf struct + **/ +static void i40e_enable_mc_magic_wake(struct i40e_pf *pf) +{ + struct i40e_hw *hw = &pf->hw; + i40e_status ret; + u8 mac_addr[6]; + u16 flags = 0; + + /* Get current MAC address in case it's an LAA */ + if (pf->vsi[pf->lan_vsi] && pf->vsi[pf->lan_vsi]->netdev) { + ether_addr_copy(mac_addr, + pf->vsi[pf->lan_vsi]->netdev->dev_addr); + } else { + dev_err(&pf->pdev->dev, + "Failed to retrieve MAC address; using default\n"); + ether_addr_copy(mac_addr, hw->mac.addr); + } + + /* The FW expects the mac address write cmd to first be called with + * one of these flags before calling it again with the multicast + * enable flags. + */ + flags = I40E_AQC_WRITE_TYPE_LAA_WOL; + + if (hw->func_caps.flex10_enable && hw->partition_id != 1) + flags = I40E_AQC_WRITE_TYPE_LAA_ONLY; + + ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL); + if (ret) { + dev_err(&pf->pdev->dev, + "Failed to update MAC address registers; cannot enable Multicast Magic packet wake up"); + return; + } + + flags = I40E_AQC_MC_MAG_EN + | I40E_AQC_WOL_PRESERVE_ON_PFR + | I40E_AQC_WRITE_TYPE_UPDATE_MC_MAG; + ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL); + if (ret) + dev_err(&pf->pdev->dev, + "Failed to enable Multicast Magic Packet wake up\n"); +} + +/** * i40e_shutdown - PCI callback for shutting down * @pdev: PCI device information struct **/ @@ -11674,6 +11821,9 @@ static void i40e_shutdown(struct pci_dev *pdev) cancel_work_sync(&pf->service_task); i40e_fdir_teardown(pf); + if (pf->wol_en && (pf->flags & I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE)) + i40e_enable_mc_magic_wake(pf); + rtnl_lock(); i40e_prep_for_reset(pf); rtnl_unlock(); @@ -11705,6 +11855,9 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state) set_bit(__I40E_SUSPENDED, &pf->state); set_bit(__I40E_DOWN, &pf->state); + if (pf->wol_en && (pf->flags & I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE)) + i40e_enable_mc_magic_wake(pf); + rtnl_lock(); i40e_prep_for_reset(pf); rtnl_unlock(); diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h index 5b6feb7edeb1..fea81ed065db 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h +++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h @@ -55,7 +55,7 @@ struct i40e_dma_mem { void *va; dma_addr_t pa; u32 size; -} __packed; +}; #define i40e_allocate_dma_mem(h, m, unused, s, a) \ i40e_allocate_dma_mem_d(h, m, s, a) @@ -64,17 +64,17 @@ struct i40e_dma_mem { struct i40e_virt_mem { void *va; u32 size; -} __packed; +}; #define i40e_allocate_virt_mem(h, m, s) i40e_allocate_virt_mem_d(h, m, s) #define i40e_free_virt_mem(h, m) i40e_free_virt_mem_d(h, m) -#define i40e_debug(h, m, s, ...) \ -do { \ - if (((m) & (h)->debug_mask)) \ - pr_info("i40e %02x.%x " s, \ - (h)->bus.device, (h)->bus.func, \ - ##__VA_ARGS__); \ +#define i40e_debug(h, m, s, ...) \ +do { \ + if (((m) & (h)->debug_mask)) \ + pr_info("i40e %02x:%02x.%x " s, \ + (h)->bus.bus_id, (h)->bus.device, \ + (h)->bus.func, ##__VA_ARGS__); \ } while (0) typedef enum i40e_status_code i40e_status; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 9e49ffafce28..2caee35528fa 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -280,7 +280,7 @@ void i40e_ptp_rx_hang(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - int i; + unsigned int i, cleared = 0; /* Since we cannot turn off the Rx timestamp logic if the device is * configured for Tx timestamping, we check if Rx timestamping is @@ -306,14 +306,25 @@ void i40e_ptp_rx_hang(struct i40e_vsi *vsi) time_is_before_jiffies(pf->latch_events[i] + HZ)) { rd32(hw, I40E_PRTTSYN_RXTIME_H(i)); pf->latch_event_flags &= ~BIT(i); - pf->rx_hwtstamp_cleared++; - dev_warn(&pf->pdev->dev, - "Clearing a missed Rx timestamp event for RXTIME[%d]\n", - i); + cleared++; } } spin_unlock_bh(&pf->ptp_rx_lock); + + /* Log a warning if more than 2 timestamps got dropped in the same + * check. We don't want to warn about all drops because it can occur + * in normal scenarios such as PTP frames on multicast addresses we + * aren't listening to. However, administrator should know if this is + * the reason packets aren't receiving timestamps. + */ + if (cleared > 2) + dev_dbg(&pf->pdev->dev, + "Dropped %d missed RXTIME timestamp events\n", + cleared); + + /* Finally, update the rx_hwtstamp_cleared counter */ + pf->rx_hwtstamp_cleared += cleared; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 352cf7cd2ef4..97d46058d71d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -432,7 +432,12 @@ unsupported_flow: ret = -EINVAL; } - /* The buffer allocated here is freed by the i40e_clean_tx_ring() */ + /* The buffer allocated here will be normally be freed by + * i40e_clean_fdir_tx_irq() as it reclaims resources after transmit + * completion. In the event of an error adding the buffer to the FDIR + * ring, it will immediately be freed. It may also be freed by + * i40e_clean_tx_ring() when closing the VSI. + */ return ret; } @@ -1013,14 +1018,15 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) if (!rx_ring->rx_bi) return; + if (rx_ring->skb) { + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + } + /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; - if (rx_bi->skb) { - dev_kfree_skb(rx_bi->skb); - rx_bi->skb = NULL; - } if (!rx_bi->page) continue; @@ -1425,45 +1431,6 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, } /** - * i40e_pull_tail - i40e specific version of skb_pull_tail - * @rx_ring: rx descriptor ring packet is being transacted on - * @skb: pointer to current skb being adjusted - * - * This function is an i40e specific version of __pskb_pull_tail. The - * main difference between this version and the original function is that - * this function can make several assumptions about the state of things - * that allow for significant optimizations versus the standard function. - * As a result we can do things like drop a frag and maintain an accurate - * truesize for the skb. - */ -static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb) -{ - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; - unsigned char *va; - unsigned int pull_len; - - /* it is valid to use page_address instead of kmap since we are - * working with pages allocated out of the lomem pool per - * alloc_page(GFP_ATOMIC) - */ - va = skb_frag_address(frag); - - /* we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. - */ - pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); - - /* update all of the pointers */ - skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; - skb->data_len -= pull_len; - skb->tail += pull_len; -} - -/** * i40e_cleanup_headers - Correct empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @skb: pointer to current skb being fixed @@ -1478,10 +1445,6 @@ static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb) **/ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb) { - /* place header in linear portion of buffer */ - if (skb_is_nonlinear(skb)) - i40e_pull_tail(rx_ring, skb); - /* if eth_skb_pad returns an error the skb was freed */ if (eth_skb_pad(skb)) return true; @@ -1513,19 +1476,85 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, } /** - * i40e_page_is_reserved - check if reuse is possible + * i40e_page_is_reusable - check if any reuse is possible * @page: page struct to check + * + * A page is not reusable if it was allocated under low memory + * conditions, or it's not in the same NUMA node as this CPU. */ -static inline bool i40e_page_is_reserved(struct page *page) +static inline bool i40e_page_is_reusable(struct page *page) { - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); + return (page_to_nid(page) == numa_mem_id()) && + !page_is_pfmemalloc(page); +} + +/** + * i40e_can_reuse_rx_page - Determine if this page can be reused by + * the adapter for another receive + * + * @rx_buffer: buffer containing the page + * @page: page address from rx_buffer + * @truesize: actual size of the buffer in this page + * + * If page is reusable, rx_buffer->page_offset is adjusted to point to + * an unused region in the page. + * + * For small pages, @truesize will be a constant value, half the size + * of the memory at page. We'll attempt to alternate between high and + * low halves of the page, with one half ready for use by the hardware + * and the other half being consumed by the stack. We use the page + * ref count to determine whether the stack has finished consuming the + * portion of this page that was passed up with a previous packet. If + * the page ref count is >1, we'll assume the "other" half page is + * still busy, and this page cannot be reused. + * + * For larger pages, @truesize will be the actual space used by the + * received packet (adjusted upward to an even multiple of the cache + * line size). This will advance through the page by the amount + * actually consumed by the received packets while there is still + * space for a buffer. Each region of larger pages will be used at + * most once, after which the page will not be reused. + * + * In either case, if the page is reusable its refcount is increased. + **/ +static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, + struct page *page, + const unsigned int truesize) +{ +#if (PAGE_SIZE >= 8192) + unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; +#endif + + /* Is any reuse possible? */ + if (unlikely(!i40e_page_is_reusable(page))) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= truesize; +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > last_offset) + return false; +#endif + + /* Inc ref count on page before passing it up to the stack */ + get_page(page); + + return true; } /** * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: buffer containing page to add - * @rx_desc: descriptor containing length of buffer written by hardware + * @size: packet length from rx_desc * @skb: sk_buff to place the data into * * This function will add the data contained in rx_buffer->page to the skb. @@ -1538,30 +1567,29 @@ static inline bool i40e_page_is_reserved(struct page *page) **/ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, - union i40e_rx_desc *rx_desc, + unsigned int size, struct sk_buff *skb) { struct page *page = rx_buffer->page; - u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - unsigned int size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> - I40E_RXD_QW1_LENGTH_PBUF_SHIFT; + unsigned char *va = page_address(page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) unsigned int truesize = I40E_RXBUFFER_2048; #else unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); - unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; #endif + unsigned int pull_len; + + if (unlikely(skb_is_nonlinear(skb))) + goto add_tail_frag; /* will the data fit in the skb we allocated? if so, just * copy it as it is pretty small anyway */ - if ((size <= I40E_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) { - unsigned char *va = page_address(page) + rx_buffer->page_offset; - + if (size <= I40E_RX_HDR_SIZE) { memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - /* page is not reserved, we can reuse buffer as-is */ - if (likely(!i40e_page_is_reserved(page))) + /* page is reusable, we can reuse buffer as-is */ + if (likely(i40e_page_is_reusable(page))) return true; /* this page cannot be reused so discard it */ @@ -1569,34 +1597,26 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, return false; } - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rx_buffer->page_offset, size, truesize); - - /* avoid re-using remote pages */ - if (unlikely(i40e_page_is_reserved(page))) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) - return false; + /* we need the header to contain the greater of either + * ETH_HLEN or 60 bytes if the skb->len is less than + * 60 for skb_pad. + */ + pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE); - /* flip page offset to other buffer */ - rx_buffer->page_offset ^= truesize; -#else - /* move offset up to the next cache line */ - rx_buffer->page_offset += truesize; + /* align pull length to size of long to optimize + * memcpy performance + */ + memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); - if (rx_buffer->page_offset > last_offset) - return false; -#endif + /* update all of the pointers */ + va += pull_len; + size -= pull_len; - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. - */ - get_page(rx_buffer->page); +add_tail_frag: + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + (unsigned long)va & ~PAGE_MASK, size, truesize); - return true; + return i40e_can_reuse_rx_page(rx_buffer, page, truesize); } /** @@ -1611,18 +1631,21 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, */ static inline struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc) + union i40e_rx_desc *rx_desc, + struct sk_buff *skb) { + u64 local_status_error_len = + le64_to_cpu(rx_desc->wb.qword1.status_error_len); + unsigned int size = + (local_status_error_len & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> + I40E_RXD_QW1_LENGTH_PBUF_SHIFT; struct i40e_rx_buffer *rx_buffer; - struct sk_buff *skb; struct page *page; rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; page = rx_buffer->page; prefetchw(page); - skb = rx_buffer->skb; - if (likely(!skb)) { void *page_addr = page_address(page) + rx_buffer->page_offset; @@ -1646,19 +1669,17 @@ struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring, * it now to avoid a possible cache miss */ prefetchw(skb->data); - } else { - rx_buffer->skb = NULL; } /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, - I40E_RXBUFFER_2048, + size, DMA_FROM_DEVICE); /* pull page into skb */ - if (i40e_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + if (i40e_add_rx_frag(rx_ring, rx_buffer, size, skb)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); rx_ring->rx_stats.page_reuse_count++; @@ -1700,7 +1721,6 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, #define staterrlen rx_desc->wb.qword1.status_error_len if (unlikely(i40e_rx_is_programming_status(le64_to_cpu(staterrlen)))) { i40e_clean_programming_status(rx_ring, rx_desc); - rx_ring->rx_bi[ntc].skb = skb; return true; } /* if we are the last buffer then there is nothing else to do */ @@ -1708,8 +1728,6 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF))) return false; - /* place skb in next buffer to be received */ - rx_ring->rx_bi[ntc].skb = skb; rx_ring->rx_stats.non_eop_descs++; return true; @@ -1730,12 +1748,12 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; + struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); bool failure = false; while (likely(total_rx_packets < budget)) { union i40e_rx_desc *rx_desc; - struct sk_buff *skb; u16 vlan_tag; u8 rx_ptype; u64 qword; @@ -1764,7 +1782,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) */ dma_rmb(); - skb = i40e_fetch_rx_buffer(rx_ring, rx_desc); + skb = i40e_fetch_rx_buffer(rx_ring, rx_desc, skb); if (!skb) break; @@ -1783,8 +1801,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) continue; } - if (i40e_cleanup_headers(rx_ring, skb)) + if (i40e_cleanup_headers(rx_ring, skb)) { + skb = NULL; continue; + } /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; @@ -1809,11 +1829,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; i40e_receive_skb(rx_ring, skb, vlan_tag); + skb = NULL; /* update budget accounting */ total_rx_packets++; } + rx_ring->skb = skb; + u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; @@ -1841,14 +1864,14 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) /* a small macro to shorten up some long lines */ #define INTREG I40E_PFINT_DYN_CTLN -static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx) +static inline int get_rx_itr(struct i40e_vsi *vsi, int idx) { - return !!(vsi->rx_rings[idx]->rx_itr_setting); + return vsi->rx_rings[idx]->rx_itr_setting; } -static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx) +static inline int get_tx_itr(struct i40e_vsi *vsi, int idx) { - return !!(vsi->tx_rings[idx]->tx_itr_setting); + return vsi->tx_rings[idx]->tx_itr_setting; } /** @@ -1874,8 +1897,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); - rx_itr_setting = get_rx_itr_enabled(vsi, idx); - tx_itr_setting = get_tx_itr_enabled(vsi, idx); + rx_itr_setting = get_rx_itr(vsi, idx); + tx_itr_setting = get_tx_itr(vsi, idx); if (q_vector->itr_countdown > 0 || (!ITR_IS_DYNAMIC(rx_itr_setting) && @@ -2251,14 +2274,16 @@ out: /** * i40e_tso - set up the tso context descriptor - * @skb: ptr to the skb we're sending + * @first: pointer to first Tx buffer for xmit * @hdr_len: ptr to the size of the packet header * @cd_type_cmd_tso_mss: Quad Word 1 * * Returns 0 if no TSO can happen, 1 if tso is going, or error **/ -static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) +static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len, + u64 *cd_type_cmd_tso_mss) { + struct sk_buff *skb = first->skb; u64 cd_cmd, cd_tso_len, cd_mss; union { struct iphdr *v4; @@ -2271,6 +2296,7 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) unsigned char *hdr; } l4; u32 paylen, l4_offset; + u16 gso_segs, gso_size; int err; if (skb->ip_summed != CHECKSUM_PARTIAL) @@ -2309,7 +2335,8 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) /* remove payload length from outer checksum */ paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.udp->check, htonl(paylen)); + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); } /* reset pointers to inner headers */ @@ -2330,15 +2357,23 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) /* remove payload length from inner checksum */ paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.tcp->check, htonl(paylen)); + csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); /* compute length of segmentation header */ *hdr_len = (l4.tcp->doff * 4) + l4_offset; + /* pull values out of skb_shinfo */ + gso_size = skb_shinfo(skb)->gso_size; + gso_segs = skb_shinfo(skb)->gso_segs; + + /* update GSO size and bytecount with header size */ + first->gso_segs = gso_segs; + first->bytecount += (first->gso_segs - 1) * *hdr_len; + /* find the field values */ cd_cmd = I40E_TX_CTX_DESC_TSO; cd_tso_len = skb->len - *hdr_len; - cd_mss = skb_shinfo(skb)->gso_size; + cd_mss = gso_size; *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); @@ -2699,7 +2734,6 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, u16 i = tx_ring->next_to_use; u32 td_tag = 0; dma_addr_t dma; - u16 gso_segs; u16 desc_count = 1; if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { @@ -2708,15 +2742,6 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TX_FLAGS_VLAN_SHIFT; } - if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) - gso_segs = skb_shinfo(skb)->gso_segs; - else - gso_segs = 1; - - /* multiply data chunks by size of headers */ - first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len); - first->gso_segs = gso_segs; - first->skb = skb; first->tx_flags = tx_flags; dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); @@ -2902,8 +2927,10 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, count = i40e_xmit_descriptor_count(skb); if (i40e_chk_linearize(skb, count)) { - if (__skb_linearize(skb)) - goto out_drop; + if (__skb_linearize(skb)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } count = i40e_txd_use_count(skb->len); tx_ring->tx_stats.tx_linearize++; } @@ -2919,6 +2946,12 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, return NETDEV_TX_BUSY; } + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_bi[tx_ring->next_to_use]; + first->skb = skb; + first->bytecount = skb->len; + first->gso_segs = 1; + /* prepare the xmit flags */ if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) goto out_drop; @@ -2926,16 +2959,13 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, /* obtain protocol of skb */ protocol = vlan_get_protocol(skb); - /* record the location of the first descriptor for this packet */ - first = &tx_ring->tx_bi[tx_ring->next_to_use]; - /* setup IPv4/IPv6 offloads */ if (protocol == htons(ETH_P_IP)) tx_flags |= I40E_TX_FLAGS_IPV4; else if (protocol == htons(ETH_P_IPV6)) tx_flags |= I40E_TX_FLAGS_IPV6; - tso = i40e_tso(skb, &hdr_len, &cd_type_cmd_tso_mss); + tso = i40e_tso(first, &hdr_len, &cd_type_cmd_tso_mss); if (tso < 0) goto out_drop; @@ -2973,7 +3003,8 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, return NETDEV_TX_OK; out_drop: - dev_kfree_skb_any(skb); + dev_kfree_skb_any(first->skb); + first->skb = NULL; return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index e065321ce8ed..f80979025c01 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -52,7 +52,20 @@ */ #define INTRL_ENA BIT(6) #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2) -#define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0) +/** + * i40e_intrl_usec_to_reg - convert interrupt rate limit to register + * @intrl: interrupt rate limit to convert + * + * This function converts a decimal interrupt rate limit to the appropriate + * register format expected by the firmware when setting interrupt rate limit. + */ +static inline u16 i40e_intrl_usec_to_reg(int intrl) +{ + if (intrl >> 2) + return ((intrl >> 2) | INTRL_ENA); + else + return 0; +} #define I40E_INTRL_8K 125 /* 8000 ints/sec */ #define I40E_INTRL_62K 16 /* 62500 ints/sec */ #define I40E_INTRL_83K 12 /* 83333 ints/sec */ @@ -240,7 +253,6 @@ struct i40e_tx_buffer { }; struct i40e_rx_buffer { - struct sk_buff *skb; dma_addr_t dma; struct page *page; unsigned int page_offset; @@ -341,6 +353,14 @@ struct i40e_ring { struct rcu_head rcu; /* to avoid race on free */ u16 next_to_alloc; + struct sk_buff *skb; /* When i40e_clean_rx_ring_irq() must + * return before it sees the EOP for + * the current packet, we save that skb + * here and resume receiving this + * packet the next time + * i40e_clean_rx_ring_irq() is called + * for this ring. + */ } ____cacheline_internodealigned_in_smp; enum i40e_latency_range { diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index edc0abdf4783..939f9fdc8f85 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -125,7 +125,6 @@ enum i40e_debug_mask { */ enum i40e_mac_type { I40E_MAC_UNKNOWN = 0, - I40E_MAC_X710, I40E_MAC_XL710, I40E_MAC_VF, I40E_MAC_X722, @@ -185,6 +184,7 @@ struct i40e_link_status { enum i40e_aq_link_speed link_speed; u8 link_info; u8 an_info; + u8 fec_info; u8 ext_info; u8 loopback; /* is Link Status Event notification to SW enabled */ @@ -470,6 +470,7 @@ struct i40e_bus_info { u16 func; u16 device; u16 lan_id; + u16 bus_id; }; /* Flow control (FC) parameters */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index a6198b727e24..cbbf8648307a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -689,17 +689,15 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) spin_lock_bh(&vsi->mac_filter_hash_lock); if (is_valid_ether_addr(vf->default_lan_addr.addr)) { - f = i40e_add_filter(vsi, vf->default_lan_addr.addr, - vf->port_vlan_id ? - vf->port_vlan_id : -1); + f = i40e_add_mac_filter(vsi, + vf->default_lan_addr.addr); if (!f) dev_info(&pf->pdev->dev, "Could not add MAC filter %pM for VF %d\n", vf->default_lan_addr.addr, vf->vf_id); } eth_broadcast_addr(broadcast); - f = i40e_add_filter(vsi, broadcast, - vf->port_vlan_id ? vf->port_vlan_id : -1); + f = i40e_add_mac_filter(vsi, broadcast); if (!f) dev_info(&pf->pdev->dev, "Could not allocate VF broadcast filter\n"); @@ -1942,12 +1940,8 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) struct i40e_mac_filter *f; f = i40e_find_mac(vsi, al->list[i].addr); - if (!f) { - if (i40e_is_vsi_in_vlan(vsi)) - f = i40e_put_mac_in_vlan(vsi, al->list[i].addr); - else - f = i40e_add_filter(vsi, al->list[i].addr, -1); - } + if (!f) + f = i40e_add_mac_filter(vsi, al->list[i].addr); if (!f) { dev_err(&pf->pdev->dev, @@ -2012,7 +2006,7 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) spin_lock_bh(&vsi->mac_filter_hash_lock); /* delete addresses from the list */ for (i = 0; i < al->num_elements; i++) - if (i40e_del_mac_all_vlan(vsi, al->list[i].addr)) { + if (i40e_del_mac_filter(vsi, al->list[i].addr)) { ret = I40E_ERR_INVALID_MAC_ADDR; spin_unlock_bh(&vsi->mac_filter_hash_lock); goto error_param; @@ -2722,14 +2716,13 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) /* delete the temporary mac address */ if (!is_zero_ether_addr(vf->default_lan_addr.addr)) - i40e_del_filter(vsi, vf->default_lan_addr.addr, - vf->port_vlan_id ? vf->port_vlan_id : -1); + i40e_del_mac_filter(vsi, vf->default_lan_addr.addr); /* Delete all the filters for this VSI - we're going to kill it * anyway. */ hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) - i40e_del_filter(vsi, f->macaddr, f->vlan); + __i40e_del_filter(vsi, f); spin_unlock_bh(&vsi->mac_filter_hash_lock); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c index aa63b7fb993d..89dfdbca13db 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_common.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c @@ -64,7 +64,6 @@ i40e_status i40e_set_mac_type(struct i40e_hw *hw) hw->mac.type = I40E_MAC_X722; break; case I40E_DEV_ID_X722_VF: - case I40E_DEV_ID_X722_VF_HV: hw->mac.type = I40E_MAC_X722_VF; break; case I40E_DEV_ID_VF: @@ -305,7 +304,6 @@ void i40evf_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, { struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc; u8 *buf = (u8 *)buffer; - u16 i = 0; if ((!(mask & hw->debug_mask)) || (desc == NULL)) return; @@ -333,12 +331,18 @@ void i40evf_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, if (buf_len < len) len = buf_len; /* write the full 16-byte chunks */ - for (i = 0; i < (len - 16); i += 16) - i40e_debug(hw, mask, "\t0x%04X %16ph\n", i, buf + i); - /* write whatever's left over without overrunning the buffer */ - if (i < len) - i40e_debug(hw, mask, "\t0x%04X %*ph\n", - i, len - i, buf + i); + if (hw->debug_mask & mask) { + char prefix[20]; + + snprintf(prefix, 20, + "i40evf %02x:%02x.%x: \t0x", + hw->bus.bus_id, + hw->bus.device, + hw->bus.func); + + print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET, + 16, 1, buf, len, false); + } } } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_devids.h b/drivers/net/ethernet/intel/i40evf/i40e_devids.h index 21dcaee1ad1d..d76393c95056 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_devids.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_devids.h @@ -48,7 +48,6 @@ #define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 #define I40E_DEV_ID_SFP_I_X722 0x37D3 #define I40E_DEV_ID_X722_VF 0x37CD -#define I40E_DEV_ID_X722_VF_HV 0x37D9 #define i40e_is_40G_device(d) ((d) == I40E_DEV_ID_QSFP_A || \ (d) == I40E_DEV_ID_QSFP_B || \ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index df67ef37b7f3..c91fcf43ccbc 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -501,14 +501,15 @@ void i40evf_clean_rx_ring(struct i40e_ring *rx_ring) if (!rx_ring->rx_bi) return; + if (rx_ring->skb) { + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + } + /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; - if (rx_bi->skb) { - dev_kfree_skb(rx_bi->skb); - rx_bi->skb = NULL; - } if (!rx_bi->page) continue; @@ -903,45 +904,6 @@ void i40evf_process_skb_fields(struct i40e_ring *rx_ring, } /** - * i40e_pull_tail - i40e specific version of skb_pull_tail - * @rx_ring: rx descriptor ring packet is being transacted on - * @skb: pointer to current skb being adjusted - * - * This function is an i40e specific version of __pskb_pull_tail. The - * main difference between this version and the original function is that - * this function can make several assumptions about the state of things - * that allow for significant optimizations versus the standard function. - * As a result we can do things like drop a frag and maintain an accurate - * truesize for the skb. - */ -static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb) -{ - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; - unsigned char *va; - unsigned int pull_len; - - /* it is valid to use page_address instead of kmap since we are - * working with pages allocated out of the lomem pool per - * alloc_page(GFP_ATOMIC) - */ - va = skb_frag_address(frag); - - /* we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. - */ - pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); - - /* update all of the pointers */ - skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; - skb->data_len -= pull_len; - skb->tail += pull_len; -} - -/** * i40e_cleanup_headers - Correct empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @skb: pointer to current skb being fixed @@ -956,10 +918,6 @@ static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb) **/ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb) { - /* place header in linear portion of buffer */ - if (skb_is_nonlinear(skb)) - i40e_pull_tail(rx_ring, skb); - /* if eth_skb_pad returns an error the skb was freed */ if (eth_skb_pad(skb)) return true; @@ -991,19 +949,85 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, } /** - * i40e_page_is_reserved - check if reuse is possible + * i40e_page_is_reusable - check if any reuse is possible * @page: page struct to check + * + * A page is not reusable if it was allocated under low memory + * conditions, or it's not in the same NUMA node as this CPU. */ -static inline bool i40e_page_is_reserved(struct page *page) +static inline bool i40e_page_is_reusable(struct page *page) { - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); + return (page_to_nid(page) == numa_mem_id()) && + !page_is_pfmemalloc(page); +} + +/** + * i40e_can_reuse_rx_page - Determine if this page can be reused by + * the adapter for another receive + * + * @rx_buffer: buffer containing the page + * @page: page address from rx_buffer + * @truesize: actual size of the buffer in this page + * + * If page is reusable, rx_buffer->page_offset is adjusted to point to + * an unused region in the page. + * + * For small pages, @truesize will be a constant value, half the size + * of the memory at page. We'll attempt to alternate between high and + * low halves of the page, with one half ready for use by the hardware + * and the other half being consumed by the stack. We use the page + * ref count to determine whether the stack has finished consuming the + * portion of this page that was passed up with a previous packet. If + * the page ref count is >1, we'll assume the "other" half page is + * still busy, and this page cannot be reused. + * + * For larger pages, @truesize will be the actual space used by the + * received packet (adjusted upward to an even multiple of the cache + * line size). This will advance through the page by the amount + * actually consumed by the received packets while there is still + * space for a buffer. Each region of larger pages will be used at + * most once, after which the page will not be reused. + * + * In either case, if the page is reusable its refcount is increased. + **/ +static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, + struct page *page, + const unsigned int truesize) +{ +#if (PAGE_SIZE >= 8192) + unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; +#endif + + /* Is any reuse possible? */ + if (unlikely(!i40e_page_is_reusable(page))) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= truesize; +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > last_offset) + return false; +#endif + + /* Inc ref count on page before passing it up to the stack */ + get_page(page); + + return true; } /** * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: buffer containing page to add - * @rx_desc: descriptor containing length of buffer written by hardware + * @size: packet length from rx_desc * @skb: sk_buff to place the data into * * This function will add the data contained in rx_buffer->page to the skb. @@ -1016,30 +1040,29 @@ static inline bool i40e_page_is_reserved(struct page *page) **/ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, - union i40e_rx_desc *rx_desc, + unsigned int size, struct sk_buff *skb) { struct page *page = rx_buffer->page; - u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - unsigned int size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> - I40E_RXD_QW1_LENGTH_PBUF_SHIFT; + unsigned char *va = page_address(page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) unsigned int truesize = I40E_RXBUFFER_2048; #else unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); - unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; #endif + unsigned int pull_len; + + if (unlikely(skb_is_nonlinear(skb))) + goto add_tail_frag; /* will the data fit in the skb we allocated? if so, just * copy it as it is pretty small anyway */ - if ((size <= I40E_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) { - unsigned char *va = page_address(page) + rx_buffer->page_offset; - + if (size <= I40E_RX_HDR_SIZE) { memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - /* page is not reserved, we can reuse buffer as-is */ - if (likely(!i40e_page_is_reserved(page))) + /* page is reusable, we can reuse buffer as-is */ + if (likely(i40e_page_is_reusable(page))) return true; /* this page cannot be reused so discard it */ @@ -1047,34 +1070,26 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, return false; } - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rx_buffer->page_offset, size, truesize); - - /* avoid re-using remote pages */ - if (unlikely(i40e_page_is_reserved(page))) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) - return false; + /* we need the header to contain the greater of either + * ETH_HLEN or 60 bytes if the skb->len is less than + * 60 for skb_pad. + */ + pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE); - /* flip page offset to other buffer */ - rx_buffer->page_offset ^= truesize; -#else - /* move offset up to the next cache line */ - rx_buffer->page_offset += truesize; + /* align pull length to size of long to optimize + * memcpy performance + */ + memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); - if (rx_buffer->page_offset > last_offset) - return false; -#endif + /* update all of the pointers */ + va += pull_len; + size -= pull_len; - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. - */ - get_page(rx_buffer->page); +add_tail_frag: + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + (unsigned long)va & ~PAGE_MASK, size, truesize); - return true; + return i40e_can_reuse_rx_page(rx_buffer, page, truesize); } /** @@ -1089,18 +1104,21 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, */ static inline struct sk_buff *i40evf_fetch_rx_buffer(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc) + union i40e_rx_desc *rx_desc, + struct sk_buff *skb) { + u64 local_status_error_len = + le64_to_cpu(rx_desc->wb.qword1.status_error_len); + unsigned int size = + (local_status_error_len & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> + I40E_RXD_QW1_LENGTH_PBUF_SHIFT; struct i40e_rx_buffer *rx_buffer; - struct sk_buff *skb; struct page *page; rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; page = rx_buffer->page; prefetchw(page); - skb = rx_buffer->skb; - if (likely(!skb)) { void *page_addr = page_address(page) + rx_buffer->page_offset; @@ -1124,19 +1142,17 @@ struct sk_buff *i40evf_fetch_rx_buffer(struct i40e_ring *rx_ring, * it now to avoid a possible cache miss */ prefetchw(skb->data); - } else { - rx_buffer->skb = NULL; } /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, - I40E_RXBUFFER_2048, + size, DMA_FROM_DEVICE); /* pull page into skb */ - if (i40e_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + if (i40e_add_rx_frag(rx_ring, rx_buffer, size, skb)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); rx_ring->rx_stats.page_reuse_count++; @@ -1180,8 +1196,6 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF))) return false; - /* place skb in next buffer to be received */ - rx_ring->rx_bi[ntc].skb = skb; rx_ring->rx_stats.non_eop_descs++; return true; @@ -1202,12 +1216,12 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; + struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); bool failure = false; while (likely(total_rx_packets < budget)) { union i40e_rx_desc *rx_desc; - struct sk_buff *skb; u16 vlan_tag; u8 rx_ptype; u64 qword; @@ -1236,7 +1250,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) */ dma_rmb(); - skb = i40evf_fetch_rx_buffer(rx_ring, rx_desc); + skb = i40evf_fetch_rx_buffer(rx_ring, rx_desc, skb); if (!skb) break; @@ -1255,8 +1269,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) continue; } - if (i40e_cleanup_headers(rx_ring, skb)) + if (i40e_cleanup_headers(rx_ring, skb)) { + skb = NULL; continue; + } /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; @@ -1273,11 +1289,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; i40e_receive_skb(rx_ring, skb, vlan_tag); + skb = NULL; /* update budget accounting */ total_rx_packets++; } + rx_ring->skb = skb; + u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; @@ -1305,18 +1324,18 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) /* a small macro to shorten up some long lines */ #define INTREG I40E_VFINT_DYN_CTLN1 -static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx) +static inline int get_rx_itr(struct i40e_vsi *vsi, int idx) { struct i40evf_adapter *adapter = vsi->back; - return !!(adapter->rx_rings[idx].rx_itr_setting); + return adapter->rx_rings[idx].rx_itr_setting; } -static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx) +static inline int get_tx_itr(struct i40e_vsi *vsi, int idx) { struct i40evf_adapter *adapter = vsi->back; - return !!(adapter->tx_rings[idx].tx_itr_setting); + return adapter->tx_rings[idx].tx_itr_setting; } /** @@ -1342,8 +1361,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); - rx_itr_setting = get_rx_itr_enabled(vsi, idx); - tx_itr_setting = get_tx_itr_enabled(vsi, idx); + rx_itr_setting = get_rx_itr(vsi, idx); + tx_itr_setting = get_tx_itr(vsi, idx); if (q_vector->itr_countdown > 0 || (!ITR_IS_DYNAMIC(rx_itr_setting) && @@ -1549,14 +1568,16 @@ out: /** * i40e_tso - set up the tso context descriptor - * @skb: ptr to the skb we're sending + * @first: pointer to first Tx buffer for xmit * @hdr_len: ptr to the size of the packet header * @cd_type_cmd_tso_mss: Quad Word 1 * * Returns 0 if no TSO can happen, 1 if tso is going, or error **/ -static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) +static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len, + u64 *cd_type_cmd_tso_mss) { + struct sk_buff *skb = first->skb; u64 cd_cmd, cd_tso_len, cd_mss; union { struct iphdr *v4; @@ -1569,6 +1590,7 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) unsigned char *hdr; } l4; u32 paylen, l4_offset; + u16 gso_segs, gso_size; int err; if (skb->ip_summed != CHECKSUM_PARTIAL) @@ -1607,7 +1629,8 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) /* remove payload length from outer checksum */ paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.udp->check, htonl(paylen)); + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); } /* reset pointers to inner headers */ @@ -1628,15 +1651,23 @@ static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) /* remove payload length from inner checksum */ paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.tcp->check, htonl(paylen)); + csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); /* compute length of segmentation header */ *hdr_len = (l4.tcp->doff * 4) + l4_offset; + /* pull values out of skb_shinfo */ + gso_size = skb_shinfo(skb)->gso_size; + gso_segs = skb_shinfo(skb)->gso_segs; + + /* update GSO size and bytecount with header size */ + first->gso_segs = gso_segs; + first->bytecount += (first->gso_segs - 1) * *hdr_len; + /* find the field values */ cd_cmd = I40E_TX_CTX_DESC_TSO; cd_tso_len = skb->len - *hdr_len; - cd_mss = skb_shinfo(skb)->gso_size; + cd_mss = gso_size; *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); @@ -1949,7 +1980,6 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, u16 i = tx_ring->next_to_use; u32 td_tag = 0; dma_addr_t dma; - u16 gso_segs; u16 desc_count = 1; if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { @@ -1958,15 +1988,6 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TX_FLAGS_VLAN_SHIFT; } - if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) - gso_segs = skb_shinfo(skb)->gso_segs; - else - gso_segs = 1; - - /* multiply data chunks by size of headers */ - first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len); - first->gso_segs = gso_segs; - first->skb = skb; first->tx_flags = tx_flags; dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); @@ -2151,8 +2172,10 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, count = i40e_xmit_descriptor_count(skb); if (i40e_chk_linearize(skb, count)) { - if (__skb_linearize(skb)) - goto out_drop; + if (__skb_linearize(skb)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } count = i40e_txd_use_count(skb->len); tx_ring->tx_stats.tx_linearize++; } @@ -2168,6 +2191,12 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, return NETDEV_TX_BUSY; } + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_bi[tx_ring->next_to_use]; + first->skb = skb; + first->bytecount = skb->len; + first->gso_segs = 1; + /* prepare the xmit flags */ if (i40evf_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) goto out_drop; @@ -2175,16 +2204,13 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, /* obtain protocol of skb */ protocol = vlan_get_protocol(skb); - /* record the location of the first descriptor for this packet */ - first = &tx_ring->tx_bi[tx_ring->next_to_use]; - /* setup IPv4/IPv6 offloads */ if (protocol == htons(ETH_P_IP)) tx_flags |= I40E_TX_FLAGS_IPV4; else if (protocol == htons(ETH_P_IPV6)) tx_flags |= I40E_TX_FLAGS_IPV6; - tso = i40e_tso(skb, &hdr_len, &cd_type_cmd_tso_mss); + tso = i40e_tso(first, &hdr_len, &cd_type_cmd_tso_mss); if (tso < 0) goto out_drop; @@ -2211,7 +2237,8 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, return NETDEV_TX_OK; out_drop: - dev_kfree_skb_any(skb); + dev_kfree_skb_any(first->skb); + first->skb = NULL; return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index a5fc789f78eb..8274ba68bd32 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -239,7 +239,6 @@ struct i40e_tx_buffer { }; struct i40e_rx_buffer { - struct sk_buff *skb; dma_addr_t dma; struct page *page; unsigned int page_offset; @@ -340,6 +339,14 @@ struct i40e_ring { struct rcu_head rcu; /* to avoid race on free */ u16 next_to_alloc; + struct sk_buff *skb; /* When i40evf_clean_rx_ring_irq() must + * return before it sees the EOP for + * the current packet, we save that skb + * here and resume receiving this + * packet the next time + * i40evf_clean_rx_ring_irq() is called + * for this ring. + */ } ____cacheline_internodealigned_in_smp; enum i40e_latency_range { diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index c85e8a31c072..16bb88084bb9 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -100,7 +100,6 @@ enum i40e_debug_mask { */ enum i40e_mac_type { I40E_MAC_UNKNOWN = 0, - I40E_MAC_X710, I40E_MAC_XL710, I40E_MAC_VF, I40E_MAC_X722, @@ -159,6 +158,7 @@ struct i40e_link_status { enum i40e_aq_link_speed link_speed; u8 link_info; u8 an_info; + u8 fec_info; u8 ext_info; u8 loopback; /* is Link Status Event notification to SW enabled */ @@ -443,6 +443,7 @@ struct i40e_bus_info { u16 func; u16 device; u16 lan_id; + u16 bus_id; }; /* Flow control (FC) parameters */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h index fc374f833aa9..d38a2b2aea2b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h @@ -81,6 +81,7 @@ enum i40e_virtchnl_ops { I40E_VIRTCHNL_OP_GET_STATS = 15, I40E_VIRTCHNL_OP_FCOE = 16, I40E_VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */ + I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21, I40E_VIRTCHNL_OP_CONFIG_RSS_KEY = 23, I40E_VIRTCHNL_OP_CONFIG_RSS_LUT = 24, I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25, diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index fffe4cf2c20b..00c42d803276 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -195,6 +195,7 @@ struct i40evf_adapter { u64 hw_csum_rx_error; u32 rx_desc_count; int num_msix_vectors; + u32 client_pending; struct msix_entry *msix_entries; u32 flags; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index c0fc53361800..f35dcaac5bb7 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -38,7 +38,7 @@ static const char i40evf_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 25 +#define DRV_VERSION_BUILD 27 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ @@ -59,7 +59,6 @@ static const struct pci_device_id i40evf_pci_tbl[] = { {PCI_VDEVICE(INTEL, I40E_DEV_ID_VF), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_VF_HV), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_X722_VF), 0}, - {PCI_VDEVICE(INTEL, I40E_DEV_ID_X722_VF_HV), 0}, /* required last entry */ {0, } }; @@ -2154,6 +2153,11 @@ static int i40evf_close(struct net_device *netdev) adapter->state = __I40EVF_DOWN_PENDING; i40evf_free_traffic_irqs(adapter); + /* We explicitly don't free resources here because the hardware is + * still active and can DMA into memory. Resources are cleared in + * i40evf_virtchnl_completion() after we get confirmation from the PF + * driver that the rings have been stopped. + */ return 0; } @@ -2727,6 +2731,7 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->subsystem_device_id = pdev->subsystem_device; hw->bus.device = PCI_SLOT(pdev->devfn); hw->bus.func = PCI_FUNC(pdev->devfn); + hw->bus.bus_id = pdev->bus->number; /* set up the locks for the AQ, do this only once in probe * and destroy them only once in remove @@ -2871,7 +2876,8 @@ static void i40evf_remove(struct pci_dev *pdev) i40evf_request_reset(adapter); msleep(50); } - + i40evf_free_all_tx_resources(adapter); + i40evf_free_all_rx_resources(adapter); i40evf_misc_irq_disable(adapter); i40evf_free_misc_irq(adapter); i40evf_reset_interrupt_capability(adapter); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 2059a8e88908..bee58af390e1 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -999,6 +999,10 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, if (v_opcode != adapter->current_op) return; break; + case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP: + adapter->client_pending &= + ~(BIT(I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP)); + break; case I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS: { struct i40e_virtchnl_rss_hena *vrh = (struct i40e_virtchnl_rss_hena *)msg; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 7546109d4980..be456bae8169 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -383,9 +383,9 @@ static void igb_dump(struct igb_adapter *adapter) /* Print netdevice Info */ if (netdev) { dev_info(&adapter->pdev->dev, "Net device Info\n"); - pr_info("Device Name state trans_start last_rx\n"); - pr_info("%-15s %016lX %016lX %016lX\n", netdev->name, - netdev->state, dev_trans_start(netdev), netdev->last_rx); + pr_info("Device Name state trans_start\n"); + pr_info("%-15s %016lX %016lX\n", netdev->name, + netdev->state, dev_trans_start(netdev)); } /* Print Registers */ @@ -3964,8 +3964,8 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring) PAGE_SIZE, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); - __page_frag_drain(buffer_info->page, 0, - buffer_info->pagecnt_bias); + __page_frag_cache_drain(buffer_info->page, + buffer_info->pagecnt_bias); buffer_info->page = NULL; } @@ -6991,7 +6991,7 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); - __page_frag_drain(page, 0, rx_buffer->pagecnt_bias); + __page_frag_cache_drain(page, rx_buffer->pagecnt_bias); } /* clear contents of rx_buffer */ diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 5826b1ddedcf..fbd220d137b3 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -1817,7 +1817,7 @@ ixgb_clean(struct napi_struct *napi, int budget) /* If budget not fully consumed, exit the polling mode */ if (work_done < budget) { - napi_complete(napi); + napi_complete_done(napi, work_done); if (!test_bit(__IXGB_DOWN, &adapter->flags)) ixgb_irq_enable(adapter); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index aeedc812ee27..a2cc43d28888 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -55,9 +55,6 @@ #include <net/busy_poll.h> -#ifdef CONFIG_NET_RX_BUSY_POLL -#define BP_EXTENDED_STATS -#endif /* common prefix used by pr_<> macros */ #undef pr_fmt #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -94,6 +91,14 @@ #define IXGBE_RXBUFFER_4K 4096 #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ +#define IXGBE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) +#if (PAGE_SIZE < 8192) +#define IXGBE_MAX_FRAME_BUILD_SKB \ + (SKB_WITH_OVERHEAD(IXGBE_RXBUFFER_2K) - IXGBE_SKB_PAD) +#else +#define IGB_MAX_FRAME_BUILD_SKB IXGBE_RXBUFFER_2K +#endif + /* * NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we * reserve 64 more, and skb_shared_info adds an additional 320 bytes more, @@ -107,6 +112,9 @@ /* How many Rx Buffers do we bundle into one write to the hardware ? */ #define IXGBE_RX_BUFFER_WRITE 16 /* Must be power of 2 */ +#define IXGBE_RX_DMA_ATTR \ + (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) + enum ixgbe_tx_flags { /* cmd_type flags */ IXGBE_TX_FLAGS_HW_VLAN = 0x01, @@ -195,17 +203,17 @@ struct ixgbe_rx_buffer { struct sk_buff *skb; dma_addr_t dma; struct page *page; - unsigned int page_offset; +#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) + __u32 page_offset; +#else + __u16 page_offset; +#endif + __u16 pagecnt_bias; }; struct ixgbe_queue_stats { u64 packets; u64 bytes; -#ifdef BP_EXTENDED_STATS - u64 yields; - u64 misses; - u64 cleaned; -#endif /* BP_EXTENDED_STATS */ }; struct ixgbe_tx_queue_stats { @@ -226,15 +234,20 @@ struct ixgbe_rx_queue_stats { #define IXGBE_TS_HDR_LEN 8 enum ixgbe_ring_state_t { + __IXGBE_RX_3K_BUFFER, + __IXGBE_RX_BUILD_SKB_ENABLED, + __IXGBE_RX_RSC_ENABLED, + __IXGBE_RX_CSUM_UDP_ZERO_ERR, + __IXGBE_RX_FCOE, __IXGBE_TX_FDIR_INIT_DONE, __IXGBE_TX_XPS_INIT_DONE, __IXGBE_TX_DETECT_HANG, __IXGBE_HANG_CHECK_ARMED, - __IXGBE_RX_RSC_ENABLED, - __IXGBE_RX_CSUM_UDP_ZERO_ERR, - __IXGBE_RX_FCOE, }; +#define ring_uses_build_skb(ring) \ + test_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &(ring)->state) + struct ixgbe_fwd_adapter { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct net_device *netdev; @@ -344,19 +357,20 @@ struct ixgbe_ring_feature { */ static inline unsigned int ixgbe_rx_bufsz(struct ixgbe_ring *ring) { -#ifdef IXGBE_FCOE - if (test_bit(__IXGBE_RX_FCOE, &ring->state)) - return (PAGE_SIZE < 8192) ? IXGBE_RXBUFFER_4K : - IXGBE_RXBUFFER_3K; + if (test_bit(__IXGBE_RX_3K_BUFFER, &ring->state)) + return IXGBE_RXBUFFER_3K; +#if (PAGE_SIZE < 8192) + if (ring_uses_build_skb(ring)) + return IXGBE_MAX_FRAME_BUILD_SKB; #endif return IXGBE_RXBUFFER_2K; } static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) { -#ifdef IXGBE_FCOE - if (test_bit(__IXGBE_RX_FCOE, &ring->state)) - return (PAGE_SIZE < 8192) ? 1 : 0; +#if (PAGE_SIZE < 8192) + if (test_bit(__IXGBE_RX_3K_BUFFER, &ring->state)) + return 1; #endif return 0; } @@ -399,127 +413,10 @@ struct ixgbe_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[IFNAMSIZ + 9]; -#ifdef CONFIG_NET_RX_BUSY_POLL - atomic_t state; -#endif /* CONFIG_NET_RX_BUSY_POLL */ - /* for dynamic allocation of rings associated with this q_vector */ struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp; }; -#ifdef CONFIG_NET_RX_BUSY_POLL -enum ixgbe_qv_state_t { - IXGBE_QV_STATE_IDLE = 0, - IXGBE_QV_STATE_NAPI, - IXGBE_QV_STATE_POLL, - IXGBE_QV_STATE_DISABLE -}; - -static inline void ixgbe_qv_init_lock(struct ixgbe_q_vector *q_vector) -{ - /* reset state to idle */ - atomic_set(&q_vector->state, IXGBE_QV_STATE_IDLE); -} - -/* called from the device poll routine to get ownership of a q_vector */ -static inline bool ixgbe_qv_lock_napi(struct ixgbe_q_vector *q_vector) -{ - int rc = atomic_cmpxchg(&q_vector->state, IXGBE_QV_STATE_IDLE, - IXGBE_QV_STATE_NAPI); -#ifdef BP_EXTENDED_STATS - if (rc != IXGBE_QV_STATE_IDLE) - q_vector->tx.ring->stats.yields++; -#endif - - return rc == IXGBE_QV_STATE_IDLE; -} - -/* returns true is someone tried to get the qv while napi had it */ -static inline void ixgbe_qv_unlock_napi(struct ixgbe_q_vector *q_vector) -{ - WARN_ON(atomic_read(&q_vector->state) != IXGBE_QV_STATE_NAPI); - - /* flush any outstanding Rx frames */ - if (q_vector->napi.gro_list) - napi_gro_flush(&q_vector->napi, false); - - /* reset state to idle */ - atomic_set(&q_vector->state, IXGBE_QV_STATE_IDLE); -} - -/* called from ixgbe_low_latency_poll() */ -static inline bool ixgbe_qv_lock_poll(struct ixgbe_q_vector *q_vector) -{ - int rc = atomic_cmpxchg(&q_vector->state, IXGBE_QV_STATE_IDLE, - IXGBE_QV_STATE_POLL); -#ifdef BP_EXTENDED_STATS - if (rc != IXGBE_QV_STATE_IDLE) - q_vector->rx.ring->stats.yields++; -#endif - return rc == IXGBE_QV_STATE_IDLE; -} - -/* returns true if someone tried to get the qv while it was locked */ -static inline void ixgbe_qv_unlock_poll(struct ixgbe_q_vector *q_vector) -{ - WARN_ON(atomic_read(&q_vector->state) != IXGBE_QV_STATE_POLL); - - /* reset state to idle */ - atomic_set(&q_vector->state, IXGBE_QV_STATE_IDLE); -} - -/* true if a socket is polling, even if it did not get the lock */ -static inline bool ixgbe_qv_busy_polling(struct ixgbe_q_vector *q_vector) -{ - return atomic_read(&q_vector->state) == IXGBE_QV_STATE_POLL; -} - -/* false if QV is currently owned */ -static inline bool ixgbe_qv_disable(struct ixgbe_q_vector *q_vector) -{ - int rc = atomic_cmpxchg(&q_vector->state, IXGBE_QV_STATE_IDLE, - IXGBE_QV_STATE_DISABLE); - - return rc == IXGBE_QV_STATE_IDLE; -} - -#else /* CONFIG_NET_RX_BUSY_POLL */ -static inline void ixgbe_qv_init_lock(struct ixgbe_q_vector *q_vector) -{ -} - -static inline bool ixgbe_qv_lock_napi(struct ixgbe_q_vector *q_vector) -{ - return true; -} - -static inline bool ixgbe_qv_unlock_napi(struct ixgbe_q_vector *q_vector) -{ - return false; -} - -static inline bool ixgbe_qv_lock_poll(struct ixgbe_q_vector *q_vector) -{ - return false; -} - -static inline bool ixgbe_qv_unlock_poll(struct ixgbe_q_vector *q_vector) -{ - return false; -} - -static inline bool ixgbe_qv_busy_polling(struct ixgbe_q_vector *q_vector) -{ - return false; -} - -static inline bool ixgbe_qv_disable(struct ixgbe_q_vector *q_vector) -{ - return true; -} - -#endif /* CONFIG_NET_RX_BUSY_POLL */ - #ifdef CONFIG_IXGBE_HWMON #define IXGBE_HWMON_TYPE_LOC 0 @@ -664,6 +561,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_VLAN_PROMISC BIT(13) #define IXGBE_FLAG2_EEE_CAPABLE BIT(14) #define IXGBE_FLAG2_EEE_ENABLED BIT(15) +#define IXGBE_FLAG2_RX_LEGACY BIT(16) /* Tx fast path data */ int num_tx_queues; @@ -876,7 +774,7 @@ extern const struct ixgbe_info ixgbe_X550EM_x_info; extern const struct ixgbe_info ixgbe_x550em_a_info; extern const struct ixgbe_info ixgbe_x550em_a_fw_info; #ifdef CONFIG_IXGBE_DCB -extern const struct dcbnl_rtnl_ops dcbnl_ops; +extern const struct dcbnl_rtnl_ops ixgbe_dcbnl_ops; #endif extern char ixgbe_driver_name[]; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 094e1d63309a..c38d50c1fcf7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -350,7 +350,7 @@ s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw) } IXGBE_WRITE_FLUSH(hw); -#ifndef CONFIG_SPARC +#ifndef CONFIG_ARCH_WANT_RELAX_ORDER /* Disable relaxed ordering */ for (i = 0; i < hw->mac.max_tx_queues; i++) { u32 regval; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c index b8fc3cfec831..78c52375acc6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c @@ -777,7 +777,7 @@ static u8 ixgbe_dcbnl_setdcbx(struct net_device *dev, u8 mode) return err ? 1 : 0; } -const struct dcbnl_rtnl_ops dcbnl_ops = { +const struct dcbnl_rtnl_ops ixgbe_dcbnl_ops = { .ieee_getets = ixgbe_dcbnl_ieee_getets, .ieee_setets = ixgbe_dcbnl_ieee_setets, .ieee_getpfc = ixgbe_dcbnl_ieee_getpfc, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 17589068da78..a7574c7b12af 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -151,6 +151,13 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = { }; #define IXGBE_TEST_LEN sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN +static const char ixgbe_priv_flags_strings[][ETH_GSTRING_LEN] = { +#define IXGBE_PRIV_FLAGS_LEGACY_RX BIT(0) + "legacy-rx", +}; + +#define IXGBE_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbe_priv_flags_strings) + /* currently supported speeds for 10G */ #define ADVRTSD_MSK_10G (SUPPORTED_10000baseT_Full | \ SUPPORTED_10000baseKX4_Full | \ @@ -340,6 +347,9 @@ static int ixgbe_get_settings(struct net_device *netdev, case IXGBE_LINK_SPEED_10GB_FULL: ethtool_cmd_speed_set(ecmd, SPEED_10000); break; + case IXGBE_LINK_SPEED_5GB_FULL: + ethtool_cmd_speed_set(ecmd, SPEED_5000); + break; case IXGBE_LINK_SPEED_2_5GB_FULL: ethtool_cmd_speed_set(ecmd, SPEED_2500); break; @@ -998,6 +1008,8 @@ static void ixgbe_get_drvinfo(struct net_device *netdev, strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info)); + + drvinfo->n_priv_flags = IXGBE_PRIV_FLAGS_STR_LEN; } static void ixgbe_get_ringparam(struct net_device *netdev, @@ -1137,6 +1149,8 @@ static int ixgbe_get_sset_count(struct net_device *netdev, int sset) return IXGBE_TEST_LEN; case ETH_SS_STATS: return IXGBE_STATS_LEN; + case ETH_SS_PRIV_FLAGS: + return IXGBE_PRIV_FLAGS_STR_LEN; default: return -EOPNOTSUPP; } @@ -1179,12 +1193,6 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, data[i] = 0; data[i+1] = 0; i += 2; -#ifdef BP_EXTENDED_STATS - data[i] = 0; - data[i+1] = 0; - data[i+2] = 0; - i += 3; -#endif continue; } @@ -1194,12 +1202,6 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, data[i+1] = ring->stats.bytes; } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); i += 2; -#ifdef BP_EXTENDED_STATS - data[i] = ring->stats.yields; - data[i+1] = ring->stats.misses; - data[i+2] = ring->stats.cleaned; - i += 3; -#endif } for (j = 0; j < IXGBE_NUM_RX_QUEUES; j++) { ring = adapter->rx_ring[j]; @@ -1207,12 +1209,6 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, data[i] = 0; data[i+1] = 0; i += 2; -#ifdef BP_EXTENDED_STATS - data[i] = 0; - data[i+1] = 0; - data[i+2] = 0; - i += 3; -#endif continue; } @@ -1222,12 +1218,6 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, data[i+1] = ring->stats.bytes; } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); i += 2; -#ifdef BP_EXTENDED_STATS - data[i] = ring->stats.yields; - data[i+1] = ring->stats.misses; - data[i+2] = ring->stats.cleaned; - i += 3; -#endif } for (j = 0; j < IXGBE_MAX_PACKET_BUFFERS; j++) { @@ -1264,28 +1254,12 @@ static void ixgbe_get_strings(struct net_device *netdev, u32 stringset, p += ETH_GSTRING_LEN; sprintf(p, "tx_queue_%u_bytes", i); p += ETH_GSTRING_LEN; -#ifdef BP_EXTENDED_STATS - sprintf(p, "tx_queue_%u_bp_napi_yield", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_queue_%u_bp_misses", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_queue_%u_bp_cleaned", i); - p += ETH_GSTRING_LEN; -#endif /* BP_EXTENDED_STATS */ } for (i = 0; i < IXGBE_NUM_RX_QUEUES; i++) { sprintf(p, "rx_queue_%u_packets", i); p += ETH_GSTRING_LEN; sprintf(p, "rx_queue_%u_bytes", i); p += ETH_GSTRING_LEN; -#ifdef BP_EXTENDED_STATS - sprintf(p, "rx_queue_%u_bp_poll_yield", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_bp_misses", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_bp_cleaned", i); - p += ETH_GSTRING_LEN; -#endif /* BP_EXTENDED_STATS */ } for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) { sprintf(p, "tx_pb_%u_pxon", i); @@ -1301,6 +1275,9 @@ static void ixgbe_get_strings(struct net_device *netdev, u32 stringset, } /* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */ break; + case ETH_SS_PRIV_FLAGS: + memcpy(data, ixgbe_priv_flags_strings, + IXGBE_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN); } } @@ -1905,7 +1882,7 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, tx_ntc = tx_ring->next_to_clean; rx_desc = IXGBE_RX_DESC(rx_ring, rx_ntc); - while (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) { + while (rx_desc->wb.upper.length) { /* check Rx buffer */ rx_buffer = &rx_ring->rx_buffer_info[rx_ntc]; @@ -1927,7 +1904,16 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, /* unmap buffer on Tx side */ tx_buffer = &tx_ring->tx_buffer_info[tx_ntc]; - ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer); + + /* Free all the Tx ring sk_buffs */ + dev_kfree_skb_any(tx_buffer->skb); + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); /* increment Rx/Tx next to clean counters */ rx_ntc++; @@ -3382,6 +3368,37 @@ static int ixgbe_set_eee(struct net_device *netdev, struct ethtool_eee *edata) return 0; } +static u32 ixgbe_get_priv_flags(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + u32 priv_flags = 0; + + if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY) + priv_flags |= IXGBE_PRIV_FLAGS_LEGACY_RX; + + return priv_flags; +} + +static int ixgbe_set_priv_flags(struct net_device *netdev, u32 priv_flags) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + unsigned int flags2 = adapter->flags2; + + flags2 &= ~IXGBE_FLAG2_RX_LEGACY; + if (priv_flags & IXGBE_PRIV_FLAGS_LEGACY_RX) + flags2 |= IXGBE_FLAG2_RX_LEGACY; + + if (flags2 != adapter->flags2) { + adapter->flags2 = flags2; + + /* reset interface to repopulate queues */ + if (netif_running(netdev)) + ixgbe_reinit_locked(adapter); + } + + return 0; +} + static const struct ethtool_ops ixgbe_ethtool_ops = { .get_settings = ixgbe_get_settings, .set_settings = ixgbe_set_settings, @@ -3418,6 +3435,8 @@ static const struct ethtool_ops ixgbe_ethtool_ops = { .set_eee = ixgbe_set_eee, .get_channels = ixgbe_get_channels, .set_channels = ixgbe_set_channels, + .get_priv_flags = ixgbe_get_priv_flags, + .set_priv_flags = ixgbe_set_priv_flags, .get_ts_info = ixgbe_get_ts_info, .get_module_info = ixgbe_get_module_info, .get_module_eeprom = ixgbe_get_module_eeprom, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 10d29678d65e..1b8be7d813bd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -853,11 +853,6 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, netif_napi_add(adapter->netdev, &q_vector->napi, ixgbe_poll, 64); -#ifdef CONFIG_NET_RX_BUSY_POLL - /* initialize busy poll */ - atomic_set(&q_vector->state, IXGBE_QV_STATE_DISABLE); - -#endif /* tie q_vector and adapter together */ adapter->q_vector[v_idx] = q_vector; q_vector->adapter = adapter; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ffe7d940d9ff..060cdce8058f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -72,7 +72,7 @@ char ixgbe_default_device_descr[] = static char ixgbe_default_device_descr[] = "Intel(R) 10 Gigabit Network Connection"; #endif -#define DRV_VERSION "4.4.0-k" +#define DRV_VERSION "5.0.0-k" const char ixgbe_driver_version[] = DRV_VERSION; static const char ixgbe_copyright[] = "Copyright (c) 1999-2016 Intel Corporation."; @@ -611,12 +611,11 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) if (netdev) { dev_info(&adapter->pdev->dev, "Net device Info\n"); pr_info("Device Name state " - "trans_start last_rx\n"); - pr_info("%-15s %016lX %016lX %016lX\n", + "trans_start\n"); + pr_info("%-15s %016lX %016lX\n", netdev->name, netdev->state, - dev_trans_start(netdev), - netdev->last_rx); + dev_trans_start(netdev)); } /* Print Registers */ @@ -946,28 +945,6 @@ static inline void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter, } } -void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *ring, - struct ixgbe_tx_buffer *tx_buffer) -{ - if (tx_buffer->skb) { - dev_kfree_skb_any(tx_buffer->skb); - if (dma_unmap_len(tx_buffer, len)) - dma_unmap_single(ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - } else if (dma_unmap_len(tx_buffer, len)) { - dma_unmap_page(ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - } - tx_buffer->next_to_watch = NULL; - tx_buffer->skb = NULL; - dma_unmap_len_set(tx_buffer, len, 0); - /* tx_buffer must be completely set up in the transmit path */ -} - static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; @@ -1199,7 +1176,6 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, DMA_TO_DEVICE); /* clear tx_buffer data */ - tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); /* unmap remaining buffers */ @@ -1553,6 +1529,11 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, } } +static inline unsigned int ixgbe_rx_offset(struct ixgbe_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? IXGBE_SKB_PAD : 0; +} + static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *bi) { @@ -1571,8 +1552,10 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, } /* map page for use */ - dma = dma_map_page(rx_ring->dev, page, 0, - ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); + dma = dma_map_page_attrs(rx_ring->dev, page, 0, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBE_RX_DMA_ATTR); /* * if mapping failed free memory back to system since @@ -1587,7 +1570,8 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, bi->dma = dma; bi->page = page; - bi->page_offset = 0; + bi->page_offset = ixgbe_rx_offset(rx_ring); + bi->pagecnt_bias = 1; return true; } @@ -1602,6 +1586,7 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) union ixgbe_adv_rx_desc *rx_desc; struct ixgbe_rx_buffer *bi; u16 i = rx_ring->next_to_use; + u16 bufsz; /* nothing to do */ if (!cleaned_count) @@ -1611,10 +1596,17 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) bi = &rx_ring->rx_buffer_info[i]; i -= rx_ring->count; + bufsz = ixgbe_rx_bufsz(rx_ring); + do { if (!ixgbe_alloc_mapped_page(rx_ring, bi)) break; + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, bi->dma, + bi->page_offset, bufsz, + DMA_FROM_DEVICE); + /* * Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. @@ -1630,8 +1622,8 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) i -= rx_ring->count; } - /* clear the status bits for the next_to_use descriptor */ - rx_desc->wb.upper.status_error = 0; + /* clear the length for the next_to_use descriptor */ + rx_desc->wb.upper.length = 0; cleaned_count--; } while (cleaned_count); @@ -1721,11 +1713,7 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector, struct sk_buff *skb) { - skb_mark_napi_id(skb, &q_vector->napi); - if (ixgbe_qv_busy_polling(q_vector)) - netif_receive_skb(skb); - else - napi_gro_receive(&q_vector->napi, skb); + napi_gro_receive(&q_vector->napi, skb); } /** @@ -1837,19 +1825,19 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, { /* if the page was released unmap it, else just sync our portion */ if (unlikely(IXGBE_CB(skb)->page_released)) { - dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma, - ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); - IXGBE_CB(skb)->page_released = false; + dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBE_RX_DMA_ATTR); } else { struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; dma_sync_single_range_for_cpu(rx_ring->dev, IXGBE_CB(skb)->dma, frag->page_offset, - ixgbe_rx_bufsz(rx_ring), + skb_frag_size(frag), DMA_FROM_DEVICE); } - IXGBE_CB(skb)->dma = 0; } /** @@ -1885,7 +1873,7 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, } /* place header in linear portion of buffer */ - if (skb_is_nonlinear(skb)) + if (!skb_headlen(skb)) ixgbe_pull_tail(rx_ring, skb); #ifdef IXGBE_FCOE @@ -1920,14 +1908,14 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, nta++; rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - /* transfer page from old buffer to new buffer */ - *new_buff = *old_buff; - - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma, - new_buff->page_offset, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); + /* Transfer page from old buffer to new buffer. + * Move each member individually to avoid possible store + * forwarding stalls and unnecessary copy of skb. + */ + new_buff->dma = old_buff->dma; + new_buff->page = old_buff->page; + new_buff->page_offset = old_buff->page_offset; + new_buff->pagecnt_bias = old_buff->pagecnt_bias; } static inline bool ixgbe_page_is_reserved(struct page *page) @@ -1935,6 +1923,43 @@ static inline bool ixgbe_page_is_reserved(struct page *page) return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } +static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer) +{ + unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; + struct page *page = rx_buffer->page; + + /* avoid re-using remote pages */ + if (unlikely(ixgbe_page_is_reserved(page))) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) + return false; +#else + /* The last offset is a bit aggressive in that we assume the + * worst case of FCoE being enabled and using a 3K buffer. + * However this should have minimal impact as the 1K extra is + * still less than one buffer in size. + */ +#define IXGBE_LAST_OFFSET \ + (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBE_RXBUFFER_3K) + if (rx_buffer->page_offset > IXGBE_LAST_OFFSET) + return false; +#endif + + /* If we have drained the page fragment pool we need to update + * the pagecnt_bias and page count so that we fully restock the + * number of references the driver holds. + */ + if (unlikely(!pagecnt_bias)) { + page_ref_add(page, USHRT_MAX); + rx_buffer->pagecnt_bias = USHRT_MAX; + } + + return true; +} + /** * ixgbe_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on @@ -1950,144 +1975,172 @@ static inline bool ixgbe_page_is_reserved(struct page *page) * The function will then update the page offset if necessary and return * true if the buffer can be reused by the adapter. **/ -static bool ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, +static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *rx_buffer, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) + struct sk_buff *skb, + unsigned int size) { - struct page *page = rx_buffer->page; - unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); #if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbe_rx_bufsz(rx_ring); + unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); - unsigned int last_offset = ixgbe_rx_pg_size(rx_ring) - - ixgbe_rx_bufsz(rx_ring); + unsigned int truesize = ring_uses_build_skb(rx_ring) ? + SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) : + SKB_DATA_ALIGN(size); #endif - - if ((size <= IXGBE_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) { - unsigned char *va = page_address(page) + rx_buffer->page_offset; - - memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - - /* page is not reserved, we can reuse buffer as-is */ - if (likely(!ixgbe_page_is_reserved(page))) - return true; - - /* this page cannot be reused so discard it */ - __free_pages(page, ixgbe_rx_pg_order(rx_ring)); - return false; - } - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, rx_buffer->page_offset, size, truesize); - - /* avoid re-using remote pages */ - if (unlikely(ixgbe_page_is_reserved(page))) - return false; - #if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) - return false; - - /* flip page offset to other buffer */ rx_buffer->page_offset ^= truesize; #else - /* move offset up to the next cache line */ rx_buffer->page_offset += truesize; - - if (rx_buffer->page_offset > last_offset) - return false; #endif - - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. - */ - page_ref_inc(page); - - return true; } -static struct sk_buff *ixgbe_fetch_rx_buffer(struct ixgbe_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc) +static struct ixgbe_rx_buffer *ixgbe_get_rx_buffer(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff **skb, + const unsigned int size) { struct ixgbe_rx_buffer *rx_buffer; - struct sk_buff *skb; - struct page *page; rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - page = rx_buffer->page; - prefetchw(page); + prefetchw(rx_buffer->page); + *skb = rx_buffer->skb; - skb = rx_buffer->skb; + /* Delay unmapping of the first packet. It carries the header + * information, HW may still access the header after the writeback. + * Only unmap it when EOP is reached + */ + if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) { + if (!*skb) + goto skip_sync; + } else { + if (*skb) + ixgbe_dma_sync_frag(rx_ring, *skb); + } - if (likely(!skb)) { - void *page_addr = page_address(page) + - rx_buffer->page_offset; + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + size, + DMA_FROM_DEVICE); +skip_sync: + rx_buffer->pagecnt_bias--; - /* prefetch first cache line of first page */ - prefetch(page_addr); -#if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES); -#endif + return rx_buffer; +} - /* allocate a skb to store the frags */ - skb = napi_alloc_skb(&rx_ring->q_vector->napi, - IXGBE_RX_HDR_SIZE); - if (unlikely(!skb)) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - return NULL; +static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring, + struct ixgbe_rx_buffer *rx_buffer, + struct sk_buff *skb) +{ + if (ixgbe_can_reuse_rx_page(rx_buffer)) { + /* hand second half of page back to the ring */ + ixgbe_reuse_rx_page(rx_ring, rx_buffer); + } else { + if (IXGBE_CB(skb)->dma == rx_buffer->dma) { + /* the page has been released from the ring */ + IXGBE_CB(skb)->page_released = true; + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBE_RX_DMA_ATTR); } + __page_frag_cache_drain(rx_buffer->page, + rx_buffer->pagecnt_bias); + } - /* - * we will be copying header into skb->data in - * pskb_may_pull so it is in our interest to prefetch - * it now to avoid a possible cache miss - */ - prefetchw(skb->data); + /* clear contents of rx_buffer */ + rx_buffer->page = NULL; + rx_buffer->skb = NULL; +} - /* - * Delay unmapping of the first packet. It carries the - * header information, HW may still access the header - * after the writeback. Only unmap it when EOP is - * reached - */ - if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) - goto dma_sync; +static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring, + struct ixgbe_rx_buffer *rx_buffer, + union ixgbe_adv_rx_desc *rx_desc, + unsigned int size) +{ + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(size); +#endif + struct sk_buff *skb; - IXGBE_CB(skb)->dma = rx_buffer->dma; - } else { - if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) - ixgbe_dma_sync_frag(rx_ring, skb); + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif -dma_sync: - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); + /* allocate a skb to store the frags */ + skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBE_RX_HDR_SIZE); + if (unlikely(!skb)) + return NULL; - rx_buffer->skb = NULL; - } + if (size > IXGBE_RX_HDR_SIZE) { + if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) + IXGBE_CB(skb)->dma = rx_buffer->dma; - /* pull page into skb */ - if (ixgbe_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { - /* hand second half of page back to the ring */ - ixgbe_reuse_rx_page(rx_ring, rx_buffer); - } else if (IXGBE_CB(skb)->dma == rx_buffer->dma) { - /* the page has been released from the ring */ - IXGBE_CB(skb)->page_released = true; + skb_add_rx_frag(skb, 0, rx_buffer->page, + rx_buffer->page_offset, + size, truesize); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page(rx_ring->dev, rx_buffer->dma, - ixgbe_rx_pg_size(rx_ring), - DMA_FROM_DEVICE); + memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + rx_buffer->pagecnt_bias++; } - /* clear contents of buffer_info */ - rx_buffer->page = NULL; + return skb; +} + +static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, + struct ixgbe_rx_buffer *rx_buffer, + union ixgbe_adv_rx_desc *rx_desc, + unsigned int size) +{ + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + + SKB_DATA_ALIGN(IXGBE_SKB_PAD + size); +#endif + struct sk_buff *skb; + + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif + + /* build an skb to around the page buffer */ + skb = build_skb(va - IXGBE_SKB_PAD, truesize); + if (unlikely(!skb)) + return NULL; + + /* update pointers within the skb to store the data */ + skb_reserve(skb, IXGBE_SKB_PAD); + __skb_put(skb, size); + + /* record DMA address if this is the start of a chain of buffers */ + if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) + IXGBE_CB(skb)->dma = rx_buffer->dma; + + /* update buffer offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif return skb; } @@ -2119,7 +2172,9 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; + struct ixgbe_rx_buffer *rx_buffer; struct sk_buff *skb; + unsigned int size; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) { @@ -2128,8 +2183,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, } rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean); - - if (!rx_desc->wb.upper.status_error) + size = le16_to_cpu(rx_desc->wb.upper.length); + if (!size) break; /* This memory barrier is needed to keep us from reading @@ -2138,13 +2193,26 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, */ dma_rmb(); + rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size); + /* retrieve a buffer from the ring */ - skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc); + if (skb) + ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size); + else if (ring_uses_build_skb(rx_ring)) + skb = ixgbe_build_skb(rx_ring, rx_buffer, + rx_desc, size); + else + skb = ixgbe_construct_skb(rx_ring, rx_buffer, + rx_desc, size); /* exit if we failed to retrieve a buffer */ - if (!skb) + if (!skb) { + rx_ring->rx_stats.alloc_rx_buff_failed++; + rx_buffer->pagecnt_bias++; break; + } + ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb); cleaned_count++; /* place incomplete frames back on ring for completion */ @@ -2202,40 +2270,6 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, return total_rx_packets; } -#ifdef CONFIG_NET_RX_BUSY_POLL -/* must be called with local_bh_disable()d */ -static int ixgbe_low_latency_recv(struct napi_struct *napi) -{ - struct ixgbe_q_vector *q_vector = - container_of(napi, struct ixgbe_q_vector, napi); - struct ixgbe_adapter *adapter = q_vector->adapter; - struct ixgbe_ring *ring; - int found = 0; - - if (test_bit(__IXGBE_DOWN, &adapter->state)) - return LL_FLUSH_FAILED; - - if (!ixgbe_qv_lock_poll(q_vector)) - return LL_FLUSH_BUSY; - - ixgbe_for_each_ring(ring, q_vector->rx) { - found = ixgbe_clean_rx_irq(q_vector, ring, 4); -#ifdef BP_EXTENDED_STATS - if (found) - ring->stats.cleaned += found; - else - ring->stats.misses++; -#endif - if (found) - break; - } - - ixgbe_qv_unlock_poll(q_vector); - - return found; -} -#endif /* CONFIG_NET_RX_BUSY_POLL */ - /** * ixgbe_configure_msix - Configure MSI-X hardware * @adapter: board private structure @@ -2879,8 +2913,8 @@ int ixgbe_poll(struct napi_struct *napi, int budget) clean_complete = false; } - /* Exit if we are called by netpoll or busy polling is active */ - if ((budget <= 0) || !ixgbe_qv_lock_napi(q_vector)) + /* Exit if we are called by netpoll */ + if (budget <= 0) return budget; /* attempt to distribute budget to each queue fairly, but don't allow @@ -2899,7 +2933,6 @@ int ixgbe_poll(struct napi_struct *napi, int budget) clean_complete = false; } - ixgbe_qv_unlock_napi(q_vector); /* If all work not completed, return budget and keep polling */ if (!clean_complete) return budget; @@ -3237,6 +3270,10 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, clear_bit(__IXGBE_HANG_CHECK_ARMED, &ring->state); + /* reinitialize tx_buffer_info */ + memset(ring->tx_buffer_info, 0, + sizeof(struct ixgbe_tx_buffer) * ring->count); + /* enable queue */ IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), txdctl); @@ -3407,7 +3444,10 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, srrctl = IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; /* configure the packet buffer length */ - srrctl |= ixgbe_rx_bufsz(rx_ring) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + if (test_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state)) + srrctl |= IXGBE_RXBUFFER_3K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + else + srrctl |= IXGBE_RXBUFFER_2K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; /* configure descriptor type */ srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; @@ -3708,6 +3748,7 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, struct ixgbe_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; + union ixgbe_adv_rx_desc *rx_desc; u64 rdba = ring->dma; u32 rxdctl; u8 reg_idx = ring->reg_idx; @@ -3740,8 +3781,27 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, */ rxdctl &= ~0x3FFFFF; rxdctl |= 0x080420; +#if (PAGE_SIZE < 8192) + } else { + rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | + IXGBE_RXDCTL_RLPML_EN); + + /* Limit the maximum frame size so we don't overrun the skb */ + if (ring_uses_build_skb(ring) && + !test_bit(__IXGBE_RX_3K_BUFFER, &ring->state)) + rxdctl |= IXGBE_MAX_FRAME_BUILD_SKB | + IXGBE_RXDCTL_RLPML_EN; +#endif } + /* initialize rx_buffer_info */ + memset(ring->rx_buffer_info, 0, + sizeof(struct ixgbe_rx_buffer) * ring->count); + + /* initialize Rx descriptor 0 */ + rx_desc = IXGBE_RX_DESC(ring, 0); + rx_desc->wb.upper.length = 0; + /* enable receive descriptor ring */ rxdctl |= IXGBE_RXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl); @@ -3878,10 +3938,30 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) */ for (i = 0; i < adapter->num_rx_queues; i++) { rx_ring = adapter->rx_ring[i]; + + clear_ring_rsc_enabled(rx_ring); + clear_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); + clear_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state); + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) set_ring_rsc_enabled(rx_ring); - else - clear_ring_rsc_enabled(rx_ring); + + if (test_bit(__IXGBE_RX_FCOE, &rx_ring->state)) + set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); + + clear_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state); + if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY) + continue; + + set_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state); + +#if (PAGE_SIZE < 8192) + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) + set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); + + if (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN)) + set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); +#endif } } @@ -4582,23 +4662,16 @@ static void ixgbe_napi_enable_all(struct ixgbe_adapter *adapter) { int q_idx; - for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++) { - ixgbe_qv_init_lock(adapter->q_vector[q_idx]); + for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++) napi_enable(&adapter->q_vector[q_idx]->napi); - } } static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter) { int q_idx; - for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++) { + for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++) napi_disable(&adapter->q_vector[q_idx]->napi); - while (!ixgbe_qv_disable(adapter->q_vector[q_idx])) { - pr_info("QV %d locked\n", q_idx); - usleep_range(1000, 20000); - } - } } static void ixgbe_clear_udp_tunnel_port(struct ixgbe_adapter *adapter, u32 mask) @@ -4902,45 +4975,47 @@ static void ixgbe_fwd_psrtype(struct ixgbe_fwd_adapter *vadapter) **/ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) { - struct device *dev = rx_ring->dev; - unsigned long size; - u16 i; - - /* ring already cleared, nothing to do */ - if (!rx_ring->rx_buffer_info) - return; + u16 i = rx_ring->next_to_clean; + struct ixgbe_rx_buffer *rx_buffer = &rx_ring->rx_buffer_info[i]; /* Free all the Rx ring sk_buffs */ - for (i = 0; i < rx_ring->count; i++) { - struct ixgbe_rx_buffer *rx_buffer = &rx_ring->rx_buffer_info[i]; - + while (i != rx_ring->next_to_alloc) { if (rx_buffer->skb) { struct sk_buff *skb = rx_buffer->skb; if (IXGBE_CB(skb)->page_released) - dma_unmap_page(dev, - IXGBE_CB(skb)->dma, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); + dma_unmap_page_attrs(rx_ring->dev, + IXGBE_CB(skb)->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBE_RX_DMA_ATTR); dev_kfree_skb(skb); - rx_buffer->skb = NULL; } - if (!rx_buffer->page) - continue; + /* Invalidate cache lines that may have been written to by + * device so that we avoid corrupting memory. + */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); - dma_unmap_page(dev, rx_buffer->dma, - ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); - __free_pages(rx_buffer->page, ixgbe_rx_pg_order(rx_ring)); + /* free resources associated with mapping */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBE_RX_DMA_ATTR); + __page_frag_cache_drain(rx_buffer->page, + rx_buffer->pagecnt_bias); - rx_buffer->page = NULL; + i++; + rx_buffer++; + if (i == rx_ring->count) { + i = 0; + rx_buffer = rx_ring->rx_buffer_info; + } } - size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; - memset(rx_ring->rx_buffer_info, 0, size); - - /* Zero out the descriptor ring */ - memset(rx_ring->desc, 0, rx_ring->size); - rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; @@ -5409,28 +5484,57 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) **/ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring) { - struct ixgbe_tx_buffer *tx_buffer_info; - unsigned long size; - u16 i; + u16 i = tx_ring->next_to_clean; + struct ixgbe_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; - /* ring already cleared, nothing to do */ - if (!tx_ring->tx_buffer_info) - return; + while (i != tx_ring->next_to_use) { + union ixgbe_adv_tx_desc *eop_desc, *tx_desc; - /* Free all the Tx ring sk_buffs */ - for (i = 0; i < tx_ring->count; i++) { - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); - } + /* Free all the Tx ring sk_buffs */ + dev_kfree_skb_any(tx_buffer->skb); - netdev_tx_reset_queue(txring_txq(tx_ring)); + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); - size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count; - memset(tx_ring->tx_buffer_info, 0, size); + /* check for eop_desc to determine the end of the packet */ + eop_desc = tx_buffer->next_to_watch; + tx_desc = IXGBE_TX_DESC(tx_ring, i); + + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buffer++; + tx_desc++; + i++; + if (unlikely(i == tx_ring->count)) { + i = 0; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IXGBE_TX_DESC(tx_ring, 0); + } + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + } - /* Zero out the descriptor ring */ - memset(tx_ring->desc, 0, tx_ring->size); + /* move us one more past the eop_desc for start of next pkt */ + tx_buffer++; + i++; + if (unlikely(i == tx_ring->count)) { + i = 0; + tx_buffer = tx_ring->tx_buffer_info; + } + } + /* reset BQL for queue */ + netdev_tx_reset_queue(txring_txq(tx_ring)); + + /* reset next_to_use and next_to_clean */ tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; } @@ -5876,9 +5980,9 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring) if (tx_ring->q_vector) ring_node = tx_ring->q_vector->numa_node; - tx_ring->tx_buffer_info = vzalloc_node(size, ring_node); + tx_ring->tx_buffer_info = vmalloc_node(size, ring_node); if (!tx_ring->tx_buffer_info) - tx_ring->tx_buffer_info = vzalloc(size); + tx_ring->tx_buffer_info = vmalloc(size); if (!tx_ring->tx_buffer_info) goto err; @@ -5960,9 +6064,9 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) if (rx_ring->q_vector) ring_node = rx_ring->q_vector->numa_node; - rx_ring->rx_buffer_info = vzalloc_node(size, ring_node); + rx_ring->rx_buffer_info = vmalloc_node(size, ring_node); if (!rx_ring->rx_buffer_info) - rx_ring->rx_buffer_info = vzalloc(size); + rx_ring->rx_buffer_info = vmalloc(size); if (!rx_ring->rx_buffer_info) goto err; @@ -7677,18 +7781,32 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, return; dma_error: dev_err(tx_ring->dev, "TX DMA map failed\n"); + tx_buffer = &tx_ring->tx_buffer_info[i]; /* clear dma mappings for failed tx_buffer_info map */ - for (;;) { + while (tx_buffer != first) { + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + + if (i--) + i += tx_ring->count; tx_buffer = &tx_ring->tx_buffer_info[i]; - ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer); - if (tx_buffer == first) - break; - if (i == 0) - i = tx_ring->count; - i--; } + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + + dev_kfree_skb_any(first->skb); + first->skb = NULL; + tx_ring->next_to_use = i; } @@ -9353,9 +9471,6 @@ static const struct net_device_ops ixgbe_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ixgbe_netpoll, #endif -#ifdef CONFIG_NET_RX_BUSY_POLL - .ndo_busy_poll = ixgbe_low_latency_recv, -#endif #ifdef IXGBE_FCOE .ndo_fcoe_ddp_setup = ixgbe_fcoe_ddp_get, .ndo_fcoe_ddp_target = ixgbe_fcoe_ddp_target, @@ -9737,7 +9852,7 @@ skip_sriov: #ifdef CONFIG_IXGBE_DCB if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE) - netdev->dcbnl_ops = &dcbnl_ops; + netdev->dcbnl_ops = &ixgbe_dcbnl_ops; #endif #ifdef IXGBE_FCOE diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 2fcde8777a29..e55b2602f371 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -768,9 +768,7 @@ s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw, ixgbe_link_speed speed, bool autoneg_wait_to_complete) { - - /* - * Clear autoneg_advertised and set new values based on input link + /* Clear autoneg_advertised and set new values based on input link * speed. */ hw->phy.autoneg_advertised = 0; @@ -778,6 +776,12 @@ s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw, if (speed & IXGBE_LINK_SPEED_10GB_FULL) hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL; + if (speed & IXGBE_LINK_SPEED_5GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_5GB_FULL; + + if (speed & IXGBE_LINK_SPEED_2_5GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_2_5GB_FULL; + if (speed & IXGBE_LINK_SPEED_1GB_FULL) hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL; diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index 508e72c5f1c2..1f6c0ecd50bb 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -432,11 +432,6 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, if (!ring) { data[i++] = 0; data[i++] = 0; -#ifdef BP_EXTENDED_STATS - data[i++] = 0; - data[i++] = 0; - data[i++] = 0; -#endif continue; } @@ -446,12 +441,6 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, data[i + 1] = ring->stats.bytes; } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); i += 2; -#ifdef BP_EXTENDED_STATS - data[i] = ring->stats.yields; - data[i + 1] = ring->stats.misses; - data[i + 2] = ring->stats.cleaned; - i += 3; -#endif } /* populate Rx queue data */ @@ -460,11 +449,6 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, if (!ring) { data[i++] = 0; data[i++] = 0; -#ifdef BP_EXTENDED_STATS - data[i++] = 0; - data[i++] = 0; - data[i++] = 0; -#endif continue; } @@ -474,12 +458,6 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, data[i + 1] = ring->stats.bytes; } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); i += 2; -#ifdef BP_EXTENDED_STATS - data[i] = ring->stats.yields; - data[i + 1] = ring->stats.misses; - data[i + 2] = ring->stats.cleaned; - i += 3; -#endif } } @@ -507,28 +485,12 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset, p += ETH_GSTRING_LEN; sprintf(p, "tx_queue_%u_bytes", i); p += ETH_GSTRING_LEN; -#ifdef BP_EXTENDED_STATS - sprintf(p, "tx_queue_%u_bp_napi_yield", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_queue_%u_bp_misses", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_queue_%u_bp_cleaned", i); - p += ETH_GSTRING_LEN; -#endif /* BP_EXTENDED_STATS */ } for (i = 0; i < adapter->num_rx_queues; i++) { sprintf(p, "rx_queue_%u_packets", i); p += ETH_GSTRING_LEN; sprintf(p, "rx_queue_%u_bytes", i); p += ETH_GSTRING_LEN; -#ifdef BP_EXTENDED_STATS - sprintf(p, "rx_queue_%u_bp_poll_yield", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_bp_misses", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_bp_cleaned", i); - p += ETH_GSTRING_LEN; -#endif /* BP_EXTENDED_STATS */ } break; } diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 3fe6504eeab1..a8cbc2dda0dd 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -37,11 +37,6 @@ #include "vf.h" -#ifdef CONFIG_NET_RX_BUSY_POLL -#include <net/busy_poll.h> -#define BP_EXTENDED_STATS -#endif - #define IXGBE_MAX_TXD_PWR 14 #define IXGBE_MAX_DATA_PER_TXD BIT(IXGBE_MAX_TXD_PWR) @@ -73,11 +68,6 @@ struct ixgbevf_rx_buffer { struct ixgbevf_stats { u64 packets; u64 bytes; -#ifdef BP_EXTENDED_STATS - u64 yields; - u64 misses; - u64 cleaned; -#endif }; struct ixgbevf_tx_queue_stats { @@ -217,109 +207,6 @@ struct ixgbevf_q_vector { #endif /* CONFIG_NET_RX_BUSY_POLL */ }; -#ifdef CONFIG_NET_RX_BUSY_POLL -static inline void ixgbevf_qv_init_lock(struct ixgbevf_q_vector *q_vector) -{ - spin_lock_init(&q_vector->lock); - q_vector->state = IXGBEVF_QV_STATE_IDLE; -} - -/* called from the device poll routine to get ownership of a q_vector */ -static inline bool ixgbevf_qv_lock_napi(struct ixgbevf_q_vector *q_vector) -{ - int rc = true; - - spin_lock_bh(&q_vector->lock); - if (q_vector->state & IXGBEVF_QV_LOCKED) { - WARN_ON(q_vector->state & IXGBEVF_QV_STATE_NAPI); - q_vector->state |= IXGBEVF_QV_STATE_NAPI_YIELD; - rc = false; -#ifdef BP_EXTENDED_STATS - q_vector->tx.ring->stats.yields++; -#endif - } else { - /* we don't care if someone yielded */ - q_vector->state = IXGBEVF_QV_STATE_NAPI; - } - spin_unlock_bh(&q_vector->lock); - return rc; -} - -/* returns true is someone tried to get the qv while napi had it */ -static inline bool ixgbevf_qv_unlock_napi(struct ixgbevf_q_vector *q_vector) -{ - int rc = false; - - spin_lock_bh(&q_vector->lock); - WARN_ON(q_vector->state & (IXGBEVF_QV_STATE_POLL | - IXGBEVF_QV_STATE_NAPI_YIELD)); - - if (q_vector->state & IXGBEVF_QV_STATE_POLL_YIELD) - rc = true; - /* reset state to idle, unless QV is disabled */ - q_vector->state &= IXGBEVF_QV_STATE_DISABLED; - spin_unlock_bh(&q_vector->lock); - return rc; -} - -/* called from ixgbevf_low_latency_poll() */ -static inline bool ixgbevf_qv_lock_poll(struct ixgbevf_q_vector *q_vector) -{ - int rc = true; - - spin_lock_bh(&q_vector->lock); - if ((q_vector->state & IXGBEVF_QV_LOCKED)) { - q_vector->state |= IXGBEVF_QV_STATE_POLL_YIELD; - rc = false; -#ifdef BP_EXTENDED_STATS - q_vector->rx.ring->stats.yields++; -#endif - } else { - /* preserve yield marks */ - q_vector->state |= IXGBEVF_QV_STATE_POLL; - } - spin_unlock_bh(&q_vector->lock); - return rc; -} - -/* returns true if someone tried to get the qv while it was locked */ -static inline bool ixgbevf_qv_unlock_poll(struct ixgbevf_q_vector *q_vector) -{ - int rc = false; - - spin_lock_bh(&q_vector->lock); - WARN_ON(q_vector->state & (IXGBEVF_QV_STATE_NAPI)); - - if (q_vector->state & IXGBEVF_QV_STATE_POLL_YIELD) - rc = true; - /* reset state to idle, unless QV is disabled */ - q_vector->state &= IXGBEVF_QV_STATE_DISABLED; - spin_unlock_bh(&q_vector->lock); - return rc; -} - -/* true if a socket is polling, even if it did not get the lock */ -static inline bool ixgbevf_qv_busy_polling(struct ixgbevf_q_vector *q_vector) -{ - WARN_ON(!(q_vector->state & IXGBEVF_QV_OWNED)); - return q_vector->state & IXGBEVF_QV_USER_PEND; -} - -/* false if QV is currently owned */ -static inline bool ixgbevf_qv_disable(struct ixgbevf_q_vector *q_vector) -{ - int rc = true; - - spin_lock_bh(&q_vector->lock); - if (q_vector->state & IXGBEVF_QV_OWNED) - rc = false; - q_vector->state |= IXGBEVF_QV_STATE_DISABLED; - spin_unlock_bh(&q_vector->lock); - return rc; -} - -#endif /* CONFIG_NET_RX_BUSY_POLL */ - /* microsecond values for various ITR rates shifted by 2 to fit itr register * with the first 3 bits reserved 0 */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index b06863560c7d..80bab261a0ec 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -457,16 +457,6 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, static void ixgbevf_rx_skb(struct ixgbevf_q_vector *q_vector, struct sk_buff *skb) { -#ifdef CONFIG_NET_RX_BUSY_POLL - skb_mark_napi_id(skb, &q_vector->napi); - - if (ixgbevf_qv_busy_polling(q_vector)) { - netif_receive_skb(skb); - /* exit early if we busy polled */ - return; - } -#endif /* CONFIG_NET_RX_BUSY_POLL */ - napi_gro_receive(&q_vector->napi, skb); } @@ -1031,10 +1021,6 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) if (budget <= 0) return budget; -#ifdef CONFIG_NET_RX_BUSY_POLL - if (!ixgbevf_qv_lock_napi(q_vector)) - return budget; -#endif /* attempt to distribute budget to each queue fairly, but don't allow * the budget to go below 1 because we'll exit polling @@ -1052,10 +1038,6 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) clean_complete = false; } -#ifdef CONFIG_NET_RX_BUSY_POLL - ixgbevf_qv_unlock_napi(q_vector); -#endif - /* If all work not completed, return budget and keep polling */ if (!clean_complete) return budget; @@ -1090,40 +1072,6 @@ void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector) IXGBE_WRITE_REG(hw, IXGBE_VTEITR(v_idx), itr_reg); } -#ifdef CONFIG_NET_RX_BUSY_POLL -/* must be called with local_bh_disable()d */ -static int ixgbevf_busy_poll_recv(struct napi_struct *napi) -{ - struct ixgbevf_q_vector *q_vector = - container_of(napi, struct ixgbevf_q_vector, napi); - struct ixgbevf_adapter *adapter = q_vector->adapter; - struct ixgbevf_ring *ring; - int found = 0; - - if (test_bit(__IXGBEVF_DOWN, &adapter->state)) - return LL_FLUSH_FAILED; - - if (!ixgbevf_qv_lock_poll(q_vector)) - return LL_FLUSH_BUSY; - - ixgbevf_for_each_ring(ring, q_vector->rx) { - found = ixgbevf_clean_rx_irq(q_vector, ring, 4); -#ifdef BP_EXTENDED_STATS - if (found) - ring->stats.cleaned += found; - else - ring->stats.misses++; -#endif - if (found) - break; - } - - ixgbevf_qv_unlock_poll(q_vector); - - return found; -} -#endif /* CONFIG_NET_RX_BUSY_POLL */ - /** * ixgbevf_configure_msix - Configure MSI-X hardware * @adapter: board private structure @@ -1960,9 +1908,6 @@ static void ixgbevf_napi_enable_all(struct ixgbevf_adapter *adapter) for (q_idx = 0; q_idx < q_vectors; q_idx++) { q_vector = adapter->q_vector[q_idx]; -#ifdef CONFIG_NET_RX_BUSY_POLL - ixgbevf_qv_init_lock(adapter->q_vector[q_idx]); -#endif napi_enable(&q_vector->napi); } } @@ -1976,12 +1921,6 @@ static void ixgbevf_napi_disable_all(struct ixgbevf_adapter *adapter) for (q_idx = 0; q_idx < q_vectors; q_idx++) { q_vector = adapter->q_vector[q_idx]; napi_disable(&q_vector->napi); -#ifdef CONFIG_NET_RX_BUSY_POLL - while (!ixgbevf_qv_disable(adapter->q_vector[q_idx])) { - pr_info("QV %d locked\n", q_idx); - usleep_range(1000, 20000); - } -#endif /* CONFIG_NET_RX_BUSY_POLL */ } } @@ -3978,9 +3917,6 @@ static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_tx_timeout = ixgbevf_tx_timeout, .ndo_vlan_rx_add_vid = ixgbevf_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ixgbevf_vlan_rx_kill_vid, -#ifdef CONFIG_NET_RX_BUSY_POLL - .ndo_busy_poll = ixgbevf_busy_poll_recv, -#endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ixgbevf_netpoll, #endif |