39 files changed, 1267 insertions, 208 deletions
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index e83e3bf850d5..984c98f33258 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -45,6 +45,8 @@ enum hinic_port_cmd { HINIC_PORT_CMD_SET_RX_CSUM = 26, + HINIC_PORT_CMD_SET_RX_VLAN_OFFLOAD = 27, + HINIC_PORT_CMD_GET_PORT_STATISTICS = 28, HINIC_PORT_CMD_CLEAR_PORT_STATISTICS = 29, diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h index c6b809e24983..f4b6d2c1061f 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h @@ -222,6 +222,8 @@ #define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_SHIFT 0 #define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_MASK 0xFFFU +#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_SHIFT 21 +#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_MASK 0x1U #define RQ_CQE_OFFOLAD_TYPE_GET(val, member) (((val) >> \ RQ_CQE_OFFOLAD_TYPE_##member##_SHIFT) & \ @@ -230,6 +232,19 @@ #define HINIC_GET_RX_PKT_TYPE(offload_type) \ RQ_CQE_OFFOLAD_TYPE_GET(offload_type, PKT_TYPE) +#define HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type) \ + RQ_CQE_OFFOLAD_TYPE_GET(offload_type, VLAN_EN) + +#define RQ_CQE_SGE_VLAN_MASK 0xFFFFU +#define RQ_CQE_SGE_VLAN_SHIFT 0 + +#define RQ_CQE_SGE_GET(val, member) (((val) >> \ + RQ_CQE_SGE_##member##_SHIFT) & \ + RQ_CQE_SGE_##member##_MASK) + +#define HINIC_GET_RX_VLAN_TAG(vlan_len) \ + RQ_CQE_SGE_GET(vlan_len, VLAN) + #define HINIC_RSS_TYPE_VALID_SHIFT 23 #define HINIC_RSS_TYPE_TCP_IPV6_EXT_SHIFT 24 #define HINIC_RSS_TYPE_IPV6_EXT_SHIFT 25 diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index ceb0e247f52d..2411ad270c98 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -836,14 +836,14 @@ static const struct net_device_ops hinic_netdev_ops = { .ndo_get_stats64 = hinic_get_stats64, .ndo_fix_features = hinic_fix_features, .ndo_set_features = hinic_set_features, - }; static void netdev_features_init(struct net_device *netdev) { netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_RXCSUM | NETIF_F_LRO; + NETIF_F_RXCSUM | NETIF_F_LRO | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; netdev->vlan_features = netdev->hw_features; @@ -923,6 +923,11 @@ static int set_features(struct hinic_dev *nic_dev, HINIC_LRO_MAX_WQE_NUM_DEFAULT); } + if (changed & NETIF_F_HW_VLAN_CTAG_RX) + err = hinic_set_rx_vlan_offload(nic_dev, + !!(features & + NETIF_F_HW_VLAN_CTAG_RX)); + return err; } diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c index c07adf793215..1bbeb91be808 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_port.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c @@ -431,6 +431,36 @@ int hinic_set_rx_csum_offload(struct hinic_dev *nic_dev, u32 en) return 0; } +int hinic_set_rx_vlan_offload(struct hinic_dev *nic_dev, u8 en) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_vlan_cfg vlan_cfg; + struct hinic_hwif *hwif; + struct pci_dev *pdev; + u16 out_size; + int err; + + if (!hwdev) + return -EINVAL; + + hwif = hwdev->hwif; + pdev = hwif->pdev; + vlan_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif); + vlan_cfg.vlan_rx_offload = en; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RX_VLAN_OFFLOAD, + &vlan_cfg, sizeof(vlan_cfg), + &vlan_cfg, 
&out_size); + if (err || !out_size || vlan_cfg.status) { + dev_err(&pdev->dev, + "Failed to set rx vlan offload, err: %d, status: 0x%x, out size: 0x%x\n", + err, vlan_cfg.status, out_size); + return -EINVAL; + } + + return 0; +} + int hinic_set_max_qnum(struct hinic_dev *nic_dev, u8 num_rqs) { struct hinic_hwdev *hwdev = nic_dev->hwdev; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h index 16140a13000b..1bc47c7a5c00 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_port.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h @@ -223,6 +223,16 @@ struct hinic_lro_timer { u32 timer; }; +struct hinic_vlan_cfg { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 vlan_rx_offload; + u8 rsvd1[5]; +}; + struct hinic_rss_template_mgmt { u8 status; u8 version; @@ -558,4 +568,7 @@ int hinic_get_phy_port_stats(struct hinic_dev *nic_dev, int hinic_get_vport_stats(struct hinic_dev *nic_dev, struct hinic_vport_stats *stats); + +int hinic_set_rx_vlan_offload(struct hinic_dev *nic_dev, u8 en); + #endif diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index 609ad4333cdd..56ea6d692f1c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -18,6 +18,7 @@ #include <linux/dma-mapping.h> #include <linux/prefetch.h> #include <linux/cpumask.h> +#include <linux/if_vlan.h> #include <asm/barrier.h> #include "hinic_common.h" @@ -325,6 +326,7 @@ static int rx_recv_jumbo_pkt(struct hinic_rxq *rxq, struct sk_buff *head_skb, static int rxq_recv(struct hinic_rxq *rxq, int budget) { struct hinic_qp *qp = container_of(rxq->rq, struct hinic_qp, rq); + struct net_device *netdev = rxq->netdev; u64 pkt_len = 0, rx_bytes = 0; struct hinic_rq *rq = rxq->rq; struct hinic_rq_wqe *rq_wqe; @@ -334,8 +336,11 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget) struct hinic_sge sge; unsigned int status; struct sk_buff *skb; + u32 offload_type; u16 ci, num_lro; u16 num_wqe = 0; + u32 vlan_len; + u16 vid; while (pkts < budget) { num_wqes = 0; @@ -368,6 +373,14 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget) hinic_rq_put_wqe(rq, ci, (num_wqes + 1) * HINIC_RQ_WQE_SIZE); + offload_type = be32_to_cpu(cqe->offload_type); + vlan_len = be32_to_cpu(cqe->len); + if ((netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && + HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type)) { + vid = HINIC_GET_RX_VLAN_TAG(vlan_len); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); + } + skb_record_rx_queue(skb, qp->q_id); skb->protocol = eth_type_trans(skb, rxq->netdev); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index f4f76370cd65..9c78251f9c39 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -407,10 +407,20 @@ static int offload_csum(struct hinic_sq_task *task, u32 *queue_info, return 1; } +static void offload_vlan(struct hinic_sq_task *task, u32 *queue_info, + u16 vlan_tag, u16 vlan_pri) +{ + task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) | + HINIC_SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD); + + *queue_info |= HINIC_SQ_CTRL_SET(vlan_pri, QUEUE_INFO_PRI); +} + static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task, u32 *queue_info) { enum hinic_offload_type offload = 0; + u16 vlan_tag; int enabled; enabled = offload_tso(task, queue_info, skb); @@ -424,6 +434,13 @@ static int hinic_tx_offload(struct sk_buff *skb, struct 
hinic_sq_task *task, return -EPROTONOSUPPORT; } + if (unlikely(skb_vlan_tag_present(skb))) { + vlan_tag = skb_vlan_tag_get(skb); + offload_vlan(task, queue_info, vlan_tag, + vlan_tag >> VLAN_PRIO_SHIFT); + offload |= TX_OFFLOAD_VLAN; + } + if (offload) hinic_task_set_l2hdr(task, skb_network_offset(skb)); diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 551de8c2fef2..f703fa58458e 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -3019,7 +3019,7 @@ static void e1000_tx_queue(struct e1000_adapter *adapter, * applicable for weak-ordered memory model archs, * such as IA-64). */ - wmb(); + dma_wmb(); tx_ring->next_to_use = i; } @@ -4540,7 +4540,7 @@ e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter, * applicable for weak-ordered memory model archs, * such as IA-64). */ - wmb(); + dma_wmb(); writel(i, adapter->hw.hw_addr + rx_ring->rdt); } } @@ -4655,7 +4655,7 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, * applicable for weak-ordered memory model archs, * such as IA-64). */ - wmb(); + dma_wmb(); writel(i, hw->hw_addr + rx_ring->rdt); } } diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c index f86d55657959..4b103cca8a39 100644 --- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c +++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c @@ -680,7 +680,7 @@ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw) ew32(TCTL, E1000_TCTL_PSP); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); ctrl = er32(CTRL); diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index b9309302c29e..2c1bab377b2a 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -959,7 +959,7 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw) ew32(TCTL, tctl); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); /* Must acquire the MDIO ownership before MAC reset. * Ownership defaults to firmware after a reset. 
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index fd550dee4982..63c3c79380a1 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -222,6 +222,9 @@ #define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */ #define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Master Req status */ +/* PCIm function state */ +#define E1000_STATUS_PCIM_STATE 0x40000000 + #define HALF_DUPLEX 1 #define FULL_DUPLEX 2 diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index be13227f1697..34cd67951aec 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -186,12 +186,13 @@ struct e1000_phy_regs { /* board specific private data structure */ struct e1000_adapter { - struct timer_list watchdog_timer; struct timer_list phy_info_timer; struct timer_list blink_timer; struct work_struct reset_task; - struct work_struct watchdog_task; + struct delayed_work watchdog_task; + + struct workqueue_struct *e1000_workqueue; const struct e1000_info *ei; diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 02ebf208f48b..08342698386d 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -1014,7 +1014,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) /* Disable all the interrupts */ ew32(IMC, 0xFFFFFFFF); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); /* Test each interrupt */ for (i = 0; i < 10; i++) { @@ -1046,7 +1046,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) ew32(IMC, mask); ew32(ICS, mask); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); if (adapter->test_icr & mask) { *data = 3; @@ -1064,7 +1064,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) ew32(IMS, mask); ew32(ICS, mask); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); if (!(adapter->test_icr & mask)) { *data = 4; @@ -1082,7 +1082,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) ew32(IMC, ~mask & 0x00007FFF); ew32(ICS, ~mask & 0x00007FFF); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); if (adapter->test_icr) { *data = 5; @@ -1094,7 +1094,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) /* Disable all the interrupts */ ew32(IMC, 0xFFFFFFFF); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); /* Unhook test interrupt handler */ free_irq(irq, netdev); @@ -1470,7 +1470,7 @@ static int e1000_set_82571_fiber_loopback(struct e1000_adapter *adapter) */ ew32(SCTL, E1000_SCTL_ENABLE_SERDES_LOOPBACK); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); return 0; } @@ -1584,7 +1584,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter) hw->phy.media_type == e1000_media_type_internal_serdes) { ew32(SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); break; } /* Fall Through */ diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index cdae0efde8e6..395b05701480 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -271,7 +271,7 @@ static void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw) u16 count = 20; 
do { - usleep_range(5000, 10000); + usleep_range(5000, 6000); } while (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LPCD) && count--); msleep(30); @@ -405,7 +405,7 @@ out: /* Ungate automatic PHY configuration on non-managed 82579 */ if ((hw->mac.type == e1000_pch2lan) && !(fwsm & E1000_ICH_FWSM_FW_VALID)) { - usleep_range(10000, 20000); + usleep_range(10000, 11000); e1000_gate_hw_phy_config_ich8lan(hw, false); } @@ -531,7 +531,7 @@ static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw) phy->id = 0; while ((e1000_phy_unknown == e1000e_get_phy_type_from_id(phy->id)) && (i++ < 100)) { - usleep_range(1000, 2000); + usleep_range(1000, 1100); ret_val = e1000e_get_phy_id(hw); if (ret_val) return ret_val; @@ -1244,7 +1244,7 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) goto out; } - usleep_range(10000, 20000); + usleep_range(10000, 11000); } e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10); @@ -1999,7 +1999,7 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) && (i++ < 30)) - usleep_range(10000, 20000); + usleep_range(10000, 11000); return blocked ? E1000_BLK_PHY_RESET : 0; } @@ -2818,7 +2818,7 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) return 0; /* Allow time for h/w to get to quiescent state after reset */ - usleep_range(10000, 20000); + usleep_range(10000, 11000); /* Perform any necessary post-reset workarounds */ switch (hw->mac.type) { @@ -2854,7 +2854,7 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) if (hw->mac.type == e1000_pch2lan) { /* Ungate automatic PHY configuration on non-managed 82579 */ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { - usleep_range(10000, 20000); + usleep_range(10000, 11000); e1000_gate_hw_phy_config_ich8lan(hw, false); } @@ -3875,7 +3875,7 @@ release: */ if (!ret_val) { nvm->ops.reload(hw); - usleep_range(10000, 20000); + usleep_range(10000, 11000); } out: @@ -4026,7 +4026,7 @@ release: */ if (!ret_val) { nvm->ops.reload(hw); - usleep_range(10000, 20000); + usleep_range(10000, 11000); } out: @@ -4650,7 +4650,7 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) ew32(TCTL, E1000_TCTL_PSP); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); /* Workaround for ICH8 bit corruption issue in FIFO memory */ if (hw->mac.type == e1000_ich8lan) { diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index 4abd55d646c5..e531976f8a67 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -797,7 +797,7 @@ static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw) * milliseconds even if the other end is doing it in SW). 
*/ for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) { - usleep_range(10000, 20000); + usleep_range(10000, 11000); status = er32(STATUS); if (status & E1000_STATUS_LU) break; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index b081a1ef6859..e4baa13b3cda 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1780,7 +1780,8 @@ static irqreturn_t e1000_intr_msi(int __always_unused irq, void *data) } /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); + queue_delayed_work(adapter->e1000_workqueue, + &adapter->watchdog_task, 1); } /* Reset on uncorrectable ECC error */ @@ -1860,7 +1861,8 @@ static irqreturn_t e1000_intr(int __always_unused irq, void *data) } /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); + queue_delayed_work(adapter->e1000_workqueue, + &adapter->watchdog_task, 1); } /* Reset on uncorrectable ECC error */ @@ -1905,7 +1907,8 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) hw->mac.get_link_status = true; /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); + queue_delayed_work(adapter->e1000_workqueue, + &adapter->watchdog_task, 1); } if (!test_bit(__E1000_DOWN, &adapter->state)) @@ -3208,7 +3211,7 @@ static void e1000_configure_rx(struct e1000_adapter *adapter) if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX)) ew32(RCTL, rctl & ~E1000_RCTL_EN); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); if (adapter->flags2 & FLAG2_DMA_BURST) { /* set the writeback threshold (only takes effect if the RDTR @@ -4046,12 +4049,12 @@ void e1000e_reset(struct e1000_adapter *adapter) case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: - fc->refresh_time = 0x0400; + fc->refresh_time = 0xFFFF; + fc->pause_time = 0xFFFF; if (adapter->netdev->mtu <= ETH_DATA_LEN) { fc->high_water = 0x05C20; fc->low_water = 0x05048; - fc->pause_time = 0x0650; break; } @@ -4272,13 +4275,12 @@ void e1000e_down(struct e1000_adapter *adapter, bool reset) /* flush both disables and wait for them to finish */ e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); e1000_irq_disable(adapter); napi_synchronize(&adapter->napi); - del_timer_sync(&adapter->watchdog_timer); del_timer_sync(&adapter->phy_info_timer); spin_lock(&adapter->stats64_lock); @@ -4310,7 +4312,7 @@ void e1000e_reinit_locked(struct e1000_adapter *adapter) { might_sleep(); while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) - usleep_range(1000, 2000); + usleep_range(1000, 1100); e1000e_down(adapter, true); e1000e_up(adapter); clear_bit(__E1000_RESETTING, &adapter->state); @@ -4707,7 +4709,7 @@ int e1000e_close(struct net_device *netdev) int count = E1000_CHECK_RESET_COUNT; while (test_bit(__E1000_RESETTING, &adapter->state) && count--) - usleep_range(10000, 20000); + usleep_range(10000, 11000); WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); @@ -5150,31 +5152,18 @@ static void e1000e_check_82574_phy_workaround(struct e1000_adapter *adapter) } } -/** - * e1000_watchdog - Timer Call-back - * @data: pointer to adapter cast into an unsigned long - **/ -static void e1000_watchdog(struct timer_list *t) -{ - struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer); - - /* Do 
the rest outside of interrupt context */ - schedule_work(&adapter->watchdog_task); - - /* TODO: make this use queue_delayed_work() */ -} - static void e1000_watchdog_task(struct work_struct *work) { struct e1000_adapter *adapter = container_of(work, struct e1000_adapter, - watchdog_task); + watchdog_task.work); struct net_device *netdev = adapter->netdev; struct e1000_mac_info *mac = &adapter->hw.mac; struct e1000_phy_info *phy = &adapter->hw.phy; struct e1000_ring *tx_ring = adapter->tx_ring; + u32 dmoff_exit_timeout = 100, tries = 0; struct e1000_hw *hw = &adapter->hw; - u32 link, tctl; + u32 link, tctl, pcim_state; if (test_bit(__E1000_DOWN, &adapter->state)) return; @@ -5199,6 +5188,21 @@ static void e1000_watchdog_task(struct work_struct *work) /* Cancel scheduled suspend requests. */ pm_runtime_resume(netdev->dev.parent); + /* Checking if MAC is in DMoff state*/ + pcim_state = er32(STATUS); + while (pcim_state & E1000_STATUS_PCIM_STATE) { + if (tries++ == dmoff_exit_timeout) { + e_dbg("Error in exiting dmoff\n"); + break; + } + usleep_range(10000, 20000); + pcim_state = er32(STATUS); + + /* Checking if MAC exited DMoff state */ + if (!(pcim_state & E1000_STATUS_PCIM_STATE)) + e1000_phy_hw_reset(&adapter->hw); + } + /* update snapshot of PHY registers on LSC */ e1000_phy_read_status(adapter); mac->ops.get_link_up_info(&adapter->hw, @@ -5400,8 +5404,9 @@ link_up: /* Reset the timer */ if (!test_bit(__E1000_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + 2 * HZ)); + queue_delayed_work(adapter->e1000_workqueue, + &adapter->watchdog_task, + round_jiffies(2 * HZ)); } #define E1000_TX_FLAGS_CSUM 0x00000001 @@ -6021,7 +6026,7 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) } while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) - usleep_range(1000, 2000); + usleep_range(1000, 1100); /* e1000e_down -> e1000e_reset dependent on max_frame_size & mtu */ adapter->max_frame_size = max_frame; e_info("changing MTU from %d to %d\n", netdev->mtu, new_mtu); @@ -6301,7 +6306,7 @@ static int e1000e_pm_freeze(struct device *dev) int count = E1000_CHECK_RESET_COUNT; while (test_bit(__E1000_RESETTING, &adapter->state) && count--) - usleep_range(10000, 20000); + usleep_range(10000, 11000); WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); @@ -6716,7 +6721,7 @@ static int e1000e_pm_runtime_suspend(struct device *dev) int count = E1000_CHECK_RESET_COUNT; while (test_bit(__E1000_RESETTING, &adapter->state) && count--) - usleep_range(10000, 20000); + usleep_range(10000, 11000); WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); @@ -7256,11 +7261,21 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_eeprom; } - timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0); + adapter->e1000_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, + e1000e_driver_name); + + if (!adapter->e1000_workqueue) { + err = -ENOMEM; + goto err_workqueue; + } + + INIT_DELAYED_WORK(&adapter->watchdog_task, e1000_watchdog_task); + queue_delayed_work(adapter->e1000_workqueue, &adapter->watchdog_task, + 0); + timer_setup(&adapter->phy_info_timer, e1000_update_phy_info, 0); INIT_WORK(&adapter->reset_task, e1000_reset_task); - INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task); INIT_WORK(&adapter->downshift_task, e1000e_downshift_workaround); INIT_WORK(&adapter->update_phy_task, e1000e_update_phy_task); INIT_WORK(&adapter->print_hang_task, e1000_print_hw_hang); @@ -7354,6 +7369,9 @@ static int e1000_probe(struct pci_dev 
*pdev, const struct pci_device_id *ent) return 0; err_register: + flush_workqueue(adapter->e1000_workqueue); + destroy_workqueue(adapter->e1000_workqueue); +err_workqueue: if (!(adapter->flags & FLAG_HAS_AMT)) e1000e_release_hw_control(adapter); err_eeprom: @@ -7400,15 +7418,17 @@ static void e1000_remove(struct pci_dev *pdev) */ if (!down) set_bit(__E1000_DOWN, &adapter->state); - del_timer_sync(&adapter->watchdog_timer); del_timer_sync(&adapter->phy_info_timer); cancel_work_sync(&adapter->reset_task); - cancel_work_sync(&adapter->watchdog_task); cancel_work_sync(&adapter->downshift_task); cancel_work_sync(&adapter->update_phy_task); cancel_work_sync(&adapter->print_hang_task); + cancel_delayed_work(&adapter->watchdog_task); + flush_workqueue(adapter->e1000_workqueue); + destroy_workqueue(adapter->e1000_workqueue); + if (adapter->flags & FLAG_HAS_HW_TIMESTAMP) { cancel_work_sync(&adapter->tx_hwtstamp_work); if (adapter->tx_hwtstamp_skb) { diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c index 937f9af22d26..e609f4df86f4 100644 --- a/drivers/net/ethernet/intel/e1000e/nvm.c +++ b/drivers/net/ethernet/intel/e1000e/nvm.c @@ -392,7 +392,7 @@ s32 e1000e_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) break; } } - usleep_range(10000, 20000); + usleep_range(10000, 11000); nvm->ops.release(hw); } diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 24e6ce6517a7..84bd06901014 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -27,6 +27,7 @@ #include <net/ip6_checksum.h> #include <linux/ethtool.h> #include <linux/if_vlan.h> +#include <linux/if_macvlan.h> #include <linux/if_bridge.h> #include <linux/clocksource.h> #include <linux/net_tstamp.h> @@ -412,6 +413,11 @@ struct i40e_flex_pit { u8 pit_index; }; +struct i40e_fwd_adapter { + struct net_device *netdev; + int bit_no; +}; + struct i40e_channel { struct list_head list; bool initialized; @@ -426,11 +432,25 @@ struct i40e_channel { struct i40e_aqc_vsi_properties_data info; u64 max_tx_rate; + struct i40e_fwd_adapter *fwd; /* track this channel belongs to which VSI */ struct i40e_vsi *parent_vsi; }; +static inline bool i40e_is_channel_macvlan(struct i40e_channel *ch) +{ + return !!ch->fwd; +} + +static inline u8 *i40e_channel_mac(struct i40e_channel *ch) +{ + if (i40e_is_channel_macvlan(ch)) + return ch->fwd->netdev->dev_addr; + else + return NULL; +} + /* struct that defines the Ethernet device */ struct i40e_pf { struct pci_dev *pdev; @@ -813,6 +833,13 @@ struct i40e_vsi { struct list_head ch_list; u16 tc_seid_map[I40E_MAX_TRAFFIC_CLASS]; + /* macvlan fields */ +#define I40E_MAX_MACVLANS 128 /* Max HW vectors - 1 on FVL */ +#define I40E_MIN_MACVLAN_VECTORS 2 /* Min vectors to enable macvlans */ + DECLARE_BITMAP(fwd_bitmask, I40E_MAX_MACVLANS); + struct list_head macvlan_list; + int macvlan_cnt; + void *priv; /* client driver data reference. 
*/ /* VSI specific handlers */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 8b0c29b7809b..5361c08328f7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5861,8 +5861,10 @@ static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid, return -ENOENT; } - /* Success, update channel */ - ch->enabled_tc = enabled_tc; + /* Success, update channel, set enabled_tc only if the channel + * is not a macvlan + */ + ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc; ch->seid = ctxt.seid; ch->vsi_number = ctxt.vsi_number; ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx); @@ -6912,6 +6914,489 @@ static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi) } /** + * i40e_del_macvlan_filter + * @hw: pointer to the HW structure + * @seid: seid of the channel VSI + * @macaddr: the mac address to apply as a filter + * @aq_err: store the admin Q error + * + * This function deletes a mac filter on the channel VSI which serves as the + * macvlan. Returns 0 on success. + **/ +static i40e_status i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid, + const u8 *macaddr, int *aq_err) +{ + struct i40e_aqc_remove_macvlan_element_data element; + i40e_status status; + + memset(&element, 0, sizeof(element)); + ether_addr_copy(element.mac_addr, macaddr); + element.vlan_tag = 0; + element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; + status = i40e_aq_remove_macvlan(hw, seid, &element, 1, NULL); + *aq_err = hw->aq.asq_last_status; + + return status; +} + +/** + * i40e_add_macvlan_filter + * @hw: pointer to the HW structure + * @seid: seid of the channel VSI + * @macaddr: the mac address to apply as a filter + * @aq_err: store the admin Q error + * + * This function adds a mac filter on the channel VSI which serves as the + * macvlan. Returns 0 on success. + **/ +static i40e_status i40e_add_macvlan_filter(struct i40e_hw *hw, u16 seid, + const u8 *macaddr, int *aq_err) +{ + struct i40e_aqc_add_macvlan_element_data element; + i40e_status status; + u16 cmd_flags = 0; + + ether_addr_copy(element.mac_addr, macaddr); + element.vlan_tag = 0; + element.queue_number = 0; + element.match_method = I40E_AQC_MM_ERR_NO_RES; + cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH; + element.flags = cpu_to_le16(cmd_flags); + status = i40e_aq_add_macvlan(hw, seid, &element, 1, NULL); + *aq_err = hw->aq.asq_last_status; + + return status; +} + +/** + * i40e_reset_ch_rings - Reset the queue contexts in a channel + * @vsi: the VSI we want to access + * @ch: the channel we want to access + */ +static void i40e_reset_ch_rings(struct i40e_vsi *vsi, struct i40e_channel *ch) +{ + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + int i; + + for (i = 0; i < ch->num_queue_pairs; i++) { + pf_q = ch->base_queue + i; + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = NULL; + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = NULL; + } +} + +/** + * i40e_free_macvlan_channels + * @vsi: the VSI we want to access + * + * This function frees the Qs of the channel VSI from + * the stack and also deletes the channel VSIs which + * serve as macvlans. 
+ */ +static void i40e_free_macvlan_channels(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + int ret; + + if (list_empty(&vsi->macvlan_list)) + return; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + struct i40e_vsi *parent_vsi; + + if (i40e_is_channel_macvlan(ch)) { + i40e_reset_ch_rings(vsi, ch); + clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); + netdev_unbind_sb_channel(vsi->netdev, ch->fwd->netdev); + netdev_set_sb_channel(ch->fwd->netdev, 0); + kfree(ch->fwd); + ch->fwd = NULL; + } + + list_del(&ch->list); + parent_vsi = ch->parent_vsi; + if (!parent_vsi || !ch->initialized) { + kfree(ch); + continue; + } + + /* remove the VSI */ + ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid, + NULL); + if (ret) + dev_err(&vsi->back->pdev->dev, + "unable to remove channel (%d) for parent VSI(%d)\n", + ch->seid, parent_vsi->seid); + kfree(ch); + } + vsi->macvlan_cnt = 0; +} + +/** + * i40e_fwd_ring_up - bring the macvlan device up + * @vsi: the VSI we want to access + * @vdev: macvlan netdevice + * @fwd: the private fwd structure + */ +static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev, + struct i40e_fwd_adapter *fwd) +{ + int ret = 0, num_tc = 1, i, aq_err; + struct i40e_channel *ch, *ch_tmp; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + + if (list_empty(&vsi->macvlan_list)) + return -EINVAL; + + /* Go through the list and find an available channel */ + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + if (!i40e_is_channel_macvlan(ch)) { + ch->fwd = fwd; + /* record configuration for macvlan interface in vdev */ + for (i = 0; i < num_tc; i++) + netdev_bind_sb_channel_queue(vsi->netdev, vdev, + i, + ch->num_queue_pairs, + ch->base_queue); + for (i = 0; i < ch->num_queue_pairs; i++) { + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + + pf_q = ch->base_queue + i; + + /* Get to TX ring ptr */ + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = ch; + + /* Get the RX ring ptr */ + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = ch; + } + break; + } + } + + /* Guarantee all rings are updated before we update the + * MAC address filter. + */ + wmb(); + + /* Add a mac filter */ + ret = i40e_add_macvlan_filter(hw, ch->seid, vdev->dev_addr, &aq_err); + if (ret) { + /* if we cannot add the MAC rule then disable the offload */ + macvlan_release_l2fw_offload(vdev); + for (i = 0; i < ch->num_queue_pairs; i++) { + struct i40e_ring *rx_ring; + u16 pf_q; + + pf_q = ch->base_queue + i; + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->netdev = NULL; + } + dev_info(&pf->pdev->dev, + "Error adding mac filter on macvlan err %s, aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, aq_err)); + netdev_err(vdev, "L2fwd offload disabled to L2 filter error\n"); + } + + return ret; +} + +/** + * i40e_setup_macvlans - create the channels which will be macvlans + * @vsi: the VSI we want to access + * @macvlan_cnt: no. of macvlans to be setup + * @qcnt: no. 
of Qs per macvlan + * @vdev: macvlan netdevice + */ +static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt, + struct net_device *vdev) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + struct i40e_vsi_context ctxt; + u16 sections, qmap, num_qps; + struct i40e_channel *ch; + int i, pow, ret = 0; + u8 offset = 0; + + if (vsi->type != I40E_VSI_MAIN || !macvlan_cnt) + return -EINVAL; + + num_qps = vsi->num_queue_pairs - (macvlan_cnt * qcnt); + + /* find the next higher power-of-2 of num queue pairs */ + pow = fls(roundup_pow_of_two(num_qps) - 1); + + qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | + (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT); + + /* Setup context bits for the main VSI */ + sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; + sections |= I40E_AQ_VSI_PROP_SCHED_VALID; + memset(&ctxt, 0, sizeof(ctxt)); + ctxt.seid = vsi->seid; + ctxt.pf_num = vsi->back->hw.pf_id; + ctxt.vf_num = 0; + ctxt.uplink_seid = vsi->uplink_seid; + ctxt.info = vsi->info; + ctxt.info.tc_mapping[0] = cpu_to_le16(qmap); + ctxt.info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG); + ctxt.info.queue_mapping[0] = cpu_to_le16(vsi->base_queue); + ctxt.info.valid_sections |= cpu_to_le16(sections); + + /* Reconfigure RSS for main VSI with new max queue count */ + vsi->rss_size = max_t(u16, num_qps, qcnt); + ret = i40e_vsi_config_rss(vsi); + if (ret) { + dev_info(&pf->pdev->dev, + "Failed to reconfig RSS for num_queues (%u)\n", + vsi->rss_size); + return ret; + } + vsi->reconfig_rss = true; + dev_dbg(&vsi->back->pdev->dev, + "Reconfigured RSS with num_queues (%u)\n", vsi->rss_size); + vsi->next_base_queue = num_qps; + vsi->cnt_q_avail = vsi->num_queue_pairs - num_qps; + + /* Update the VSI after updating the VSI queue-mapping + * information + */ + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "Update vsi tc config failed, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + /* update the local VSI info with updated queue map */ + i40e_vsi_update_queue_map(vsi, &ctxt); + vsi->info.valid_sections = 0; + + /* Create channels for macvlans */ + INIT_LIST_HEAD(&vsi->macvlan_list); + for (i = 0; i < macvlan_cnt; i++) { + ch = kzalloc(sizeof(*ch), GFP_KERNEL); + if (!ch) { + ret = -ENOMEM; + goto err_free; + } + INIT_LIST_HEAD(&ch->list); + ch->num_queue_pairs = qcnt; + if (!i40e_setup_channel(pf, vsi, ch)) { + ret = -EINVAL; + goto err_free; + } + ch->parent_vsi = vsi; + vsi->cnt_q_avail -= ch->num_queue_pairs; + vsi->macvlan_cnt++; + list_add_tail(&ch->list, &vsi->macvlan_list); + } + + return ret; + +err_free: + dev_info(&pf->pdev->dev, "Failed to setup macvlans\n"); + i40e_free_macvlan_channels(vsi); + + return ret; +} + +/** + * i40e_fwd_add - configure macvlans + * @netdev: net device to configure + * @vdev: macvlan netdevice + **/ +static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + u16 q_per_macvlan = 0, macvlan_cnt = 0, vectors; + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_fwd_adapter *fwd; + int avail_macvlan, ret; + + if ((pf->flags & I40E_FLAG_DCB_ENABLED)) { + netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n"); + return ERR_PTR(-EINVAL); + } + if ((pf->flags & I40E_FLAG_TC_MQPRIO)) { + netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n"); + return ERR_PTR(-EINVAL); + } + if (pf->num_lan_msix < 
I40E_MIN_MACVLAN_VECTORS) { + netdev_info(netdev, "Not enough vectors available to support macvlans\n"); + return ERR_PTR(-EINVAL); + } + + /* The macvlan device has to be a single Q device so that the + * tc_to_txq field can be reused to pick the tx queue. + */ + if (netif_is_multiqueue(vdev)) + return ERR_PTR(-ERANGE); + + if (!vsi->macvlan_cnt) { + /* reserve bit 0 for the pf device */ + set_bit(0, vsi->fwd_bitmask); + + /* Try to reserve as many queues as possible for macvlans. First + * reserve 3/4th of max vectors, then half, then quarter and + * calculate Qs per macvlan as you go + */ + vectors = pf->num_lan_msix; + if (vectors <= I40E_MAX_MACVLANS && vectors > 64) { + /* allocate 4 Qs per macvlan and 32 Qs to the PF*/ + q_per_macvlan = 4; + macvlan_cnt = (vectors - 32) / 4; + } else if (vectors <= 64 && vectors > 32) { + /* allocate 2 Qs per macvlan and 16 Qs to the PF*/ + q_per_macvlan = 2; + macvlan_cnt = (vectors - 16) / 2; + } else if (vectors <= 32 && vectors > 16) { + /* allocate 1 Q per macvlan and 16 Qs to the PF*/ + q_per_macvlan = 1; + macvlan_cnt = vectors - 16; + } else if (vectors <= 16 && vectors > 8) { + /* allocate 1 Q per macvlan and 8 Qs to the PF */ + q_per_macvlan = 1; + macvlan_cnt = vectors - 8; + } else { + /* allocate 1 Q per macvlan and 1 Q to the PF */ + q_per_macvlan = 1; + macvlan_cnt = vectors - 1; + } + + if (macvlan_cnt == 0) + return ERR_PTR(-EBUSY); + + /* Quiesce VSI queues */ + i40e_quiesce_vsi(vsi); + + /* sets up the macvlans but does not "enable" them */ + ret = i40e_setup_macvlans(vsi, macvlan_cnt, q_per_macvlan, + vdev); + if (ret) + return ERR_PTR(ret); + + /* Unquiesce VSI */ + i40e_unquiesce_vsi(vsi); + } + avail_macvlan = find_first_zero_bit(vsi->fwd_bitmask, + vsi->macvlan_cnt); + if (avail_macvlan >= I40E_MAX_MACVLANS) + return ERR_PTR(-EBUSY); + + /* create the fwd struct */ + fwd = kzalloc(sizeof(*fwd), GFP_KERNEL); + if (!fwd) + return ERR_PTR(-ENOMEM); + + set_bit(avail_macvlan, vsi->fwd_bitmask); + fwd->bit_no = avail_macvlan; + netdev_set_sb_channel(vdev, avail_macvlan); + fwd->netdev = vdev; + + if (!netif_running(netdev)) + return fwd; + + /* Set fwd ring up */ + ret = i40e_fwd_ring_up(vsi, vdev, fwd); + if (ret) { + /* unbind the queues and drop the subordinate channel config */ + netdev_unbind_sb_channel(netdev, vdev); + netdev_set_sb_channel(vdev, 0); + + kfree(fwd); + return ERR_PTR(-EINVAL); + } + + return fwd; +} + +/** + * i40e_del_all_macvlans - Delete all the mac filters on the channels + * @vsi: the VSI we want to access + */ +static void i40e_del_all_macvlans(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int aq_err, ret = 0; + + if (list_empty(&vsi->macvlan_list)) + return; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + if (i40e_is_channel_macvlan(ch)) { + ret = i40e_del_macvlan_filter(hw, ch->seid, + i40e_channel_mac(ch), + &aq_err); + if (!ret) { + /* Reset queue contexts */ + i40e_reset_ch_rings(vsi, ch); + clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); + netdev_unbind_sb_channel(vsi->netdev, + ch->fwd->netdev); + netdev_set_sb_channel(ch->fwd->netdev, 0); + kfree(ch->fwd); + ch->fwd = NULL; + } + } + } +} + +/** + * i40e_fwd_del - delete macvlan interfaces + * @netdev: net device to configure + * @vdev: macvlan netdevice + */ +static void i40e_fwd_del(struct net_device *netdev, void *vdev) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_fwd_adapter *fwd = vdev; + struct i40e_channel *ch, 
*ch_tmp; + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int aq_err, ret = 0; + + /* Find the channel associated with the macvlan and del mac filter */ + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + if (i40e_is_channel_macvlan(ch) && + ether_addr_equal(i40e_channel_mac(ch), + fwd->netdev->dev_addr)) { + ret = i40e_del_macvlan_filter(hw, ch->seid, + i40e_channel_mac(ch), + &aq_err); + if (!ret) { + /* Reset queue contexts */ + i40e_reset_ch_rings(vsi, ch); + clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); + netdev_unbind_sb_channel(netdev, fwd->netdev); + netdev_set_sb_channel(fwd->netdev, 0); + kfree(ch->fwd); + ch->fwd = NULL; + } else { + dev_info(&pf->pdev->dev, + "Error deleting mac filter on macvlan err %s, aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, aq_err)); + } + break; + } + } +} + +/** * i40e_setup_tc - configure multiple traffic classes * @netdev: net device to configure * @type_data: tc offload data @@ -11665,6 +12150,9 @@ static int i40e_set_features(struct net_device *netdev, return -EINVAL; } + if (!(features & NETIF_F_HW_L2FW_DOFFLOAD) && vsi->macvlan_cnt) + i40e_del_all_macvlans(vsi); + need_reset = i40e_set_ntuple(pf, features); if (need_reset) @@ -12409,6 +12897,8 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_bpf = i40e_xdp, .ndo_xdp_xmit = i40e_xdp_xmit, .ndo_xsk_async_xmit = i40e_xsk_async_xmit, + .ndo_dfwd_add_station = i40e_fwd_add, + .ndo_dfwd_del_station = i40e_fwd_del, }; /** @@ -12468,6 +12958,9 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) /* record features VLANs can make use of */ netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID; + /* enable macvlan offloads */ + netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD; + hw_features = hw_enc_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; diff --git a/drivers/net/ethernet/intel/iavf/iavf_osdep.h b/drivers/net/ethernet/intel/iavf/iavf_osdep.h index d39684558597..a452ce90679a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_osdep.h +++ b/drivers/net/ethernet/intel/iavf/iavf_osdep.h @@ -44,8 +44,12 @@ struct iavf_virt_mem { #define iavf_allocate_virt_mem(h, m, s) iavf_allocate_virt_mem_d(h, m, s) #define iavf_free_virt_mem(h, m) iavf_free_virt_mem_d(h, m) -#define iavf_debug(h, m, s, ...) iavf_debug_d(h, m, s, ##__VA_ARGS__) -extern void iavf_debug_d(void *hw, u32 mask, char *fmt_str, ...) - __printf(3, 4); +#define iavf_debug(h, m, s, ...) 
\ +do { \ + if (((m) & (h)->debug_mask)) \ + pr_info("iavf %02x:%02x.%x " s, \ + (h)->bus.bus_id, (h)->bus.device, \ + (h)->bus.func, ##__VA_ARGS__); \ +} while (0) #endif /* _IAVF_OSDEP_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 1cde1601bc32..0cca1b589b56 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1296,7 +1296,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, struct iavf_rx_buffer *rx_buffer, unsigned int size) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; + void *va; #if (PAGE_SIZE < 8192) unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; #else @@ -1308,6 +1308,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, if (!rx_buffer) return NULL; /* prefetch first cache line of first page */ + va = page_address(rx_buffer->page) + rx_buffer->page_offset; prefetch(va); #if L1_CACHE_BYTES < 128 prefetch(va + L1_CACHE_BYTES); @@ -1362,7 +1363,7 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, struct iavf_rx_buffer *rx_buffer, unsigned int size) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; + void *va; #if (PAGE_SIZE < 8192) unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; #else @@ -1374,6 +1375,7 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, if (!rx_buffer) return NULL; /* prefetch first cache line of first page */ + va = page_address(rx_buffer->page) + rx_buffer->page_offset; prefetch(va); #if L1_CACHE_BYTES < 128 prefetch(va + L1_CACHE_BYTES); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index cb7c56c5afe6..d49d58a6de80 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -242,7 +242,8 @@ void iavf_configure_queues(struct iavf_adapter *adapter) struct virtchnl_vsi_queue_config_info *vqci; struct virtchnl_queue_pair_info *vqpi; int pairs = adapter->num_active_queues; - int i, len, max_frame = IAVF_MAX_RXBUFFER; + int i, max_frame = IAVF_MAX_RXBUFFER; + size_t len; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -251,8 +252,7 @@ void iavf_configure_queues(struct iavf_adapter *adapter) return; } adapter->current_op = VIRTCHNL_OP_CONFIG_VSI_QUEUES; - len = sizeof(struct virtchnl_vsi_queue_config_info) + - (sizeof(struct virtchnl_queue_pair_info) * pairs); + len = struct_size(vqci, qpair, pairs); vqci = kzalloc(len, GFP_KERNEL); if (!vqci) return; @@ -351,8 +351,9 @@ void iavf_map_queues(struct iavf_adapter *adapter) { struct virtchnl_irq_map_info *vimi; struct virtchnl_vector_map *vecmap; - int v_idx, q_vectors, len; struct iavf_q_vector *q_vector; + int v_idx, q_vectors; + size_t len; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -364,9 +365,7 @@ void iavf_map_queues(struct iavf_adapter *adapter) q_vectors = adapter->num_msix_vectors - NONQ_VECS; - len = sizeof(struct virtchnl_irq_map_info) + - (adapter->num_msix_vectors * - sizeof(struct virtchnl_vector_map)); + len = struct_size(vimi, vecmap, adapter->num_msix_vectors); vimi = kzalloc(len, GFP_KERNEL); if (!vimi) return; @@ -433,9 +432,10 @@ int iavf_request_queues(struct iavf_adapter *adapter, int num) void iavf_add_ether_addrs(struct iavf_adapter *adapter) { struct virtchnl_ether_addr_list *veal; - int len, i = 0, count 
= 0; struct iavf_mac_filter *f; + int i = 0, count = 0; bool more = false; + size_t len; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -457,15 +457,13 @@ void iavf_add_ether_addrs(struct iavf_adapter *adapter) } adapter->current_op = VIRTCHNL_OP_ADD_ETH_ADDR; - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); + len = struct_size(veal, list, count); if (len > IAVF_MAX_AQ_BUF_SIZE) { dev_warn(&adapter->pdev->dev, "Too many add MAC changes in one request\n"); count = (IAVF_MAX_AQ_BUF_SIZE - sizeof(struct virtchnl_ether_addr_list)) / sizeof(struct virtchnl_ether_addr); - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); + len = struct_size(veal, list, count); more = true; } @@ -505,8 +503,9 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter) { struct virtchnl_ether_addr_list *veal; struct iavf_mac_filter *f, *ftmp; - int len, i = 0, count = 0; + int i = 0, count = 0; bool more = false; + size_t len; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -528,15 +527,13 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter) } adapter->current_op = VIRTCHNL_OP_DEL_ETH_ADDR; - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); + len = struct_size(veal, list, count); if (len > IAVF_MAX_AQ_BUF_SIZE) { dev_warn(&adapter->pdev->dev, "Too many delete MAC changes in one request\n"); count = (IAVF_MAX_AQ_BUF_SIZE - sizeof(struct virtchnl_ether_addr_list)) / sizeof(struct virtchnl_ether_addr); - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); + len = struct_size(veal, list, count); more = true; } veal = kzalloc(len, GFP_ATOMIC); @@ -973,7 +970,7 @@ static void iavf_print_link_message(struct iavf_adapter *adapter) void iavf_enable_channels(struct iavf_adapter *adapter) { struct virtchnl_tc_info *vti = NULL; - u16 len; + size_t len; int i; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { @@ -983,9 +980,7 @@ void iavf_enable_channels(struct iavf_adapter *adapter) return; } - len = ((adapter->num_tc - 1) * sizeof(struct virtchnl_channel_info)) + - sizeof(struct virtchnl_tc_info); - + len = struct_size(vti, list, adapter->num_tc - 1); vti = kzalloc(len, GFP_KERNEL); if (!vti) return; diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 8d49f83be7a5..2a232504379d 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -683,10 +683,10 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, u16 i, num_groups_added = 0; enum ice_status status = 0; struct ice_hw *hw = pi->hw; - u16 buf_size; + size_t buf_size; u32 teid; - buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1); + buf_size = struct_size(buf, generic, num_nodes - 1); buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL); if (!buf) return ICE_ERR_NO_MEMORY; diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 0ad737d2f289..9cb49980ec2d 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -409,6 +409,8 @@ do { \ #define E1000_I210_TQAVCC(_n) (0x3004 + ((_n) * 0x40)) #define E1000_I210_TQAVHC(_n) (0x300C + ((_n) * 0x40)) +#define E1000_I210_RR2DCDELAY 0x5BF4 + #define E1000_INVM_DATA_REG(_n) 
(0x12120 + 4*(_n)) #define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */ diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index c645d9e648e0..3182b059bf55 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -448,7 +448,7 @@ static void igb_set_msglevel(struct net_device *netdev, u32 data) static int igb_get_regs_len(struct net_device *netdev) { -#define IGB_REGS_LEN 739 +#define IGB_REGS_LEN 740 return IGB_REGS_LEN * sizeof(u32); } @@ -675,41 +675,44 @@ static void igb_get_regs(struct net_device *netdev, regs_buff[554] = adapter->stats.b2ogprc; } - if (hw->mac.type != e1000_82576) - return; - for (i = 0; i < 12; i++) - regs_buff[555 + i] = rd32(E1000_SRRCTL(i + 4)); - for (i = 0; i < 4; i++) - regs_buff[567 + i] = rd32(E1000_PSRTYPE(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[571 + i] = rd32(E1000_RDBAL(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[583 + i] = rd32(E1000_RDBAH(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[595 + i] = rd32(E1000_RDLEN(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[607 + i] = rd32(E1000_RDH(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[619 + i] = rd32(E1000_RDT(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[631 + i] = rd32(E1000_RXDCTL(i + 4)); - - for (i = 0; i < 12; i++) - regs_buff[643 + i] = rd32(E1000_TDBAL(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[655 + i] = rd32(E1000_TDBAH(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[667 + i] = rd32(E1000_TDLEN(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[679 + i] = rd32(E1000_TDH(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[691 + i] = rd32(E1000_TDT(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[703 + i] = rd32(E1000_TXDCTL(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[715 + i] = rd32(E1000_TDWBAL(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[727 + i] = rd32(E1000_TDWBAH(i + 4)); + if (hw->mac.type == e1000_82576) { + for (i = 0; i < 12; i++) + regs_buff[555 + i] = rd32(E1000_SRRCTL(i + 4)); + for (i = 0; i < 4; i++) + regs_buff[567 + i] = rd32(E1000_PSRTYPE(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[571 + i] = rd32(E1000_RDBAL(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[583 + i] = rd32(E1000_RDBAH(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[595 + i] = rd32(E1000_RDLEN(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[607 + i] = rd32(E1000_RDH(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[619 + i] = rd32(E1000_RDT(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[631 + i] = rd32(E1000_RXDCTL(i + 4)); + + for (i = 0; i < 12; i++) + regs_buff[643 + i] = rd32(E1000_TDBAL(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[655 + i] = rd32(E1000_TDBAH(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[667 + i] = rd32(E1000_TDLEN(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[679 + i] = rd32(E1000_TDH(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[691 + i] = rd32(E1000_TDT(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[703 + i] = rd32(E1000_TXDCTL(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[715 + i] = rd32(E1000_TDWBAL(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[727 + i] = rd32(E1000_TDWBAH(i + 4)); + } + + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) + regs_buff[739] = rd32(E1000_I210_RR2DCDELAY); } static int igb_get_eeprom_len(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index fc925adbd9fa..f66dae72fe37 100644 --- 
a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5688,6 +5688,7 @@ static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, */ if (tx_ring->launchtime_enable) { ts = ns_to_timespec64(first->skb->tstamp); + first->skb->tstamp = 0; context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32); } else { context_desc->seqnum_seed = 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index ff85ce5791a3..31629fc7e820 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -842,6 +842,9 @@ void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf) struct ixgbe_ipsec *ipsec = adapter->ipsec; int i; + if (!ipsec) + return; + /* search rx sa table */ for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT && ipsec->num_rx_sa; i++) { if (!ipsec->rx_tbl[i].used) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 2c4d327fcc2e..0be13a90ff79 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -205,11 +205,8 @@ static void ixgbe_ptp_setup_sdp_X540(struct ixgbe_adapter *adapter) */ rem = (NS_PER_SEC - rem); - /* Adjust the clock edge to align with the next full second. This - * assumes that the cycle counter shift is small enough to avoid - * overflowing when shifting the remainder. - */ - clock_edge += div_u64((rem << cc->shift), cc->mult); + /* Adjust the clock edge to align with the next full second. */ + clock_edge += div_u64(((u64)rem << cc->shift), cc->mult); trgttiml = (u32)clock_edge; trgttimh = (u32)(clock_edge >> 32); @@ -291,11 +288,8 @@ static void ixgbe_ptp_setup_sdp_X550(struct ixgbe_adapter *adapter) */ rem = (NS_PER_SEC - rem); - /* Adjust the clock edge to align with the next full second. This - * assumes that the cycle counter shift is small enough to avoid - * overflowing when shifting the remainder. - */ - clock_edge += div_u64((rem << cc->shift), cc->mult); + /* Adjust the clock edge to align with the next full second. */ + clock_edge += div_u64(((u64)rem << cc->shift), cc->mult); /* X550 hardware stores the time in 32bits of 'billions of cycles' and * 32bits of 'cycles'. 
There's no guarantee that cycles represents diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index 5399787e07af..54459b69c948 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -85,22 +85,16 @@ static int ixgbevf_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - u32 link_speed = 0; - bool link_up; ethtool_link_ksettings_zero_link_mode(cmd, supported); ethtool_link_ksettings_add_link_mode(cmd, supported, 10000baseT_Full); cmd->base.autoneg = AUTONEG_DISABLE; cmd->base.port = -1; - hw->mac.get_link_status = 1; - hw->mac.ops.check_link(hw, &link_speed, &link_up, false); - - if (link_up) { + if (adapter->link_up) { __u32 speed = SPEED_10000; - switch (link_speed) { + switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: speed = SPEED_10000; break; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 48b8a90f7057..a73f2532115b 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1528,7 +1528,7 @@ static int rtl8169_set_features(struct net_device *dev, static inline u32 rtl8169_tx_vlan_tag(struct sk_buff *skb) { return (skb_vlan_tag_present(skb)) ? - TxVlanTag | swab16(skb_vlan_tag_get(skb)) : 0x00; + TxVlanTag | htons(skb_vlan_tag_get(skb)) : 0x00; } static void rtl8169_rx_vlan_tag(struct RxDesc *desc, struct sk_buff *skb) @@ -1536,7 +1536,8 @@ static void rtl8169_rx_vlan_tag(struct RxDesc *desc, struct sk_buff *skb) u32 opts2 = le32_to_cpu(desc->opts2); if (opts2 & RxVlanTag) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), swab16(opts2 & 0xffff)); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + ntohs(opts2 & 0xffff)); } static void rtl8169_get_regs(struct net_device *dev, struct ethtool_regs *regs, @@ -5784,7 +5785,6 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data, skb = napi_alloc_skb(&tp->napi, pkt_size); if (skb) skb_copy_to_linear_data(skb, data, pkt_size); - dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE); return skb; } @@ -6723,15 +6723,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->cp_cmd = RTL_R16(tp, CPlusCmd); if (sizeof(dma_addr_t) > 4 && tp->mac_version >= RTL_GIGA_MAC_VER_18 && - !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { + !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) dev->features |= NETIF_F_HIGHDMA; - } else { - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc < 0) { - dev_err(&pdev->dev, "DMA configuration failed\n"); - return rc; - } - } rtl_init_rxcfg(tp); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 720f2b32fc2f..1a7596ba0dbe 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -10,7 +10,7 @@ #include <net/net_namespace.h> /* TC action not accessible from user space */ -#define TC_ACT_REINSERT (TC_ACT_VALUE_MAX + 1) +#define TC_ACT_CONSUMED (TC_ACT_VALUE_MAX + 1) /* Basic packet classifier frontend definitions. 
*/ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 21f434f3ac9e..855167bbc372 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -279,7 +279,7 @@ struct tcf_result { }; const struct tcf_proto *goto_tp; - /* used by the TC_ACT_REINSERT action */ + /* used in the skb_tc_reinsert function */ struct { bool ingress; struct gnet_stats_queue *qstats; diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 8b2f993cbb77..390efb54b2e0 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -988,8 +988,9 @@ struct tc_etf_qopt { __s32 delta; __s32 clockid; __u32 flags; -#define TC_ETF_DEADLINE_MODE_ON BIT(0) -#define TC_ETF_OFFLOAD_ON BIT(1) +#define TC_ETF_DEADLINE_MODE_ON _BITUL(0) +#define TC_ETF_OFFLOAD_ON _BITUL(1) +#define TC_ETF_SKIP_SOCK_CHECK _BITUL(2) }; enum { @@ -1158,6 +1159,8 @@ enum { * [TCA_TAPRIO_ATTR_SCHED_ENTRY_INTERVAL] */ +#define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST 0x1 + enum { TCA_TAPRIO_ATTR_UNSPEC, TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */ @@ -1169,6 +1172,8 @@ enum { TCA_TAPRIO_ATTR_ADMIN_SCHED, /* The admin sched, only used in dump */ TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, /* s64 */ TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */ + TCA_TAPRIO_ATTR_FLAGS, /* u32 */ + TCA_TAPRIO_ATTR_TXTIME_DELAY, /* s32 */ __TCA_TAPRIO_ATTR_MAX, }; diff --git a/net/core/dev.c b/net/core/dev.c index d6edd218babd..58529318b3a9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4689,9 +4689,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, __skb_push(skb, skb->mac_len); skb_do_redirect(skb); return NULL; - case TC_ACT_REINSERT: - /* this does not scrub the packet, and updates stats on error */ - skb_tc_reinsert(skb, &cl_res); + case TC_ACT_CONSUMED: return NULL; default: break; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 58e7573dded4..c3fce36633b2 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -27,6 +27,9 @@ static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); +#define MIRRED_RECURSION_LIMIT 4 +static DEFINE_PER_CPU(unsigned int, mirred_rec_level); + static bool tcf_mirred_is_act_redirect(int action) { return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR; @@ -210,6 +213,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, struct sk_buff *skb2 = skb; bool m_mac_header_xmit; struct net_device *dev; + unsigned int rec_level; int retval, err = 0; bool use_reinsert; bool want_ingress; @@ -217,6 +221,14 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, int m_eaction; int mac_len; + rec_level = __this_cpu_inc_return(mirred_rec_level); + if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) { + net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", + netdev_name(skb->dev)); + __this_cpu_dec(mirred_rec_level); + return TC_ACT_SHOT; + } + tcf_lastuse_update(&m->tcf_tm); bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); @@ -277,7 +289,9 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, if (use_reinsert) { res->ingress = want_ingress; res->qstats = this_cpu_ptr(m->common.cpu_qstats); - return TC_ACT_REINSERT; + skb_tc_reinsert(skb, res); + __this_cpu_dec(mirred_rec_level); + return TC_ACT_CONSUMED; } } @@ -292,6 +306,7 @@ out: if (tcf_mirred_is_act_redirect(m_eaction)) retval = TC_ACT_SHOT; } + __this_cpu_dec(mirred_rec_level); return retval; } diff --git a/net/sched/em_ipt.c 
b/net/sched/em_ipt.c index 243fd22f2248..9fff6480acc6 100644 --- a/net/sched/em_ipt.c +++ b/net/sched/em_ipt.c @@ -21,6 +21,7 @@ struct em_ipt_match { const struct xt_match *match; u32 hook; + u8 nfproto; u8 match_data[0] __aligned(8); }; @@ -71,11 +72,25 @@ static int policy_validate_match_data(struct nlattr **tb, u8 mrev) return 0; } +static int addrtype_validate_match_data(struct nlattr **tb, u8 mrev) +{ + if (mrev != 1) { + pr_err("only addrtype match revision 1 supported"); + return -EINVAL; + } + + return 0; +} + static const struct em_ipt_xt_match em_ipt_xt_matches[] = { { .match_name = "policy", .validate_match_data = policy_validate_match_data }, + { + .match_name = "addrtype", + .validate_match_data = addrtype_validate_match_data + }, {} }; @@ -115,6 +130,7 @@ static int em_ipt_change(struct net *net, void *data, int data_len, struct em_ipt_match *im = NULL; struct xt_match *match; int mdata_len, ret; + u8 nfproto; ret = nla_parse_deprecated(tb, TCA_EM_IPT_MAX, data, data_len, em_ipt_policy, NULL); @@ -125,6 +141,15 @@ static int em_ipt_change(struct net *net, void *data, int data_len, !tb[TCA_EM_IPT_MATCH_DATA] || !tb[TCA_EM_IPT_NFPROTO]) return -EINVAL; + nfproto = nla_get_u8(tb[TCA_EM_IPT_NFPROTO]); + switch (nfproto) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + break; + default: + return -EINVAL; + } + match = get_xt_match(tb); if (IS_ERR(match)) { pr_err("unable to load match\n"); @@ -140,6 +165,7 @@ static int em_ipt_change(struct net *net, void *data, int data_len, im->match = match; im->hook = nla_get_u32(tb[TCA_EM_IPT_HOOK]); + im->nfproto = nfproto; nla_memcpy(im->match_data, tb[TCA_EM_IPT_MATCH_DATA], mdata_len); ret = check_match(net, im, mdata_len); @@ -182,15 +208,33 @@ static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em, const struct em_ipt_match *im = (const void *)em->data; struct xt_action_param acpar = {}; struct net_device *indev = NULL; + u8 nfproto = im->match->family; struct nf_hook_state state; int ret; + switch (tc_skb_protocol(skb)) { + case htons(ETH_P_IP): + if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) + return 0; + if (nfproto == NFPROTO_UNSPEC) + nfproto = NFPROTO_IPV4; + break; + case htons(ETH_P_IPV6): + if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) + return 0; + if (nfproto == NFPROTO_UNSPEC) + nfproto = NFPROTO_IPV6; + break; + default: + return 0; + } + rcu_read_lock(); if (skb->skb_iif) indev = dev_get_by_index_rcu(em->net, skb->skb_iif); - nf_hook_state_init(&state, im->hook, im->match->family, + nf_hook_state_init(&state, im->hook, nfproto, indev ?: skb->dev, skb->dev, NULL, em->net, NULL); acpar.match = im->match; @@ -213,7 +257,7 @@ static int em_ipt_dump(struct sk_buff *skb, struct tcf_ematch *em) return -EMSGSIZE; if (nla_put_u8(skb, TCA_EM_IPT_MATCH_REVISION, im->match->revision) < 0) return -EMSGSIZE; - if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->match->family) < 0) + if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->nfproto) < 0) return -EMSGSIZE; if (nla_put(skb, TCA_EM_IPT_MATCH_DATA, im->match->usersize ?: im->match->matchsize, diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index db0c2ba1d156..cebfb65d8556 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -22,10 +22,12 @@ #define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON) #define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON) +#define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK) struct etf_sched_data { bool offload; bool deadline_mode; + bool skip_sock_check; int clockid; int queue; s32 
delta; /* in ns */ @@ -77,6 +79,9 @@ static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb) struct sock *sk = nskb->sk; ktime_t now; + if (q->skip_sock_check) + goto skip; + if (!sk) return false; @@ -92,6 +97,7 @@ static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb) if (sk->sk_txtime_deadline_mode != q->deadline_mode) return false; +skip: now = q->get_time(); if (ktime_before(txtime, now) || ktime_before(txtime, q->last)) return false; @@ -385,6 +391,7 @@ static int etf_init(struct Qdisc *sch, struct nlattr *opt, q->clockid = qopt->clockid; q->offload = OFFLOAD_IS_ON(qopt); q->deadline_mode = DEADLINE_MODE_IS_ON(qopt); + q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt); switch (q->clockid) { case CLOCK_REALTIME: @@ -473,6 +480,9 @@ static int etf_dump(struct Qdisc *sch, struct sk_buff *skb) if (q->deadline_mode) opt.flags |= TC_ETF_DEADLINE_MODE_ON; + if (q->skip_sock_check) + opt.flags |= TC_ETF_SKIP_SOCK_CHECK; + if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt)) goto nla_put_failure; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 9ecfb8f5902a..388750ddc57a 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -21,12 +21,17 @@ #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <net/sch_generic.h> +#include <net/sock.h> +#include <net/tcp.h> static LIST_HEAD(taprio_list); static DEFINE_SPINLOCK(taprio_list_lock); #define TAPRIO_ALL_GATES_OPEN -1 +#define FLAGS_VALID(flags) (!((flags) & ~TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)) +#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) + struct sched_entry { struct list_head list; @@ -35,6 +40,7 @@ struct sched_entry { * packet leaves after this time. */ ktime_t close_time; + ktime_t next_txtime; atomic_t budget; int index; u32 gate_mask; @@ -55,6 +61,8 @@ struct sched_gate_list { struct taprio_sched { struct Qdisc **qdiscs; struct Qdisc *root; + u32 flags; + enum tk_offsets tk_offset; int clockid; atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+ * speeds it's sub-nanoseconds per byte @@ -65,9 +73,9 @@ struct taprio_sched { struct sched_entry __rcu *current_entry; struct sched_gate_list __rcu *oper_sched; struct sched_gate_list __rcu *admin_sched; - ktime_t (*get_time)(void); struct hrtimer advance_timer; struct list_head taprio_list; + int txtime_delay; }; static ktime_t sched_base_time(const struct sched_gate_list *sched) @@ -78,6 +86,20 @@ static ktime_t sched_base_time(const struct sched_gate_list *sched) return ns_to_ktime(sched->base_time); } +static ktime_t taprio_get_time(struct taprio_sched *q) +{ + ktime_t mono = ktime_get(); + + switch (q->tk_offset) { + case TK_OFFS_MAX: + return mono; + default: + return ktime_mono_to_any(mono, q->tk_offset); + } + + return KTIME_MAX; +} + static void taprio_free_sched_cb(struct rcu_head *head) { struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu); @@ -108,20 +130,263 @@ static void switch_schedules(struct taprio_sched *q, *admin = NULL; } -static ktime_t get_cycle_time(struct sched_gate_list *sched) +/* Get how much time has been already elapsed in the current cycle. 
*/ +static s32 get_cycle_time_elapsed(struct sched_gate_list *sched, ktime_t time) +{ + ktime_t time_since_sched_start; + s32 time_elapsed; + + time_since_sched_start = ktime_sub(time, sched->base_time); + div_s64_rem(time_since_sched_start, sched->cycle_time, &time_elapsed); + + return time_elapsed; +} + +static ktime_t get_interval_end_time(struct sched_gate_list *sched, + struct sched_gate_list *admin, + struct sched_entry *entry, + ktime_t intv_start) +{ + s32 cycle_elapsed = get_cycle_time_elapsed(sched, intv_start); + ktime_t intv_end, cycle_ext_end, cycle_end; + + cycle_end = ktime_add_ns(intv_start, sched->cycle_time - cycle_elapsed); + intv_end = ktime_add_ns(intv_start, entry->interval); + cycle_ext_end = ktime_add(cycle_end, sched->cycle_time_extension); + + if (ktime_before(intv_end, cycle_end)) + return intv_end; + else if (admin && admin != sched && + ktime_after(admin->base_time, cycle_end) && + ktime_before(admin->base_time, cycle_ext_end)) + return admin->base_time; + else + return cycle_end; +} + +static int length_to_duration(struct taprio_sched *q, int len) +{ + return div_u64(len * atomic64_read(&q->picos_per_byte), 1000); +} + +/* Returns the entry corresponding to next available interval. If + * validate_interval is set, it only validates whether the timestamp occurs + * when the gate corresponding to the skb's traffic class is open. + */ +static struct sched_entry *find_entry_to_transmit(struct sk_buff *skb, + struct Qdisc *sch, + struct sched_gate_list *sched, + struct sched_gate_list *admin, + ktime_t time, + ktime_t *interval_start, + ktime_t *interval_end, + bool validate_interval) +{ + ktime_t curr_intv_start, curr_intv_end, cycle_end, packet_transmit_time; + ktime_t earliest_txtime = KTIME_MAX, txtime, cycle, transmit_end_time; + struct sched_entry *entry = NULL, *entry_found = NULL; + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + bool entry_available = false; + s32 cycle_elapsed; + int tc, n; + + tc = netdev_get_prio_tc_map(dev, skb->priority); + packet_transmit_time = length_to_duration(q, qdisc_pkt_len(skb)); + + *interval_start = 0; + *interval_end = 0; + + if (!sched) + return NULL; + + cycle = sched->cycle_time; + cycle_elapsed = get_cycle_time_elapsed(sched, time); + curr_intv_end = ktime_sub_ns(time, cycle_elapsed); + cycle_end = ktime_add_ns(curr_intv_end, cycle); + + list_for_each_entry(entry, &sched->entries, list) { + curr_intv_start = curr_intv_end; + curr_intv_end = get_interval_end_time(sched, admin, entry, + curr_intv_start); + + if (ktime_after(curr_intv_start, cycle_end)) + break; + + if (!(entry->gate_mask & BIT(tc)) || + packet_transmit_time > entry->interval) + continue; + + txtime = entry->next_txtime; + + if (ktime_before(txtime, time) || validate_interval) { + transmit_end_time = ktime_add_ns(time, packet_transmit_time); + if ((ktime_before(curr_intv_start, time) && + ktime_before(transmit_end_time, curr_intv_end)) || + (ktime_after(curr_intv_start, time) && !validate_interval)) { + entry_found = entry; + *interval_start = curr_intv_start; + *interval_end = curr_intv_end; + break; + } else if (!entry_available && !validate_interval) { + /* Here, we are just trying to find out the + * first available interval in the next cycle. 
+ */ + entry_available = 1; + entry_found = entry; + *interval_start = ktime_add_ns(curr_intv_start, cycle); + *interval_end = ktime_add_ns(curr_intv_end, cycle); + } + } else if (ktime_before(txtime, earliest_txtime) && + !entry_available) { + earliest_txtime = txtime; + entry_found = entry; + n = div_s64(ktime_sub(txtime, curr_intv_start), cycle); + *interval_start = ktime_add(curr_intv_start, n * cycle); + *interval_end = ktime_add(curr_intv_end, n * cycle); + } + } + + return entry_found; +} + +static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch) { + struct taprio_sched *q = qdisc_priv(sch); + struct sched_gate_list *sched, *admin; + ktime_t interval_start, interval_end; struct sched_entry *entry; - ktime_t cycle = 0; - if (sched->cycle_time != 0) - return sched->cycle_time; + rcu_read_lock(); + sched = rcu_dereference(q->oper_sched); + admin = rcu_dereference(q->admin_sched); + + entry = find_entry_to_transmit(skb, sch, sched, admin, skb->tstamp, + &interval_start, &interval_end, true); + rcu_read_unlock(); - list_for_each_entry(entry, &sched->entries, list) - cycle = ktime_add_ns(cycle, entry->interval); + return entry; +} - sched->cycle_time = cycle; +/* This returns the tstamp value set by TCP in terms of the set clock. */ +static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb) +{ + unsigned int offset = skb_network_offset(skb); + const struct ipv6hdr *ipv6h; + const struct iphdr *iph; + struct ipv6hdr _ipv6h; - return cycle; + ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h); + if (!ipv6h) + return 0; + + if (ipv6h->version == 4) { + iph = (struct iphdr *)ipv6h; + offset += iph->ihl * 4; + + /* special-case 6in4 tunnelling, as that is a common way to get + * v6 connectivity in the home + */ + if (iph->protocol == IPPROTO_IPV6) { + ipv6h = skb_header_pointer(skb, offset, + sizeof(_ipv6h), &_ipv6h); + + if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP) + return 0; + } else if (iph->protocol != IPPROTO_TCP) { + return 0; + } + } else if (ipv6h->version == 6 && ipv6h->nexthdr != IPPROTO_TCP) { + return 0; + } + + return ktime_mono_to_any(skb->skb_mstamp_ns, q->tk_offset); +} + +/* There are a few scenarios where we will have to modify the txtime from + * what is read from next_txtime in sched_entry. They are: + * 1. If txtime is in the past, + * a. The gate for the traffic class is currently open and packet can be + * transmitted before it closes, schedule the packet right away. + * b. If the gate corresponding to the traffic class is going to open later + * in the cycle, set the txtime of packet to the interval start. + * 2. If txtime is in the future, there are packets corresponding to the + * current traffic class waiting to be transmitted. So, the following + * possibilities exist: + * a. We can transmit the packet before the window containing the txtime + * closes. + * b. The window might close before the transmission can be completed + * successfully. So, schedule the packet in the next open window. 
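(Editor's aside, not part of the taprio patch: the comment above lists the cases the txtime-assist code has to handle. The sketch below reduces that cycle arithmetic to a standalone program, covering cases 1a and 1b; the schedule, helper name and values are made up for illustration and assume base_time is not after minimum_time.)

/*
 * Editor's sketch: pick a transmit time for traffic class 'tc' that is no
 * earlier than 'minimum_time', given a repeating gate schedule.
 */
#include <stdio.h>
#include <stdint.h>

struct entry { uint32_t gate_mask; int64_t interval; };

static int64_t pick_txtime(int64_t base, int64_t cycle,
			   const struct entry *e, int n_entries,
			   int tc, int64_t minimum_time)
{
	/* How far into the current cycle minimum_time falls. */
	int64_t elapsed = (minimum_time - base) % cycle;
	int64_t intv_start = minimum_time - elapsed;
	int i;

	/* Walk the current cycle, then the next one, looking for an open gate. */
	for (i = 0; i < 2 * n_entries; i++) {
		const struct entry *cur = &e[i % n_entries];
		int64_t intv_end = intv_start + cur->interval;

		if (cur->gate_mask & (1u << tc)) {
			/* 1a: gate already open, transmit right away. */
			if (minimum_time >= intv_start && minimum_time < intv_end)
				return minimum_time;
			/* 1b: gate opens later, defer to the interval start. */
			if (intv_start > minimum_time)
				return intv_start;
		}
		intv_start = intv_end;
	}
	return -1;	/* this traffic class is never scheduled */
}

int main(void)
{
	struct entry sched[] = {
		{ 0x1, 300000 },	/* tc 0 open for 300 us */
		{ 0x2, 700000 },	/* tc 1 open for 700 us */
	};
	int64_t base = 0, cycle = 1000000;	/* 1 ms cycle */

	printf("tc1 txtime: %lld ns\n",
	       (long long)pick_txtime(base, cycle, sched, 2, 1, 450000));
	return 0;
}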
+ */ +static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch) +{ + ktime_t transmit_end_time, interval_end, interval_start, tcp_tstamp; + struct taprio_sched *q = qdisc_priv(sch); + struct sched_gate_list *sched, *admin; + ktime_t minimum_time, now, txtime; + int len, packet_transmit_time; + struct sched_entry *entry; + bool sched_changed; + + now = taprio_get_time(q); + minimum_time = ktime_add_ns(now, q->txtime_delay); + + tcp_tstamp = get_tcp_tstamp(q, skb); + minimum_time = max_t(ktime_t, minimum_time, tcp_tstamp); + + rcu_read_lock(); + admin = rcu_dereference(q->admin_sched); + sched = rcu_dereference(q->oper_sched); + if (admin && ktime_after(minimum_time, admin->base_time)) + switch_schedules(q, &admin, &sched); + + /* Until the schedule starts, all the queues are open */ + if (!sched || ktime_before(minimum_time, sched->base_time)) { + txtime = minimum_time; + goto done; + } + + len = qdisc_pkt_len(skb); + packet_transmit_time = length_to_duration(q, len); + + do { + sched_changed = 0; + + entry = find_entry_to_transmit(skb, sch, sched, admin, + minimum_time, + &interval_start, &interval_end, + false); + if (!entry) { + txtime = 0; + goto done; + } + + txtime = entry->next_txtime; + txtime = max_t(ktime_t, txtime, minimum_time); + txtime = max_t(ktime_t, txtime, interval_start); + + if (admin && admin != sched && + ktime_after(txtime, admin->base_time)) { + sched = admin; + sched_changed = 1; + continue; + } + + transmit_end_time = ktime_add(txtime, packet_transmit_time); + minimum_time = transmit_end_time; + + /* Update the txtime of current entry to the next time it's + * interval starts. + */ + if (ktime_after(transmit_end_time, interval_end)) + entry->next_txtime = ktime_add(interval_start, sched->cycle_time); + } while (sched_changed || ktime_after(transmit_end_time, interval_end)); + + entry->next_txtime = transmit_end_time; + +done: + rcu_read_unlock(); + return txtime; } static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, @@ -137,6 +402,15 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (unlikely(!child)) return qdisc_drop(skb, sch, to_free); + if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) { + if (!is_valid_interval(skb, sch)) + return qdisc_drop(skb, sch, to_free); + } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { + skb->tstamp = get_packet_txtime(skb, sch); + if (!skb->tstamp) + return qdisc_drop(skb, sch, to_free); + } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; @@ -172,6 +446,9 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch) if (!skb) continue; + if (TXTIME_ASSIST_IS_ENABLED(q->flags)) + return skb; + prio = skb->priority; tc = netdev_get_prio_tc_map(dev, prio); @@ -184,11 +461,6 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch) return NULL; } -static inline int length_to_duration(struct taprio_sched *q, int len) -{ - return div_u64(len * atomic64_read(&q->picos_per_byte), 1000); -} - static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) { atomic_set(&entry->budget, @@ -232,6 +504,13 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch) if (unlikely(!child)) continue; + if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { + skb = child->ops->dequeue(child); + if (!skb) + continue; + goto skb_found; + } + skb = child->ops->peek(child); if (!skb) continue; @@ -243,7 +522,7 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch) continue; len = qdisc_pkt_len(skb); - guard = ktime_add_ns(q->get_time(), + guard = ktime_add_ns(taprio_get_time(q), length_to_duration(q, 
len)); /* In the case that there's no gate entry, there's no @@ -262,6 +541,7 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch) if (unlikely(!skb)) goto done; +skb_found: qdisc_bstats_update(sch, skb); qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; @@ -524,12 +804,22 @@ static int parse_taprio_schedule(struct nlattr **tb, if (err < 0) return err; + if (!new->cycle_time) { + struct sched_entry *entry; + ktime_t cycle = 0; + + list_for_each_entry(entry, &new->entries, list) + cycle = ktime_add_ns(cycle, entry->interval); + new->cycle_time = cycle; + } + return 0; } static int taprio_parse_mqprio_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + u32 taprio_flags) { int i, j; @@ -577,6 +867,9 @@ static int taprio_parse_mqprio_opt(struct net_device *dev, return -EINVAL; } + if (TXTIME_ASSIST_IS_ENABLED(taprio_flags)) + continue; + /* Verify that the offset and counts do not overlap */ for (j = i + 1; j < qopt->num_tc; j++) { if (last > qopt->offset[j]) { @@ -598,14 +891,14 @@ static int taprio_get_start_time(struct Qdisc *sch, s64 n; base = sched_base_time(sched); - now = q->get_time(); + now = taprio_get_time(q); if (ktime_after(base, now)) { *start = base; return 0; } - cycle = get_cycle_time(sched); + cycle = sched->cycle_time; /* The qdisc is expected to have at least one sched_entry. Moreover, * any entry must have 'interval' > 0. Thus if the cycle time is zero, @@ -632,7 +925,7 @@ static void setup_first_close_time(struct taprio_sched *q, first = list_first_entry(&sched->entries, struct sched_entry, list); - cycle = get_cycle_time(sched); + cycle = sched->cycle_time; /* FIXME: find a better place to do this */ sched->cycle_close_time = ktime_add_ns(base, cycle); @@ -707,6 +1000,18 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; } +static void setup_txtime(struct taprio_sched *q, + struct sched_gate_list *sched, ktime_t base) +{ + struct sched_entry *entry; + u32 interval = 0; + + list_for_each_entry(entry, &sched->entries, list) { + entry->next_txtime = ktime_add_ns(base, interval); + interval += entry->interval; + } +} + static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -715,6 +1020,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_mqprio_qopt *mqprio = NULL; + u32 taprio_flags = 0; int i, err, clockid; unsigned long flags; ktime_t start; @@ -727,7 +1033,21 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_TAPRIO_ATTR_PRIOMAP]) mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]); - err = taprio_parse_mqprio_opt(dev, mqprio, extack); + if (tb[TCA_TAPRIO_ATTR_FLAGS]) { + taprio_flags = nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]); + + if (q->flags != 0 && q->flags != taprio_flags) { + NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported"); + return -EOPNOTSUPP; + } else if (!FLAGS_VALID(taprio_flags)) { + NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid"); + return -EINVAL; + } + + q->flags = taprio_flags; + } + + err = taprio_parse_mqprio_opt(dev, mqprio, extack, taprio_flags); if (err < 0) return err; @@ -786,7 +1106,18 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, /* Protects against enqueue()/dequeue() */ spin_lock_bh(qdisc_lock(sch)); - if (!hrtimer_active(&q->advance_timer)) { + if 
(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]) { + if (!TXTIME_ASSIST_IS_ENABLED(q->flags)) { + NL_SET_ERR_MSG_MOD(extack, "txtime-delay can only be set when txtime-assist mode is enabled"); + err = -EINVAL; + goto unlock; + } + + q->txtime_delay = nla_get_s32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]); + } + + if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) && + !hrtimer_active(&q->advance_timer)) { hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); q->advance_timer.function = advance_sched; } @@ -806,16 +1137,16 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, switch (q->clockid) { case CLOCK_REALTIME: - q->get_time = ktime_get_real; + q->tk_offset = TK_OFFS_REAL; break; case CLOCK_MONOTONIC: - q->get_time = ktime_get; + q->tk_offset = TK_OFFS_MAX; break; case CLOCK_BOOTTIME: - q->get_time = ktime_get_boottime; + q->tk_offset = TK_OFFS_BOOT; break; case CLOCK_TAI: - q->get_time = ktime_get_clocktai; + q->tk_offset = TK_OFFS_TAI; break; default: NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); @@ -829,20 +1160,35 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, goto unlock; } - setup_first_close_time(q, new_admin, start); + if (TXTIME_ASSIST_IS_ENABLED(taprio_flags)) { + setup_txtime(q, new_admin, start); - /* Protects against advance_sched() */ - spin_lock_irqsave(&q->current_entry_lock, flags); + if (!oper) { + rcu_assign_pointer(q->oper_sched, new_admin); + err = 0; + new_admin = NULL; + goto unlock; + } - taprio_start_sched(sch, start, new_admin); + rcu_assign_pointer(q->admin_sched, new_admin); + if (admin) + call_rcu(&admin->rcu, taprio_free_sched_cb); + } else { + setup_first_close_time(q, new_admin, start); - rcu_assign_pointer(q->admin_sched, new_admin); - if (admin) - call_rcu(&admin->rcu, taprio_free_sched_cb); - new_admin = NULL; + /* Protects against advance_sched() */ + spin_lock_irqsave(&q->current_entry_lock, flags); - spin_unlock_irqrestore(&q->current_entry_lock, flags); + taprio_start_sched(sch, start, new_admin); + rcu_assign_pointer(q->admin_sched, new_admin); + if (admin) + call_rcu(&admin->rcu, taprio_free_sched_cb); + + spin_unlock_irqrestore(&q->current_entry_lock, flags); + } + + new_admin = NULL; err = 0; unlock: @@ -1080,6 +1426,13 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) goto options_error; + if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags)) + goto options_error; + + if (q->txtime_delay && + nla_put_s32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay)) + goto options_error; + if (oper && dump_schedule(skb, oper)) goto options_error; diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 505628884783..bdbf4b3125b6 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -738,13 +738,17 @@ kci_test_ipsec_offload() sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ sysfsf=$sysfsd/ipsec sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ + probed=false # setup netdevsim since dummydev doesn't have offload support - modprobe netdevsim - check_err $? - if [ $ret -ne 0 ]; then - echo "FAIL: ipsec_offload can't load netdevsim" - return 1 + if [ ! -w /sys/bus/netdevsim/new_device ] ; then + modprobe -q netdevsim + check_err $? 
+ if [ $ret -ne 0 ]; then + echo "SKIP: ipsec_offload can't load netdevsim" + return $ksft_skip + fi + probed=true fi echo "0" > /sys/bus/netdevsim/new_device @@ -824,7 +828,7 @@ EOF fi # clean up any leftovers - rmmod netdevsim + $probed && rmmod netdevsim if [ $ret -ne 0 ]; then echo "FAIL: ipsec_offload" |